summary refs log tree commit diff stats
diff options
context:
space:
mode:
author chertus <[email protected]> 2023-06-13 11:51:37 +0300
committer chertus <[email protected]> 2023-06-13 11:51:37 +0300
commit 86e04425d6160fe5ee39ce6161b3c826fb413efb (patch)
tree 01d9d22a3dd520128d2d6a5d585ca1dd5bd3f700
parent 8157768754529d39414738f449c240c25ed994a0 (diff)
more verifies
-rw-r--r-- ydb/core/tx/columnshard/engines/index_logic_logs.cpp 11
-rw-r--r-- ydb/core/tx/columnshard/engines/portion_info.h 4
2 files changed, 8 insertions, 7 deletions
diff --git a/ydb/core/tx/columnshard/engines/index_logic_logs.cpp b/ydb/core/tx/columnshard/engines/index_logic_logs.cpp
index e4e693f1205..4883fe194e9 100644
--- a/ydb/core/tx/columnshard/engines/index_logic_logs.cpp
+++ b/ydb/core/tx/columnshard/engines/index_logic_logs.cpp
@@ -474,8 +474,7 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
Y_VERIFY(minTs >= ts0.GetBorder());
// It's an estimation of needed count cause numRows calculated before key replaces
- ui32 numSplitInto = changes.NumSplitInto(numRows);
- ui32 rowsInGranule = numRows / numSplitInto;
+ ui32 rowsInGranule = numRows / changes.NumSplitInto(numRows);
Y_VERIFY(rowsInGranule);
// Cannot split in case of one unique key
@@ -530,6 +529,7 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
for (const auto& border : borders) {
int offset = NArrow::LowerBound(keys, border, batchOffsets.back());
Y_VERIFY(offset >= batchOffsets.back());
+ Y_VERIFY(offset <= batch->num_rows());
batchOffsets.push_back(offset);
}
@@ -540,6 +540,7 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
for (ui32 granuleNo = 0; granuleNo < borders.size() + 1; ++granuleNo) {
std::vector<std::shared_ptr<arrow::RecordBatch>> granuleBatches;
granuleBatches.reserve(batches.size());
+ const bool lastGranule = (granuleNo == borders.size());
// Extract granule: slice source batches with offsets
i64 granuleNumRows = 0;
@@ -548,14 +549,12 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
auto& batchOffsets = offsets[i];
int offset = batchOffsets[granuleNo];
- int end = batch->num_rows();
- if (granuleNo < borders.size()) {
- end = batchOffsets[granuleNo + 1];
- }
+ int end = lastGranule ? batch->num_rows() : batchOffsets[granuleNo + 1];
int size = end - offset;
Y_VERIFY(size >= 0);
if (size) {
+ Y_VERIFY(offset < batch->num_rows());
auto slice = batch->Slice(offset, size);
Y_VERIFY(slice->num_rows());
granuleNumRows += slice->num_rows();
diff --git a/ydb/core/tx/columnshard/engines/portion_info.h b/ydb/core/tx/columnshard/engines/portion_info.h
index e943c29f710..918d419f38d 100644
--- a/ydb/core/tx/columnshard/engines/portion_info.h
+++ b/ydb/core/tx/columnshard/engines/portion_info.h
@@ -614,7 +614,9 @@ public:
std::shared_ptr<arrow::RecordBatch> AssembleInBatch(const ISnapshotSchema& dataSchema,
const ISnapshotSchema& resultSchema,
const THashMap<TBlobRange, TString>& data) const {
- return PrepareForAssemble(dataSchema, resultSchema, data).Assemble();
+ auto batch = PrepareForAssemble(dataSchema, resultSchema, data).Assemble();
+ Y_VERIFY(batch->Validate().ok());
+ return batch;
}
static TString SerializeColumn(const std::shared_ptr<arrow::Array>& array,