diff options
author | chertus <[email protected]> | 2023-06-13 11:51:37 +0300 |
---|---|---|
committer | chertus <[email protected]> | 2023-06-13 11:51:37 +0300 |
commit | 86e04425d6160fe5ee39ce6161b3c826fb413efb (patch) | |
tree | 01d9d22a3dd520128d2d6a5d585ca1dd5bd3f700 | |
parent | 8157768754529d39414738f449c240c25ed994a0 (diff) |
more verifies
-rw-r--r-- | ydb/core/tx/columnshard/engines/index_logic_logs.cpp | 11 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/portion_info.h | 4 |
2 files changed, 8 insertions, 7 deletions
diff --git a/ydb/core/tx/columnshard/engines/index_logic_logs.cpp b/ydb/core/tx/columnshard/engines/index_logic_logs.cpp
index e4e693f1205..4883fe194e9 100644
--- a/ydb/core/tx/columnshard/engines/index_logic_logs.cpp
+++ b/ydb/core/tx/columnshard/engines/index_logic_logs.cpp
@@ -474,8 +474,7 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
     Y_VERIFY(minTs >= ts0.GetBorder());
 
     // It's an estimation of needed count cause numRows calculated before key replaces
-    ui32 numSplitInto = changes.NumSplitInto(numRows);
-    ui32 rowsInGranule = numRows / numSplitInto;
+    ui32 rowsInGranule = numRows / changes.NumSplitInto(numRows);
     Y_VERIFY(rowsInGranule);
 
     // Cannot split in case of one unique key
@@ -530,6 +529,7 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
         for (const auto& border : borders) {
             int offset = NArrow::LowerBound(keys, border, batchOffsets.back());
             Y_VERIFY(offset >= batchOffsets.back());
+            Y_VERIFY(offset <= batch->num_rows());
             batchOffsets.push_back(offset);
         }
 
@@ -540,6 +540,7 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
     for (ui32 granuleNo = 0; granuleNo < borders.size() + 1; ++granuleNo) {
         std::vector<std::shared_ptr<arrow::RecordBatch>> granuleBatches;
         granuleBatches.reserve(batches.size());
+        const bool lastGranule = (granuleNo == borders.size());
 
         // Extract granule: slice source batches with offsets
         i64 granuleNumRows = 0;
@@ -548,14 +549,12 @@ TCompactionLogic::SliceGranuleBatches(const TIndexInfo& indexInfo,
             auto& batchOffsets = offsets[i];
 
             int offset = batchOffsets[granuleNo];
-            int end = batch->num_rows();
-            if (granuleNo < borders.size()) {
-                end = batchOffsets[granuleNo + 1];
-            }
+            int end = lastGranule ? batch->num_rows() : batchOffsets[granuleNo + 1];
             int size = end - offset;
             Y_VERIFY(size >= 0);
 
             if (size) {
+                Y_VERIFY(offset < batch->num_rows());
                 auto slice = batch->Slice(offset, size);
                 Y_VERIFY(slice->num_rows());
                 granuleNumRows += slice->num_rows();
diff --git a/ydb/core/tx/columnshard/engines/portion_info.h b/ydb/core/tx/columnshard/engines/portion_info.h
index e943c29f710..918d419f38d 100644
--- a/ydb/core/tx/columnshard/engines/portion_info.h
+++ b/ydb/core/tx/columnshard/engines/portion_info.h
@@ -614,7 +614,9 @@ public:
     std::shared_ptr<arrow::RecordBatch> AssembleInBatch(const ISnapshotSchema& dataSchema,
                                             const ISnapshotSchema& resultSchema,
                                             const THashMap<TBlobRange, TString>& data) const {
-        return PrepareForAssemble(dataSchema, resultSchema, data).Assemble();
+        auto batch = PrepareForAssemble(dataSchema, resultSchema, data).Assemble();
+        Y_VERIFY(batch->Validate().ok());
+        return batch;
     }
 
     static TString SerializeColumn(const std::shared_ptr<arrow::Array>& array,