diff options
author | chertus <[email protected]> | 2023-03-23 11:13:57 +0300 |
---|---|---|
committer | chertus <[email protected]> | 2023-03-23 11:13:57 +0300 |
commit | d6d05404a273de5c6a0c3b4cfd0a190743e6adbb (patch) | |
tree | e56ad0f905f8b2b198f52d53de23e799cddf01f7 | |
parent | 224cb4a695bb3ddff3ceda4f75e6965e83c12e76 (diff) |
Do not save duplicates (dups) in Inserted portions
-rw-r--r-- | ydb/core/tx/columnshard/engines/column_engine_logs.cpp | 7 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/indexed_read_data.cpp | 6 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/portion_info.h | 3 |
3 files changed, 13 insertions, 3 deletions
diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 4eae943915e..170991ba2ae 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -1562,9 +1562,16 @@ TVector<TString> TColumnEngineForLogs::IndexBlobs(const TIndexInfo& indexInfo, changes->AddPathIfNotExists(pathId); // We could merge data here cause tablet limits indexing data portions +#if 0 auto merged = NArrow::CombineSortedBatches(batches, indexInfo.SortDescription()); // insert: no replace Y_VERIFY(merged); Y_VERIFY_DEBUG(NArrow::IsSorted(merged, indexInfo.GetReplaceKey())); +#else + auto merged = NArrow::CombineSortedBatches(batches, indexInfo.SortReplaceDescription()); + Y_VERIFY(merged); + Y_VERIFY_DEBUG(NArrow::IsSortedAndUnique(merged, indexInfo.GetReplaceKey())); + +#endif auto granuleBatches = SliceIntoGranules(merged, changes->PathToGranule[pathId], indexInfo); for (auto& [granule, batch] : granuleBatches) { diff --git a/ydb/core/tx/columnshard/engines/indexed_read_data.cpp b/ydb/core/tx/columnshard/engines/indexed_read_data.cpp index b20269dfa99..4b5e81677ab 100644 --- a/ydb/core/tx/columnshard/engines/indexed_read_data.cpp +++ b/ydb/core/tx/columnshard/engines/indexed_read_data.cpp @@ -179,9 +179,11 @@ THashMap<TBlobRange, ui64> TIndexedReadData::InitRead(ui32 inputBatch, bool inGr } // If there's no PK dups in granule we could use optimized version of merge - if (portionInfo.CanHaveDups()) { + if (portionInfo.CanIntersectOthers()) { GranulesWithDups.emplace(granule); - PortionsWithDups.emplace(portion); + if (portionInfo.CanHaveDups()) { + PortionsWithDups.emplace(portion); + } } for (const NOlap::TColumnRecord& rec : portionInfo.Records) { diff --git a/ydb/core/tx/columnshard/engines/portion_info.h b/ydb/core/tx/columnshard/engines/portion_info.h index 2374bc3dc6a..15428e11573 100644 --- a/ydb/core/tx/columnshard/engines/portion_info.h +++ 
b/ydb/core/tx/columnshard/engines/portion_info.h @@ -53,7 +53,8 @@ struct TPortionInfo { bool Empty() const { return Records.empty(); } bool Valid() const { return !Empty() && Meta.Produced != TPortionMeta::UNSPECIFIED && HasMinMax(FirstPkColumn); } bool IsInserted() const { return Meta.Produced == TPortionMeta::INSERTED; } - bool CanHaveDups() const { return !Valid() || IsInserted(); } + bool CanHaveDups() const { return !Valid(); /* || IsInserted(); */ } + bool CanIntersectOthers() const { return !Valid() || IsInserted(); } ui32 NumRecords() const { return Records.size(); } bool EvictReady(size_t hotSize) const { |