summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorchertus <[email protected]>2023-03-23 11:13:57 +0300
committerchertus <[email protected]>2023-03-23 11:13:57 +0300
commitd6d05404a273de5c6a0c3b4cfd0a190743e6adbb (patch)
treee56ad0f905f8b2b198f52d53de23e799cddf01f7
parent224cb4a695bb3ddff3ceda4f75e6965e83c12e76 (diff)
do not save dups in Inserted portions
-rw-r--r--ydb/core/tx/columnshard/engines/column_engine_logs.cpp7
-rw-r--r--ydb/core/tx/columnshard/engines/indexed_read_data.cpp6
-rw-r--r--ydb/core/tx/columnshard/engines/portion_info.h3
3 files changed, 13 insertions, 3 deletions
diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
index 4eae943915e..170991ba2ae 100644
--- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
+++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
@@ -1562,9 +1562,16 @@ TVector<TString> TColumnEngineForLogs::IndexBlobs(const TIndexInfo& indexInfo,
changes->AddPathIfNotExists(pathId);
// We could merge data here because the tablet limits indexing data portions
+#if 0
auto merged = NArrow::CombineSortedBatches(batches, indexInfo.SortDescription()); // insert: no replace
Y_VERIFY(merged);
Y_VERIFY_DEBUG(NArrow::IsSorted(merged, indexInfo.GetReplaceKey()));
+#else
+ auto merged = NArrow::CombineSortedBatches(batches, indexInfo.SortReplaceDescription());
+ Y_VERIFY(merged);
+ Y_VERIFY_DEBUG(NArrow::IsSortedAndUnique(merged, indexInfo.GetReplaceKey()));
+
+#endif
auto granuleBatches = SliceIntoGranules(merged, changes->PathToGranule[pathId], indexInfo);
for (auto& [granule, batch] : granuleBatches) {
diff --git a/ydb/core/tx/columnshard/engines/indexed_read_data.cpp b/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
index b20269dfa99..4b5e81677ab 100644
--- a/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
+++ b/ydb/core/tx/columnshard/engines/indexed_read_data.cpp
@@ -179,9 +179,11 @@ THashMap<TBlobRange, ui64> TIndexedReadData::InitRead(ui32 inputBatch, bool inGr
}
// If there are no PK dups in the granule, we could use the optimized version of merge
- if (portionInfo.CanHaveDups()) {
+ if (portionInfo.CanIntersectOthers()) {
GranulesWithDups.emplace(granule);
- PortionsWithDups.emplace(portion);
+ if (portionInfo.CanHaveDups()) {
+ PortionsWithDups.emplace(portion);
+ }
}
for (const NOlap::TColumnRecord& rec : portionInfo.Records) {
diff --git a/ydb/core/tx/columnshard/engines/portion_info.h b/ydb/core/tx/columnshard/engines/portion_info.h
index 2374bc3dc6a..15428e11573 100644
--- a/ydb/core/tx/columnshard/engines/portion_info.h
+++ b/ydb/core/tx/columnshard/engines/portion_info.h
@@ -53,7 +53,8 @@ struct TPortionInfo {
bool Empty() const { return Records.empty(); }
bool Valid() const { return !Empty() && Meta.Produced != TPortionMeta::UNSPECIFIED && HasMinMax(FirstPkColumn); }
bool IsInserted() const { return Meta.Produced == TPortionMeta::INSERTED; }
- bool CanHaveDups() const { return !Valid() || IsInserted(); }
+ bool CanHaveDups() const { return !Valid(); /* || IsInserted(); */ }
+ bool CanIntersectOthers() const { return !Valid() || IsInserted(); }
ui32 NumRecords() const { return Records.size(); }
bool EvictReady(size_t hotSize) const {