aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ivanmorozov <ivanmorozov@ydb.tech> 2023-12-13 20:30:28 +0300
committer ivanmorozov <ivanmorozov@ydb.tech> 2023-12-13 23:26:32 +0300
commit 7873f211fb2af52c9c544317f4d987ae062e2519 (patch)
tree 1fe6cc11b7475c2712fddd5c46e0876f394df026
parent 58ba8bb28c76ab2173c65407ff16e40d52b7e543 (diff)
download ydb-7873f211fb2af52c9c544317f4d987ae062e2519.tar.gz
primary keys naming unification
-rw-r--r-- ydb/core/tx/columnshard/columnshard__scan.cpp 2
-rw-r--r-- ydb/core/tx/columnshard/engines/column_engine.h 12
-rw-r--r-- ydb/core/tx/columnshard/engines/predicate/range.cpp 4
-rw-r--r-- ydb/core/tx/columnshard/engines/reader/read_metadata.cpp 2
-rw-r--r-- ydb/core/tx/columnshard/engines/reader/read_metadata.h 6
-rw-r--r-- ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp 2
-rw-r--r-- ydb/core/tx/columnshard/engines/scheme/index_info.cpp 28
-rw-r--r-- ydb/core/tx/columnshard/engines/scheme/index_info.h 17
8 files changed, 26 insertions, 47 deletions
diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp
index d3e7138b5a..c261b70dfa 100644
--- a/ydb/core/tx/columnshard/columnshard__scan.cpp
+++ b/ydb/core/tx/columnshard/columnshard__scan.cpp
@@ -749,7 +749,7 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) {
auto ydbKey = isIndexStats ?
NOlap::GetColumns(PrimaryIndexStatsSchema, PrimaryIndexStatsSchema.KeyColumns) :
- indexInfo->GetPrimaryKey();
+ indexInfo->GetPrimaryKeyColumns();
for (auto& range: record.GetRanges()) {
if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), isIndexStats ? nullptr : indexInfo, ErrorDescription)) {
diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h
index 7697e28fb0..d823af9629 100644
--- a/ydb/core/tx/columnshard/engines/column_engine.h
+++ b/ydb/core/tx/columnshard/engines/column_engine.h
@@ -288,7 +288,7 @@ public:
class TVersionedIndex {
std::map<TSnapshot, ISnapshotSchema::TPtr> Snapshots;
- std::shared_ptr<arrow::Schema> IndexKey;
+ std::shared_ptr<arrow::Schema> PrimaryKey;
std::map<ui64, ISnapshotSchema::TPtr> SnapshotByVersion;
ui64 LastSchemaVersion = 0;
public:
@@ -327,15 +327,15 @@ public:
return Snapshots.rbegin()->second;
}
- const std::shared_ptr<arrow::Schema>& GetIndexKey() const noexcept {
- return IndexKey;
+ const std::shared_ptr<arrow::Schema>& GetPrimaryKey() const noexcept {
+ return PrimaryKey;
}
void AddIndex(const TSnapshot& snapshot, TIndexInfo&& indexInfo) {
if (Snapshots.empty()) {
- IndexKey = indexInfo.GetIndexKey();
+ PrimaryKey = indexInfo.GetPrimaryKey();
} else {
- Y_ABORT_UNLESS(IndexKey->Equals(indexInfo.GetIndexKey()));
+ Y_ABORT_UNLESS(PrimaryKey->Equals(indexInfo.GetPrimaryKey()));
}
auto newVersion = indexInfo.GetVersion();
@@ -358,8 +358,6 @@ public:
virtual const TVersionedIndex& GetVersionedIndex() const = 0;
virtual const std::shared_ptr<arrow::Schema>& GetReplaceKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetReplaceKey(); }
- virtual const std::shared_ptr<arrow::Schema>& GetSortingKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetSortingKey(); }
- virtual const std::shared_ptr<arrow::Schema>& GetIndexKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetIndexKey(); }
virtual bool HasDataInPathId(const ui64 pathId) const = 0;
virtual bool Load(IDbWrapper& db) = 0;
diff --git a/ydb/core/tx/columnshard/engines/predicate/range.cpp b/ydb/core/tx/columnshard/engines/predicate/range.cpp
index adebf3860b..53e10174b8 100644
--- a/ydb/core/tx/columnshard/engines/predicate/range.cpp
+++ b/ydb/core/tx/columnshard/engines/predicate/range.cpp
@@ -40,7 +40,7 @@ NKikimr::NArrow::TColumnFilter TPKRangeFilter::BuildFilter(const arrow::Datum& d
}
bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const {
- if (auto from = PredicateFrom.ExtractKey(indexInfo.GetIndexKey())) {
+ if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) {
const auto& portionEnd = info.IndexKeyEnd();
const int commonSize = std::min(from->Size(), portionEnd.Size());
if (std::is_gt(from->ComparePartNotNull(portionEnd, commonSize))) {
@@ -48,7 +48,7 @@ bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo
}
}
- if (auto to = PredicateTo.ExtractKey(indexInfo.GetIndexKey())) {
+ if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) {
const auto& portionStart = info.IndexKeyStart();
const int commonSize = std::min(to->Size(), portionStart.Size());
if (std::is_lt(to->ComparePartNotNull(portionStart, commonSize))) {
diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp
index 83d0ded5f5..729a50ae7a 100644
--- a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp
+++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp
@@ -69,7 +69,7 @@ std::set<ui32> TReadMetadata::GetEarlyFilterColumnIds() const {
std::set<ui32> TReadMetadata::GetPKColumnIds() const {
std::set<ui32> result;
auto& indexInfo = ResultIndexSchema->GetIndexInfo();
- for (auto&& i : indexInfo.GetPrimaryKey()) {
+ for (auto&& i : indexInfo.GetPrimaryKeyColumns()) {
Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second);
}
return result;
diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h
index 01ca8575e4..1d26b3399e 100644
--- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h
+++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h
@@ -214,16 +214,12 @@ public:
return SelectInfo->PortionsOrderedPK.empty() && CommittedBlobs.empty();
}
- std::shared_ptr<arrow::Schema> GetSortingKey() const {
- return ResultIndexSchema->GetIndexInfo().GetSortingKey();
- }
-
std::shared_ptr<arrow::Schema> GetReplaceKey() const {
return ResultIndexSchema->GetIndexInfo().GetReplaceKey();
}
std::vector<TNameTypeInfo> GetKeyYqlSchema() const override {
- return ResultIndexSchema->GetIndexInfo().GetPrimaryKey();
+ return ResultIndexSchema->GetIndexInfo().GetPrimaryKeyColumns();
}
size_t NumIndexedChunks() const {
diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp
index 7bcfff8a55..f138fc6b5f 100644
--- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp
+++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp
@@ -78,7 +78,7 @@ std::shared_ptr<arrow::RecordBatch> ISnapshotSchema::PrepareForInsert(const TStr
return nullptr;
}
- const auto& sortingKey = GetIndexInfo().GetSortingKey();
+ const auto& sortingKey = GetIndexInfo().GetPrimaryKey();
Y_ABORT_UNLESS(sortingKey);
// Check PK is NOT NULL
diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp
index b438621663..82eeb52686 100644
--- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp
+++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp
@@ -29,10 +29,7 @@ TIndexInfo::TIndexInfo(const TString& name, ui32 id)
{}
bool TIndexInfo::CheckCompatible(const TIndexInfo& other) const {
- if (!other.GetReplaceKey()->Equals(GetReplaceKey())) {
- return false;
- }
- if (!other.GetIndexKey()->Equals(GetIndexKey())) {
+ if (!other.GetPrimaryKey()->Equals(GetPrimaryKey())) {
return false;
}
return true;
@@ -240,19 +237,14 @@ void TIndexInfo::SetAllKeys() {
/// @note Setting replace and sorting key to PK we are able to:
/// * apply REPLACE by MergeSort
/// * apply PK predicate before REPLACE
- const auto& primaryKeyNames = NamesOnly(GetPrimaryKey());
+ const auto& primaryKeyNames = NamesOnly(GetPrimaryKeyColumns());
// Update set of required columns with names from primary key.
for (const auto& name: primaryKeyNames) {
RequiredColumns.insert(name);
}
-
- std::vector<std::shared_ptr<arrow::Field>> fields;
- if (primaryKeyNames.size()) {
- SortingKey = ArrowSchema(primaryKeyNames);
- ReplaceKey = SortingKey;
- fields = ReplaceKey->fields();
- IndexKey = ReplaceKey;
- }
+ AFL_VERIFY(primaryKeyNames.size());
+ PrimaryKey = ArrowSchema(primaryKeyNames);
+ std::vector<std::shared_ptr<arrow::Field>> fields = PrimaryKey->fields();
fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()));
fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64()));
@@ -271,8 +263,8 @@ void TIndexInfo::SetAllKeys() {
}
std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const {
- if (GetSortingKey()) {
- auto key = GetExtendedKey(); // Sort with extended key, greater snapshot first
+ if (GetPrimaryKey()) {
+ auto key = ExtendedKey; // Sort with extended key, greater snapshot first
Y_ABORT_UNLESS(key && key->num_fields() > 2);
auto description = std::make_shared<NArrow::TSortDescription>(key);
description->Directions[key->num_fields() - 1] = -1;
@@ -284,10 +276,10 @@ std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const {
}
std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortReplaceDescription() const {
- if (GetSortingKey()) {
- auto key = GetExtendedKey(); // Sort with extended key, greater snapshot first
+ if (GetPrimaryKey()) {
+ auto key = ExtendedKey; // Sort with extended key, greater snapshot first
Y_ABORT_UNLESS(key && key->num_fields() > 2);
- auto description = std::make_shared<NArrow::TSortDescription>(key, GetReplaceKey());
+ auto description = std::make_shared<NArrow::TSortDescription>(key, GetPrimaryKey());
description->Directions[key->num_fields() - 1] = -1;
description->Directions[key->num_fields() - 2] = -1;
description->NotNull = true; // TODO
diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h
index 4d27e29ebe..b73bf480c1 100644
--- a/ydb/core/tx/columnshard/engines/scheme/index_info.h
+++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h
@@ -130,7 +130,7 @@ public:
std::vector<TNameTypeInfo> GetColumns(const std::vector<ui32>& ids) const;
/// Traditional Primary Key (includes uniqueness, search and sorting logic)
- std::vector<TNameTypeInfo> GetPrimaryKey() const {
+ std::vector<TNameTypeInfo> GetPrimaryKeyColumns() const {
return GetColumns(KeyColumns);
}
@@ -143,10 +143,8 @@ public:
// Sorting key: could be less or greater then traditional PK
// It could be empty for append-only tables. It could be greater then PK for better columns compression.
// If sorting key includes uniqueness key as a prefix we are able to use MergeSort for REPLACE.
- const std::shared_ptr<arrow::Schema>& GetSortingKey() const { return SortingKey; }
- const std::shared_ptr<arrow::Schema>& GetReplaceKey() const { return ReplaceKey; }
- const std::shared_ptr<arrow::Schema>& GetExtendedKey() const { return ExtendedKey; }
- const std::shared_ptr<arrow::Schema>& GetIndexKey() const { return IndexKey; }
+ const std::shared_ptr<arrow::Schema>& GetReplaceKey() const { return PrimaryKey; }
+ const std::shared_ptr<arrow::Schema>& GetPrimaryKey() const { return PrimaryKey; }
/// Initializes sorting, replace, index and extended keys.
void SetAllKeys();
@@ -180,12 +178,9 @@ public:
bool AllowTtlOverColumn(const TString& name) const;
/// Returns whether the sorting keys defined.
- bool IsSorted() const { return SortingKey.get(); }
+ bool IsSorted() const { return true; }
bool IsSortedColumn(const ui32 columnId) const { return GetPKFirstColumnId() == columnId; }
- /// Returns whether the replace keys defined.
- bool IsReplacing() const { return ReplaceKey.get(); }
-
std::shared_ptr<NArrow::TSortDescription> SortDescription() const;
std::shared_ptr<NArrow::TSortDescription> SortReplaceDescription() const;
@@ -216,10 +211,8 @@ private:
TString Name;
std::shared_ptr<arrow::Schema> Schema;
std::shared_ptr<arrow::Schema> SchemaWithSpecials;
- std::shared_ptr<arrow::Schema> SortingKey;
- std::shared_ptr<arrow::Schema> ReplaceKey;
+ std::shared_ptr<arrow::Schema> PrimaryKey;
std::shared_ptr<arrow::Schema> ExtendedKey; // Extend PK with snapshot columns to allow old shapshot reads
- std::shared_ptr<arrow::Schema> IndexKey;
THashSet<TString> RequiredColumns;
THashSet<ui32> MinMaxIdxColumnsIds;
std::optional<NArrow::TCompression> DefaultCompression;