diff options
author | ivanmorozov <ivanmorozov@ydb.tech> | 2023-12-13 20:30:28 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@ydb.tech> | 2023-12-13 23:26:32 +0300 |
commit | 7873f211fb2af52c9c544317f4d987ae062e2519 (patch) | |
tree | 1fe6cc11b7475c2712fddd5c46e0876f394df026 | |
parent | 58ba8bb28c76ab2173c65407ff16e40d52b7e543 (diff) | |
download | ydb-7873f211fb2af52c9c544317f4d987ae062e2519.tar.gz |
primary keys naming unification
8 files changed, 26 insertions, 47 deletions
diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index d3e7138b5a..c261b70dfa 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -749,7 +749,7 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { auto ydbKey = isIndexStats ? NOlap::GetColumns(PrimaryIndexStatsSchema, PrimaryIndexStatsSchema.KeyColumns) : - indexInfo->GetPrimaryKey(); + indexInfo->GetPrimaryKeyColumns(); for (auto& range: record.GetRanges()) { if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), isIndexStats ? nullptr : indexInfo, ErrorDescription)) { diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 7697e28fb0..d823af9629 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -288,7 +288,7 @@ public: class TVersionedIndex { std::map<TSnapshot, ISnapshotSchema::TPtr> Snapshots; - std::shared_ptr<arrow::Schema> IndexKey; + std::shared_ptr<arrow::Schema> PrimaryKey; std::map<ui64, ISnapshotSchema::TPtr> SnapshotByVersion; ui64 LastSchemaVersion = 0; public: @@ -327,15 +327,15 @@ public: return Snapshots.rbegin()->second; } - const std::shared_ptr<arrow::Schema>& GetIndexKey() const noexcept { - return IndexKey; + const std::shared_ptr<arrow::Schema>& GetPrimaryKey() const noexcept { + return PrimaryKey; } void AddIndex(const TSnapshot& snapshot, TIndexInfo&& indexInfo) { if (Snapshots.empty()) { - IndexKey = indexInfo.GetIndexKey(); + PrimaryKey = indexInfo.GetPrimaryKey(); } else { - Y_ABORT_UNLESS(IndexKey->Equals(indexInfo.GetIndexKey())); + Y_ABORT_UNLESS(PrimaryKey->Equals(indexInfo.GetPrimaryKey())); } auto newVersion = indexInfo.GetVersion(); @@ -358,8 +358,6 @@ public: virtual const TVersionedIndex& GetVersionedIndex() const = 0; virtual const std::shared_ptr<arrow::Schema>& GetReplaceKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetReplaceKey(); } - virtual const std::shared_ptr<arrow::Schema>& GetSortingKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetSortingKey(); } - virtual const std::shared_ptr<arrow::Schema>& GetIndexKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetIndexKey(); } virtual bool HasDataInPathId(const ui64 pathId) const = 0; virtual bool Load(IDbWrapper& db) = 0; diff --git a/ydb/core/tx/columnshard/engines/predicate/range.cpp b/ydb/core/tx/columnshard/engines/predicate/range.cpp index adebf3860b..53e10174b8 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/range.cpp @@ -40,7 +40,7 @@ NKikimr::NArrow::TColumnFilter TPKRangeFilter::BuildFilter(const arrow::Datum& d } bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetIndexKey())) { + if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { const auto& portionEnd = info.IndexKeyEnd(); const int commonSize = std::min(from->Size(), portionEnd.Size()); if (std::is_gt(from->ComparePartNotNull(portionEnd, commonSize))) { @@ -48,7 +48,7 @@ bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo } } - if (auto to = PredicateTo.ExtractKey(indexInfo.GetIndexKey())) { + if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { const auto& portionStart = info.IndexKeyStart(); const int commonSize = std::min(to->Size(), portionStart.Size()); if (std::is_lt(to->ComparePartNotNull(portionStart, commonSize))) { diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp index 83d0ded5f5..729a50ae7a 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp @@ -69,7 +69,7 @@ std::set<ui32> TReadMetadata::GetEarlyFilterColumnIds() const { std::set<ui32> TReadMetadata::GetPKColumnIds() const { std::set<ui32> result; auto& indexInfo = ResultIndexSchema->GetIndexInfo(); - for (auto&& i : indexInfo.GetPrimaryKey()) { + for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second); } return result; diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h index 01ca8575e4..1d26b3399e 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/read_metadata.h @@ -214,16 +214,12 @@ public: return SelectInfo->PortionsOrderedPK.empty() && CommittedBlobs.empty(); } - std::shared_ptr<arrow::Schema> GetSortingKey() const { - return ResultIndexSchema->GetIndexInfo().GetSortingKey(); - } - std::shared_ptr<arrow::Schema> GetReplaceKey() const { return ResultIndexSchema->GetIndexInfo().GetReplaceKey(); } std::vector<TNameTypeInfo> GetKeyYqlSchema() const override { - return ResultIndexSchema->GetIndexInfo().GetPrimaryKey(); + return ResultIndexSchema->GetIndexInfo().GetPrimaryKeyColumns(); } size_t NumIndexedChunks() const { diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp index 7bcfff8a55..f138fc6b5f 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp @@ -78,7 +78,7 @@ std::shared_ptr<arrow::RecordBatch> ISnapshotSchema::PrepareForInsert(const TStr return nullptr; } - const auto& sortingKey = GetIndexInfo().GetSortingKey(); + const auto& sortingKey = GetIndexInfo().GetPrimaryKey(); Y_ABORT_UNLESS(sortingKey); // Check PK is NOT NULL diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index b438621663..82eeb52686 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -29,10 +29,7 @@ TIndexInfo::TIndexInfo(const TString& name, ui32 id) {} bool TIndexInfo::CheckCompatible(const TIndexInfo& other) const { - if (!other.GetReplaceKey()->Equals(GetReplaceKey())) { - return false; - } - if (!other.GetIndexKey()->Equals(GetIndexKey())) { + if (!other.GetPrimaryKey()->Equals(GetPrimaryKey())) { return false; } return true; @@ -240,19 +237,14 @@ void TIndexInfo::SetAllKeys() { /// @note Setting replace and sorting key to PK we are able to: /// * apply REPLACE by MergeSort /// * apply PK predicate before REPLACE - const auto& primaryKeyNames = NamesOnly(GetPrimaryKey()); + const auto& primaryKeyNames = NamesOnly(GetPrimaryKeyColumns()); // Update set of required columns with names from primary key. for (const auto& name: primaryKeyNames) { RequiredColumns.insert(name); } - - std::vector<std::shared_ptr<arrow::Field>> fields; - if (primaryKeyNames.size()) { - SortingKey = ArrowSchema(primaryKeyNames); - ReplaceKey = SortingKey; - fields = ReplaceKey->fields(); - IndexKey = ReplaceKey; - } + AFL_VERIFY(primaryKeyNames.size()); + PrimaryKey = ArrowSchema(primaryKeyNames); + std::vector<std::shared_ptr<arrow::Field>> fields = PrimaryKey->fields(); fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64())); fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); @@ -271,8 +263,8 @@ void TIndexInfo::SetAllKeys() { } std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const { - if (GetSortingKey()) { - auto key = GetExtendedKey(); // Sort with extended key, greater snapshot first + if (GetPrimaryKey()) { + auto key = ExtendedKey; // Sort with extended key, greater snapshot first Y_ABORT_UNLESS(key && key->num_fields() > 2); auto description = std::make_shared<NArrow::TSortDescription>(key); description->Directions[key->num_fields() - 1] = -1; @@ -284,10 +276,10 @@ std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const { } std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortReplaceDescription() const { - if (GetSortingKey()) { - auto key = GetExtendedKey(); // Sort with extended key, greater snapshot first + if (GetPrimaryKey()) { + auto key = ExtendedKey; // Sort with extended key, greater snapshot first Y_ABORT_UNLESS(key && key->num_fields() > 2); - auto description = std::make_shared<NArrow::TSortDescription>(key, GetReplaceKey()); + auto description = std::make_shared<NArrow::TSortDescription>(key, GetPrimaryKey()); description->Directions[key->num_fields() - 1] = -1; description->Directions[key->num_fields() - 2] = -1; description->NotNull = true; // TODO diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index 4d27e29ebe..b73bf480c1 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -130,7 +130,7 @@ public: std::vector<TNameTypeInfo> GetColumns(const std::vector<ui32>& ids) const; /// Traditional Primary Key (includes uniqueness, search and sorting logic) - std::vector<TNameTypeInfo> GetPrimaryKey() const { + std::vector<TNameTypeInfo> GetPrimaryKeyColumns() const { return GetColumns(KeyColumns); } @@ -143,10 +143,8 @@ public: // Sorting key: could be less or greater then traditional PK // It could be empty for append-only tables. It could be greater then PK for better columns compression. // If sorting key includes uniqueness key as a prefix we are able to use MergeSort for REPLACE. - const std::shared_ptr<arrow::Schema>& GetSortingKey() const { return SortingKey; } - const std::shared_ptr<arrow::Schema>& GetReplaceKey() const { return ReplaceKey; } - const std::shared_ptr<arrow::Schema>& GetExtendedKey() const { return ExtendedKey; } - const std::shared_ptr<arrow::Schema>& GetIndexKey() const { return IndexKey; } + const std::shared_ptr<arrow::Schema>& GetReplaceKey() const { return PrimaryKey; } + const std::shared_ptr<arrow::Schema>& GetPrimaryKey() const { return PrimaryKey; } /// Initializes sorting, replace, index and extended keys. void SetAllKeys(); @@ -180,12 +178,9 @@ public: bool AllowTtlOverColumn(const TString& name) const; /// Returns whether the sorting keys defined. - bool IsSorted() const { return SortingKey.get(); } + bool IsSorted() const { return true; } bool IsSortedColumn(const ui32 columnId) const { return GetPKFirstColumnId() == columnId; } - /// Returns whether the replace keys defined. - bool IsReplacing() const { return ReplaceKey.get(); } - std::shared_ptr<NArrow::TSortDescription> SortDescription() const; std::shared_ptr<NArrow::TSortDescription> SortReplaceDescription() const; @@ -216,10 +211,8 @@ private: TString Name; std::shared_ptr<arrow::Schema> Schema; std::shared_ptr<arrow::Schema> SchemaWithSpecials; - std::shared_ptr<arrow::Schema> SortingKey; - std::shared_ptr<arrow::Schema> ReplaceKey; + std::shared_ptr<arrow::Schema> PrimaryKey; std::shared_ptr<arrow::Schema> ExtendedKey; // Extend PK with snapshot columns to allow old shapshot reads - std::shared_ptr<arrow::Schema> IndexKey; THashSet<TString> RequiredColumns; THashSet<ui32> MinMaxIdxColumnsIds; std::optional<NArrow::TCompression> DefaultCompression; |