diff options
author | chertus <azuikov@ydb.tech> | 2022-12-22 16:12:20 +0300 |
---|---|---|
committer | chertus <azuikov@ydb.tech> | 2022-12-22 16:12:20 +0300 |
commit | ae0d2d0275506871ad5bc8c40f54efc0c5a93b91 (patch) | |
tree | 698a5cfefdbf8b82d4c485e17c6256dd026702f6 | |
parent | 0ecb69ad9d422ed676fd41df1741115ce62d40b6 (diff) | |
download | ydb-ae0d2d0275506871ad5bc8c40f54efc0c5a93b91.tar.gz |
enable min-max indexes for all primitive types in ColumnShard
-rw-r--r-- | ydb/core/formats/switch_type.h | 25 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/columnshard_impl.cpp | 8 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/column_engine_logs.cpp | 4 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/index_info.cpp | 33 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/index_info.h | 9 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/scalars.cpp | 3 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/engines/ut_logs_engine.cpp | 2 |
7 files changed, 53 insertions, 31 deletions
diff --git a/ydb/core/formats/switch_type.h b/ydb/core/formats/switch_type.h index c04d59fd24..5c78dc2014 100644 --- a/ydb/core/formats/switch_type.h +++ b/ydb/core/formats/switch_type.h @@ -108,7 +108,7 @@ bool SwitchArrayType(const arrow::Datum& column, TFunc&& f) { * @return Result of execution of callback or false if the type typeId is not supported. */ template <typename TFunc> -bool SwitchYqlTypeToArrowType(NScheme::TTypeInfo typeInfo, TFunc&& callback) { +bool SwitchYqlTypeToArrowType(const NScheme::TTypeInfo& typeInfo, TFunc&& callback) { switch (typeInfo.GetTypeId()) { case NScheme::NTypeIds::Bool: return callback(TTypeWrapper<arrow::BooleanType>()); @@ -162,6 +162,29 @@ bool SwitchYqlTypeToArrowType(NScheme::TTypeInfo typeInfo, TFunc&& callback) { return false; } +inline bool IsPrimitiveYqlType(const NScheme::TTypeInfo& typeInfo) { + switch (typeInfo.GetTypeId()) { + case NScheme::NTypeIds::Int8: + case NScheme::NTypeIds::Uint8: + case NScheme::NTypeIds::Int16: + case NScheme::NTypeIds::Date: + case NScheme::NTypeIds::Uint16: + case NScheme::NTypeIds::Int32: + case NScheme::NTypeIds::Datetime: + case NScheme::NTypeIds::Uint32: + case NScheme::NTypeIds::Int64: + case NScheme::NTypeIds::Uint64: + case NScheme::NTypeIds::Float: + case NScheme::NTypeIds::Double: + case NScheme::NTypeIds::Timestamp: + case NScheme::NTypeIds::Interval: + return true; + default: + break; + } + return false; +} + template <typename T> bool Append(arrow::ArrayBuilder& builder, const typename T::c_type& value) { using TBuilder = typename arrow::TypeTraits<T>::BuilderType; diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index d4b6cd0d85..d3afee8194 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -650,10 +650,6 @@ void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, void TColumnShard::SetPrimaryIndex(TMap<NOlap::TSnapshot, NOlap::TIndexInfo>&& schemaVersions) { for (auto& [snap, indexInfo] : schemaVersions) { - for (auto& columnName : Ttl.TtlColumns()) { - indexInfo.AddTtlColumn(columnName); - } - if (!PrimaryIndex) { PrimaryIndex = std::make_unique<NOlap::TColumnEngineForLogs>(std::move(indexInfo), TabletID()); SetCounter(COUNTER_INDEXES, 1); @@ -661,6 +657,10 @@ void TColumnShard::SetPrimaryIndex(TMap<NOlap::TSnapshot, NOlap::TIndexInfo>&& s PrimaryIndex->UpdateDefaultSchema(snap, std::move(indexInfo)); } } + + for (auto& columnName : Ttl.TtlColumns()) { + PrimaryIndex->GetIndexInfo().CheckTtlColumn(columnName); + } } void TColumnShard::EnqueueBackgroundActivities(bool periodic, bool insertOnly) { diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index ea93b8b39a..e45fecdb8f 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -399,7 +399,7 @@ TColumnEngineForLogs::TColumnEngineForLogs(TIndexInfo&& info, ui64 tabletId, con /// @note Setting replace and sorting key to PK we are able to: /// * apply REPLACE by MergeSort /// * apply PK predicate before REPLACE - IndexInfo.SetAllKeys(IndexInfo.GetPK(), {0}); + IndexInfo.SetAllKeys(); auto& indexKey = IndexInfo.GetIndexKey(); Y_VERIFY(indexKey->num_fields() == 1); @@ -534,7 +534,7 @@ void TColumnEngineForLogs::UpdateDefaultSchema(const TSnapshot& snapshot, TIndex Y_UNUSED(snapshot); IndexInfo = std::move(info); // copied from constructor above - IndexInfo.SetAllKeys(IndexInfo.GetPK(), {0}); + IndexInfo.SetAllKeys(); } bool TColumnEngineForLogs::Load(IDbWrapper& db, const THashSet<ui64>& pathsToDrop) { diff --git a/ydb/core/tx/columnshard/engines/index_info.cpp b/ydb/core/tx/columnshard/engines/index_info.cpp index 5eff70619a..64e9191987 100644 --- a/ydb/core/tx/columnshard/engines/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/index_info.cpp @@ -197,27 +197,30 @@ std::shared_ptr<arrow::Field> TIndexInfo::ArrowColumnField(ui32 columnId) const return ArrowSchema()->GetFieldByName(columnName); } -void TIndexInfo::SetAllKeys(const TVector<TString>& columns, const TVector<int>& indexKeyPos) { - AddRequiredColumns(columns); - MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); +void TIndexInfo::SetAllKeys() { + auto pk = NamesOnly(GetPK()); + AddRequiredColumns(pk); - std::vector<std::shared_ptr<arrow::Field>> fileds; - if (columns.size()) { - SortingKey = ArrowSchema(columns); + std::vector<std::shared_ptr<arrow::Field>> fields; + if (pk.size()) { + SortingKey = ArrowSchema(pk); ReplaceKey = SortingKey; - fileds = ReplaceKey->fields(); + fields = ReplaceKey->fields(); + + std::vector<std::shared_ptr<arrow::Field>> indexFields = { fields[0] }; + IndexKey = std::make_shared<arrow::Schema>(indexFields); } - fileds.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64())); - fileds.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); - ExtendedKey = std::make_shared<arrow::Schema>(fileds); + fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64())); + fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); + ExtendedKey = std::make_shared<arrow::Schema>(fields); - std::vector<std::shared_ptr<arrow::Field>> indexFields; - indexFields.reserve(indexKeyPos.size()); - for (int pos : indexKeyPos) { - indexFields.push_back(fileds[pos]); + for (auto& [colId, column] : Columns) { + if (NArrow::IsPrimitiveYqlType(column.PType)) { + MinMaxIdxColumnsIds.insert(colId); + } } - IndexKey = std::make_shared<arrow::Schema>(indexFields); + MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); } std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const { diff --git a/ydb/core/tx/columnshard/engines/index_info.h b/ydb/core/tx/columnshard/engines/index_info.h index 68855e43c6..0efd5c562c 100644 --- a/ydb/core/tx/columnshard/engines/index_info.h +++ b/ydb/core/tx/columnshard/engines/index_info.h @@ -139,14 +139,11 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { const std::shared_ptr<arrow::Schema>& GetExtendedKey() const { return ExtendedKey; } const std::shared_ptr<arrow::Schema>& GetIndexKey() const { return IndexKey; } - void SetAllKeys(const TVector<TString>& columns, const TVector<int>& indexKeyPos); - void SetAllKeys(const TVector<std::pair<TString, NScheme::TTypeInfo>>& columns, const TVector<int>& indexKeyPos) { - SetAllKeys(NamesOnly(columns), indexKeyPos); - } + void SetAllKeys(); - void AddTtlColumn(const TString& ttlColumn) { + void CheckTtlColumn(const TString& ttlColumn) const { Y_VERIFY(!ttlColumn.empty()); - MinMaxIdxColumnsIds.insert(GetColumnId(ttlColumn)); + Y_VERIFY(MinMaxIdxColumnsIds.count(GetColumnId(ttlColumn))); } TVector<TRawTypeValue> ExtractKey(const THashMap<ui32, TCell>& fields, bool allowNulls = false) const; diff --git a/ydb/core/tx/columnshard/engines/scalars.cpp b/ydb/core/tx/columnshard/engines/scalars.cpp index 3b45e29364..d23b9e7e72 100644 --- a/ydb/core/tx/columnshard/engines/scalars.cpp +++ b/ydb/core/tx/columnshard/engines/scalars.cpp @@ -45,8 +45,7 @@ void ScalarToConstant(const arrow::Scalar& scalar, NKikimrSSA::TProgram_TConstan value.SetInt64(static_cast<const arrow::Date64Scalar&>(scalar).value); break; case arrow::Type::TIMESTAMP: - // TODO: signed timestamps - value.SetUint64(static_cast<const arrow::TimestampScalar&>(scalar).value); + value.SetInt64(static_cast<const arrow::TimestampScalar&>(scalar).value); break; case arrow::Type::TIME32: value.SetInt32(static_cast<const arrow::Time32Scalar&>(scalar).value); diff --git a/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp index b5e6188cd4..8eebd177c1 100644 --- a/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp @@ -188,7 +188,7 @@ TIndexInfo TestTableInfo(const TVector<std::pair<TString, TTypeInfo>>& ydbSchema indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[keyName]); } - indexInfo.AddTtlColumn("timestamp"); + indexInfo.SetAllKeys(); return indexInfo; } |