aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorchertus <azuikov@ydb.tech>2022-12-22 16:12:20 +0300
committerchertus <azuikov@ydb.tech>2022-12-22 16:12:20 +0300
commitae0d2d0275506871ad5bc8c40f54efc0c5a93b91 (patch)
tree698a5cfefdbf8b82d4c485e17c6256dd026702f6
parent0ecb69ad9d422ed676fd41df1741115ce62d40b6 (diff)
downloadydb-ae0d2d0275506871ad5bc8c40f54efc0c5a93b91.tar.gz
enable min-max indexes for all primitive types in ColumnShard
-rw-r--r--ydb/core/formats/switch_type.h25
-rw-r--r--ydb/core/tx/columnshard/columnshard_impl.cpp8
-rw-r--r--ydb/core/tx/columnshard/engines/column_engine_logs.cpp4
-rw-r--r--ydb/core/tx/columnshard/engines/index_info.cpp33
-rw-r--r--ydb/core/tx/columnshard/engines/index_info.h9
-rw-r--r--ydb/core/tx/columnshard/engines/scalars.cpp3
-rw-r--r--ydb/core/tx/columnshard/engines/ut_logs_engine.cpp2
7 files changed, 53 insertions, 31 deletions
diff --git a/ydb/core/formats/switch_type.h b/ydb/core/formats/switch_type.h
index c04d59fd24..5c78dc2014 100644
--- a/ydb/core/formats/switch_type.h
+++ b/ydb/core/formats/switch_type.h
@@ -108,7 +108,7 @@ bool SwitchArrayType(const arrow::Datum& column, TFunc&& f) {
* @return Result of execution of callback or false if the type typeId is not supported.
*/
template <typename TFunc>
-bool SwitchYqlTypeToArrowType(NScheme::TTypeInfo typeInfo, TFunc&& callback) {
+bool SwitchYqlTypeToArrowType(const NScheme::TTypeInfo& typeInfo, TFunc&& callback) {
switch (typeInfo.GetTypeId()) {
case NScheme::NTypeIds::Bool:
return callback(TTypeWrapper<arrow::BooleanType>());
@@ -162,6 +162,29 @@ bool SwitchYqlTypeToArrowType(NScheme::TTypeInfo typeInfo, TFunc&& callback) {
return false;
}
+inline bool IsPrimitiveYqlType(const NScheme::TTypeInfo& typeInfo) {
+ switch (typeInfo.GetTypeId()) {
+ case NScheme::NTypeIds::Int8:
+ case NScheme::NTypeIds::Uint8:
+ case NScheme::NTypeIds::Int16:
+ case NScheme::NTypeIds::Date:
+ case NScheme::NTypeIds::Uint16:
+ case NScheme::NTypeIds::Int32:
+ case NScheme::NTypeIds::Datetime:
+ case NScheme::NTypeIds::Uint32:
+ case NScheme::NTypeIds::Int64:
+ case NScheme::NTypeIds::Uint64:
+ case NScheme::NTypeIds::Float:
+ case NScheme::NTypeIds::Double:
+ case NScheme::NTypeIds::Timestamp:
+ case NScheme::NTypeIds::Interval:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
template <typename T>
bool Append(arrow::ArrayBuilder& builder, const typename T::c_type& value) {
using TBuilder = typename arrow::TypeTraits<T>::BuilderType;
diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp
index d4b6cd0d85..d3afee8194 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.cpp
+++ b/ydb/core/tx/columnshard/columnshard_impl.cpp
@@ -650,10 +650,6 @@ void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto,
void TColumnShard::SetPrimaryIndex(TMap<NOlap::TSnapshot, NOlap::TIndexInfo>&& schemaVersions) {
for (auto& [snap, indexInfo] : schemaVersions) {
- for (auto& columnName : Ttl.TtlColumns()) {
- indexInfo.AddTtlColumn(columnName);
- }
-
if (!PrimaryIndex) {
PrimaryIndex = std::make_unique<NOlap::TColumnEngineForLogs>(std::move(indexInfo), TabletID());
SetCounter(COUNTER_INDEXES, 1);
@@ -661,6 +657,10 @@ void TColumnShard::SetPrimaryIndex(TMap<NOlap::TSnapshot, NOlap::TIndexInfo>&& s
PrimaryIndex->UpdateDefaultSchema(snap, std::move(indexInfo));
}
}
+
+ for (auto& columnName : Ttl.TtlColumns()) {
+ PrimaryIndex->GetIndexInfo().CheckTtlColumn(columnName);
+ }
}
void TColumnShard::EnqueueBackgroundActivities(bool periodic, bool insertOnly) {
diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
index ea93b8b39a..e45fecdb8f 100644
--- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
+++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
@@ -399,7 +399,7 @@ TColumnEngineForLogs::TColumnEngineForLogs(TIndexInfo&& info, ui64 tabletId, con
/// @note Setting replace and sorting key to PK we are able to:
/// * apply REPLACE by MergeSort
/// * apply PK predicate before REPLACE
- IndexInfo.SetAllKeys(IndexInfo.GetPK(), {0});
+ IndexInfo.SetAllKeys();
auto& indexKey = IndexInfo.GetIndexKey();
Y_VERIFY(indexKey->num_fields() == 1);
@@ -534,7 +534,7 @@ void TColumnEngineForLogs::UpdateDefaultSchema(const TSnapshot& snapshot, TIndex
Y_UNUSED(snapshot);
IndexInfo = std::move(info);
// copied from constructor above
- IndexInfo.SetAllKeys(IndexInfo.GetPK(), {0});
+ IndexInfo.SetAllKeys();
}
bool TColumnEngineForLogs::Load(IDbWrapper& db, const THashSet<ui64>& pathsToDrop) {
diff --git a/ydb/core/tx/columnshard/engines/index_info.cpp b/ydb/core/tx/columnshard/engines/index_info.cpp
index 5eff70619a..64e9191987 100644
--- a/ydb/core/tx/columnshard/engines/index_info.cpp
+++ b/ydb/core/tx/columnshard/engines/index_info.cpp
@@ -197,27 +197,30 @@ std::shared_ptr<arrow::Field> TIndexInfo::ArrowColumnField(ui32 columnId) const
return ArrowSchema()->GetFieldByName(columnName);
}
-void TIndexInfo::SetAllKeys(const TVector<TString>& columns, const TVector<int>& indexKeyPos) {
- AddRequiredColumns(columns);
- MinMaxIdxColumnsIds.insert(GetPKFirstColumnId());
+void TIndexInfo::SetAllKeys() {
+ auto pk = NamesOnly(GetPK());
+ AddRequiredColumns(pk);
- std::vector<std::shared_ptr<arrow::Field>> fileds;
- if (columns.size()) {
- SortingKey = ArrowSchema(columns);
+ std::vector<std::shared_ptr<arrow::Field>> fields;
+ if (pk.size()) {
+ SortingKey = ArrowSchema(pk);
ReplaceKey = SortingKey;
- fileds = ReplaceKey->fields();
+ fields = ReplaceKey->fields();
+
+ std::vector<std::shared_ptr<arrow::Field>> indexFields = { fields[0] };
+ IndexKey = std::make_shared<arrow::Schema>(indexFields);
}
- fileds.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()));
- fileds.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64()));
- ExtendedKey = std::make_shared<arrow::Schema>(fileds);
+ fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()));
+ fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64()));
+ ExtendedKey = std::make_shared<arrow::Schema>(fields);
- std::vector<std::shared_ptr<arrow::Field>> indexFields;
- indexFields.reserve(indexKeyPos.size());
- for (int pos : indexKeyPos) {
- indexFields.push_back(fileds[pos]);
+ for (auto& [colId, column] : Columns) {
+ if (NArrow::IsPrimitiveYqlType(column.PType)) {
+ MinMaxIdxColumnsIds.insert(colId);
+ }
}
- IndexKey = std::make_shared<arrow::Schema>(indexFields);
+ MinMaxIdxColumnsIds.insert(GetPKFirstColumnId());
}
std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const {
diff --git a/ydb/core/tx/columnshard/engines/index_info.h b/ydb/core/tx/columnshard/engines/index_info.h
index 68855e43c6..0efd5c562c 100644
--- a/ydb/core/tx/columnshard/engines/index_info.h
+++ b/ydb/core/tx/columnshard/engines/index_info.h
@@ -139,14 +139,11 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema {
const std::shared_ptr<arrow::Schema>& GetExtendedKey() const { return ExtendedKey; }
const std::shared_ptr<arrow::Schema>& GetIndexKey() const { return IndexKey; }
- void SetAllKeys(const TVector<TString>& columns, const TVector<int>& indexKeyPos);
- void SetAllKeys(const TVector<std::pair<TString, NScheme::TTypeInfo>>& columns, const TVector<int>& indexKeyPos) {
- SetAllKeys(NamesOnly(columns), indexKeyPos);
- }
+ void SetAllKeys();
- void AddTtlColumn(const TString& ttlColumn) {
+ void CheckTtlColumn(const TString& ttlColumn) const {
Y_VERIFY(!ttlColumn.empty());
- MinMaxIdxColumnsIds.insert(GetColumnId(ttlColumn));
+ Y_VERIFY(MinMaxIdxColumnsIds.count(GetColumnId(ttlColumn)));
}
TVector<TRawTypeValue> ExtractKey(const THashMap<ui32, TCell>& fields, bool allowNulls = false) const;
diff --git a/ydb/core/tx/columnshard/engines/scalars.cpp b/ydb/core/tx/columnshard/engines/scalars.cpp
index 3b45e29364..d23b9e7e72 100644
--- a/ydb/core/tx/columnshard/engines/scalars.cpp
+++ b/ydb/core/tx/columnshard/engines/scalars.cpp
@@ -45,8 +45,7 @@ void ScalarToConstant(const arrow::Scalar& scalar, NKikimrSSA::TProgram_TConstan
value.SetInt64(static_cast<const arrow::Date64Scalar&>(scalar).value);
break;
case arrow::Type::TIMESTAMP:
- // TODO: signed timestamps
- value.SetUint64(static_cast<const arrow::TimestampScalar&>(scalar).value);
+ value.SetInt64(static_cast<const arrow::TimestampScalar&>(scalar).value);
break;
case arrow::Type::TIME32:
value.SetInt32(static_cast<const arrow::Time32Scalar&>(scalar).value);
diff --git a/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp
index b5e6188cd4..8eebd177c1 100644
--- a/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp
+++ b/ydb/core/tx/columnshard/engines/ut_logs_engine.cpp
@@ -188,7 +188,7 @@ TIndexInfo TestTableInfo(const TVector<std::pair<TString, TTypeInfo>>& ydbSchema
indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[keyName]);
}
- indexInfo.AddTtlColumn("timestamp");
+ indexInfo.SetAllKeys();
return indexInfo;
}