diff options
author | ivanmorozov <ivanmorozov@ydb.tech> | 2023-12-06 01:33:57 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@ydb.tech> | 2023-12-06 02:14:13 +0300 |
commit | 2b27731e53b45e8ebb704a0deda15ce0527e7f79 (patch) | |
tree | 5f10b8a0f5170a1dfbc9d22c44c8baacbb112815 | |
parent | 525f68b21befc656ea5f25bb7e30035d2d350768 (diff) | |
download | ydb-2b27731e53b45e8ebb704a0deda15ce0527e7f79.tar.gz |
KIKIMR-20406: Fix ArrowSchema initialization
4 files changed, 39 insertions, 24 deletions
diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp index 2abca9e067..6f72ad5968 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp @@ -1,6 +1,7 @@ #include "abstract_scheme.h" #include <ydb/core/tx/columnshard/engines/index_info.h> +#include <util/string/join.h> namespace NKikimr::NOlap { @@ -103,4 +104,10 @@ std::shared_ptr<arrow::RecordBatch> ISnapshotSchema::PrepareForInsert(const TStr return batch; } +ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const { + auto id = GetColumnIdOptional(columnName); + AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", GetSchema()->field_names())); + return *id; +} + } diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h index f97b265648..2f04897193 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h @@ -45,11 +45,7 @@ public: virtual std::optional<ui32> GetColumnIdOptional(const std::string& columnName) const = 0; virtual int GetFieldIndex(const ui32 columnId) const = 0; - ui32 GetColumnId(const std::string& columnName) const { - auto id = GetColumnIdOptional(columnName); - AFL_VERIFY(id); - return *id; - } + ui32 GetColumnId(const std::string& columnName) const; std::shared_ptr<arrow::Field> GetFieldByIndex(const int index) const; std::shared_ptr<arrow::Field> GetFieldByColumnId(const ui32 columnId) const; std::shared_ptr<arrow::Field> GetFieldByColumnIdVerified(const ui32 columnId) const { diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index 21dd75d4af..e7e7917726 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -127,27 +127,26 @@ std::vector<TNameTypeInfo> TIndexInfo::GetColumns(const std::vector<ui32>& ids) return NOlap::GetColumns(*this, ids); } -std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchema() const { - if (!Schema) { - std::vector<ui32> ids; - ids.reserve(Columns.size()); - for (const auto& [id, _] : Columns) { - ids.push_back(id); - } - - // The ids had a set type before so we keep them sorted. - std::sort(ids.begin(), ids.end()); - Schema = MakeArrowSchema(Columns, ids); +void TIndexInfo::BuildArrowSchema() { + AFL_VERIFY(!Schema); + std::vector<ui32> ids; + ids.reserve(Columns.size()); + for (const auto& [id, _] : Columns) { + ids.push_back(id); } - return Schema; + // The ids had a set type before so we keep them sorted. + std::sort(ids.begin(), ids.end()); + Schema = MakeArrowSchema(Columns, ids); } -std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchemaWithSpecials() const { - if (SchemaWithSpecials) { - return SchemaWithSpecials; - } +std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchema() const { + AFL_VERIFY(Schema); + return Schema; +} +void TIndexInfo::BuildSchemaWithSpecials() { + AFL_VERIFY(!SchemaWithSpecials); const auto& schema = ArrowSchema(); std::vector<std::shared_ptr<arrow::Field>> extended; @@ -160,6 +159,10 @@ std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchemaWithSpecials() const { extended.insert(extended.end(), schema->fields().begin(), schema->fields().end()); SchemaWithSpecials = std::make_shared<arrow::Schema>(std::move(extended)); +} + +std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchemaWithSpecials() const { + AFL_VERIFY(SchemaWithSpecials); return SchemaWithSpecials; } @@ -244,6 +247,11 @@ void TIndexInfo::SetAllKeys() { } } MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); + if (!Schema) { + AFL_VERIFY(!SchemaWithSpecials); + BuildArrowSchema(); + BuildSchemaWithSpecials(); + } } std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const { @@ -360,6 +368,8 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull); ColumnNames[name] = id; } + BuildArrowSchema(); + BuildSchemaWithSpecials(); for (const auto& col : schema.GetColumns()) { std::optional<TColumnFeatures> cFeatures = TColumnFeatures::BuildFromProto(col, *this); if (!cFeatures) { @@ -382,7 +392,6 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } DefaultCompression = *result; } - Version = schema.GetVersion(); return true; } diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index a50e3dbeef..97dffcfa60 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -37,6 +37,9 @@ private: TIndexInfo(const TString& name, ui32 id); bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema); TColumnFeatures& GetOrCreateColumnFeatures(const ui32 columnId) const; + void BuildSchemaWithSpecials(); + void BuildArrowSchema(); + public: static constexpr const char* SPEC_COL_PLAN_STEP = "_yql_plan_step"; static constexpr const char* SPEC_COL_TX_ID = "_yql_tx_id"; @@ -207,8 +210,8 @@ private: ui32 Id; ui64 Version = 0; TString Name; - mutable std::shared_ptr<arrow::Schema> Schema; - mutable std::shared_ptr<arrow::Schema> SchemaWithSpecials; + std::shared_ptr<arrow::Schema> Schema; + std::shared_ptr<arrow::Schema> SchemaWithSpecials; std::shared_ptr<arrow::Schema> SortingKey; std::shared_ptr<arrow::Schema> ReplaceKey; std::shared_ptr<arrow::Schema> ExtendedKey; // Extend PK with snapshot columns to allow old shapshot reads |