aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorivanmorozov <ivanmorozov@ydb.tech>2023-12-06 01:33:57 +0300
committerivanmorozov <ivanmorozov@ydb.tech>2023-12-06 02:14:13 +0300
commit2b27731e53b45e8ebb704a0deda15ce0527e7f79 (patch)
tree5f10b8a0f5170a1dfbc9d22c44c8baacbb112815
parent525f68b21befc656ea5f25bb7e30035d2d350768 (diff)
downloadydb-2b27731e53b45e8ebb704a0deda15ce0527e7f79.tar.gz
KIKIMR-20406: Fix ArrowSchema initialization
-rw-r--r--ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp7
-rw-r--r--ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h6
-rw-r--r--ydb/core/tx/columnshard/engines/scheme/index_info.cpp43
-rw-r--r--ydb/core/tx/columnshard/engines/scheme/index_info.h7
4 files changed, 39 insertions, 24 deletions
diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp
index 2abca9e067..6f72ad5968 100644
--- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp
+++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp
@@ -1,6 +1,7 @@
#include "abstract_scheme.h"
#include <ydb/core/tx/columnshard/engines/index_info.h>
+#include <util/string/join.h>
namespace NKikimr::NOlap {
@@ -103,4 +104,10 @@ std::shared_ptr<arrow::RecordBatch> ISnapshotSchema::PrepareForInsert(const TStr
return batch;
}
+ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const {
+ auto id = GetColumnIdOptional(columnName);
+ AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", GetSchema()->field_names()));
+ return *id;
+}
+
}
diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h
index f97b265648..2f04897193 100644
--- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h
+++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h
@@ -45,11 +45,7 @@ public:
virtual std::optional<ui32> GetColumnIdOptional(const std::string& columnName) const = 0;
virtual int GetFieldIndex(const ui32 columnId) const = 0;
- ui32 GetColumnId(const std::string& columnName) const {
- auto id = GetColumnIdOptional(columnName);
- AFL_VERIFY(id);
- return *id;
- }
+ ui32 GetColumnId(const std::string& columnName) const;
std::shared_ptr<arrow::Field> GetFieldByIndex(const int index) const;
std::shared_ptr<arrow::Field> GetFieldByColumnId(const ui32 columnId) const;
std::shared_ptr<arrow::Field> GetFieldByColumnIdVerified(const ui32 columnId) const {
diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp
index 21dd75d4af..e7e7917726 100644
--- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp
+++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp
@@ -127,27 +127,26 @@ std::vector<TNameTypeInfo> TIndexInfo::GetColumns(const std::vector<ui32>& ids)
return NOlap::GetColumns(*this, ids);
}
-std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchema() const {
- if (!Schema) {
- std::vector<ui32> ids;
- ids.reserve(Columns.size());
- for (const auto& [id, _] : Columns) {
- ids.push_back(id);
- }
-
- // The ids had a set type before so we keep them sorted.
- std::sort(ids.begin(), ids.end());
- Schema = MakeArrowSchema(Columns, ids);
+void TIndexInfo::BuildArrowSchema() {
+ AFL_VERIFY(!Schema);
+ std::vector<ui32> ids;
+ ids.reserve(Columns.size());
+ for (const auto& [id, _] : Columns) {
+ ids.push_back(id);
}
- return Schema;
+ // The ids had a set type before so we keep them sorted.
+ std::sort(ids.begin(), ids.end());
+ Schema = MakeArrowSchema(Columns, ids);
}
-std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchemaWithSpecials() const {
- if (SchemaWithSpecials) {
- return SchemaWithSpecials;
- }
+std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchema() const {
+ AFL_VERIFY(Schema);
+ return Schema;
+}
+void TIndexInfo::BuildSchemaWithSpecials() {
+ AFL_VERIFY(!SchemaWithSpecials);
const auto& schema = ArrowSchema();
std::vector<std::shared_ptr<arrow::Field>> extended;
@@ -160,6 +159,10 @@ std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchemaWithSpecials() const {
extended.insert(extended.end(), schema->fields().begin(), schema->fields().end());
SchemaWithSpecials = std::make_shared<arrow::Schema>(std::move(extended));
+}
+
+std::shared_ptr<arrow::Schema> TIndexInfo::ArrowSchemaWithSpecials() const {
+ AFL_VERIFY(SchemaWithSpecials);
return SchemaWithSpecials;
}
@@ -244,6 +247,11 @@ void TIndexInfo::SetAllKeys() {
}
}
MinMaxIdxColumnsIds.insert(GetPKFirstColumnId());
+ if (!Schema) {
+ AFL_VERIFY(!SchemaWithSpecials);
+ BuildArrowSchema();
+ BuildSchemaWithSpecials();
+ }
}
std::shared_ptr<NArrow::TSortDescription> TIndexInfo::SortDescription() const {
@@ -360,6 +368,8 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema&
Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull);
ColumnNames[name] = id;
}
+ BuildArrowSchema();
+ BuildSchemaWithSpecials();
for (const auto& col : schema.GetColumns()) {
std::optional<TColumnFeatures> cFeatures = TColumnFeatures::BuildFromProto(col, *this);
if (!cFeatures) {
@@ -382,7 +392,6 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema&
}
DefaultCompression = *result;
}
-
Version = schema.GetVersion();
return true;
}
diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h
index a50e3dbeef..97dffcfa60 100644
--- a/ydb/core/tx/columnshard/engines/scheme/index_info.h
+++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h
@@ -37,6 +37,9 @@ private:
TIndexInfo(const TString& name, ui32 id);
bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema);
TColumnFeatures& GetOrCreateColumnFeatures(const ui32 columnId) const;
+ void BuildSchemaWithSpecials();
+ void BuildArrowSchema();
+
public:
static constexpr const char* SPEC_COL_PLAN_STEP = "_yql_plan_step";
static constexpr const char* SPEC_COL_TX_ID = "_yql_tx_id";
@@ -207,8 +210,8 @@ private:
ui32 Id;
ui64 Version = 0;
TString Name;
- mutable std::shared_ptr<arrow::Schema> Schema;
- mutable std::shared_ptr<arrow::Schema> SchemaWithSpecials;
+ std::shared_ptr<arrow::Schema> Schema;
+ std::shared_ptr<arrow::Schema> SchemaWithSpecials;
std::shared_ptr<arrow::Schema> SortingKey;
std::shared_ptr<arrow::Schema> ReplaceKey;
std::shared_ptr<arrow::Schema> ExtendedKey; // Extend PK with snapshot columns to allow old shapshot reads