diff options
author | Valery Mironov <[email protected]> | 2025-02-19 16:53:37 +0300 |
---|---|---|
committer | GitHub <[email protected]> | 2025-02-19 16:53:37 +0300 |
commit | 02fadfaefecfaa8fe7772bd4f73bb62cb7acf16d (patch) | |
tree | 54ab5b485341b66cca2df9feb69902d203fa4917 | |
parent | 78556e0c96d2ea5e796c08d855a2c2e648d534ac (diff) |
Replace uint32 with uint64 for cluster id in vector index (#14576)
18 files changed, 123 insertions, 108 deletions
diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index ebad4a127bd..55652ac78d5 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -1,5 +1,7 @@ #pragma once +#include <ydb/public/api/protos/ydb_value.pb.h> +#include <ydb/public/lib/scheme_types/scheme_type_id.h> #include <ydb/core/protos/flat_scheme_op.pb.h> #include <util/generic/hash_set.h> @@ -35,5 +37,10 @@ std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType inde bool IsImplTable(std::string_view tableName); bool IsBuildImplTable(std::string_view tableName); +using TClusterId = ui64; + +inline constexpr auto ClusterIdType = Ydb::Type::UINT64; +inline constexpr const char* ClusterIdTypeName = "Uint64"; + } } diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp index d6e32bab338..258012939b0 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp @@ -425,10 +425,10 @@ TExprBase DoRewriteTopSortOverKMeansTree( // TODO(mbkkt) How to inline construction of these constants to construction of readLevel0? auto fromValues = ctx.Builder(pos) - .Callable("Uint32").Atom(0, "0", TNodeFlags::Default).Seal() + .Callable(NTableIndex::ClusterIdTypeName).Atom(0, "0", TNodeFlags::Default).Seal() .Build(); auto toValues = ctx.Builder(pos) - .Callable("Uint32").Atom(0, "1", TNodeFlags::Default).Seal() + .Callable(NTableIndex::ClusterIdTypeName).Atom(0, "1", TNodeFlags::Default).Seal() .Build(); auto levelLambda = [&] { diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index 863ea2340d2..708492d91dc 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -1548,10 +1548,10 @@ message TEvLocalKMeansRequest { optional uint32 NeedsRounds = 14; // id of parent cluster - optional uint32 ParentFrom = 15; - optional uint32 ParentTo = 21; + optional uint64 ParentFrom = 15; + optional uint64 ParentTo = 21; // [Child ... Child + K * (ParentFrom - ParentTo + 1)) ids reserved for this kmeans clusters - optional uint32 Child = 16; + optional uint64 Child = 16; optional string LevelName = 17; optional string PostingName = 18; @@ -1599,9 +1599,9 @@ message TEvReshuffleKMeansRequest { optional TEvLocalKMeansRequest.EState Upload = 9; // id of parent cluster - optional uint32 Parent = 10; + optional uint64 Parent = 10; // [Child ... Child + ClustersSize) ids of this kmeans clusters - optional uint32 Child = 11; + optional uint64 Child = 11; // centroids of clusters repeated string Clusters = 12; diff --git a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp index b0a5b582297..df2e8509447 100644 --- a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp +++ b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp @@ -1,3 +1,4 @@ +#include <ydb/core/base/table_index.h> #include <ydb/core/testlib/test_client.h> #include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h> #include <ydb/core/tx/schemeshard/schemeshard.h> @@ -91,7 +92,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { } static std::tuple<TString, TString> DoLocalKMeans( - Tests::TServer::TPtr server, TActorId sender, ui32 parent, ui64 seed, ui64 k, + Tests::TServer::TPtr server, TActorId sender, NTableIndex::TClusterId parent, ui64 seed, ui64 k, NKikimrTxDataShard::TEvLocalKMeansRequest::EState upload, VectorIndexSettings::VectorType type, VectorIndexSettings::Metric metric) { @@ -185,8 +186,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {ParentColumn, "Uint32", true, true}, - {IdColumn, "Uint32", true, true}, + {ParentColumn, NTableIndex::ClusterIdTypeName, true, true}, + {IdColumn, NTableIndex::ClusterIdTypeName, true, true}, {CentroidColumn, "String", false, true}, }); CreateShardedTable(server, sender, "/Root", "table-level", options); @@ -196,7 +197,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {ParentColumn, "Uint32", true, true}, + {ParentColumn, NTableIndex::ClusterIdTypeName, true, true}, {"key", "Uint32", true, true}, {"data", "String", false, false}, }); @@ -208,7 +209,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {ParentColumn, "Uint32", true, true}, + {ParentColumn, NTableIndex::ClusterIdTypeName, true, true}, {"key", "Uint32", true, true}, {"embedding", "String", false, false}, {"data", "String", false, false}, diff --git a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp index 63f16142b68..cc455de7fad 100644 --- a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp +++ b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp @@ -1,3 +1,4 @@ +#include <ydb/core/base/table_index.h> #include <ydb/core/testlib/test_client.h> #include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h> #include <ydb/core/tx/schemeshard/schemeshard.h> @@ -84,7 +85,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { } } - static TString DoReshuffleKMeans(Tests::TServer::TPtr server, TActorId sender, ui32 parent, + static TString DoReshuffleKMeans(Tests::TServer::TPtr server, TActorId sender, NTableIndex::TClusterId parent, const std::vector<TString>& level, NKikimrTxDataShard::TEvLocalKMeansRequest::EState upload, VectorIndexSettings::VectorType type, VectorIndexSettings::Metric metric) @@ -171,7 +172,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {ParentColumn, "Uint32", true, true}, + {ParentColumn, NTableIndex::ClusterIdTypeName, true, true}, {"key", "Uint32", true, true}, {"data", "String", false, false}, }); @@ -183,7 +184,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {ParentColumn, "Uint32", true, true}, + {ParentColumn, NTableIndex::ClusterIdTypeName, true, true}, {"key", "Uint32", true, true}, {"embedding", "String", false, false}, {"data", "String", false, false}, diff --git a/ydb/core/tx/datashard/kmeans_helper.cpp b/ydb/core/tx/datashard/kmeans_helper.cpp index 842b583b524..080771c20f2 100644 --- a/ydb/core/tx/datashard/kmeans_helper.cpp +++ b/ydb/core/tx/datashard/kmeans_helper.cpp @@ -5,7 +5,7 @@ namespace NKikimr::NDataShard::NKMeans { -TTableRange CreateRangeFrom(const TUserTable& table, ui32 parent, TCell& from, TCell& to) { +TTableRange CreateRangeFrom(const TUserTable& table, NTableIndex::TClusterId parent, TCell& from, TCell& to) { if (parent == 0) { return table.GetTableRange(); } @@ -28,7 +28,7 @@ NTable::TLead CreateLeadFrom(const TTableRange& range) { return lead; } -void AddRowMain2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row) { +void AddRowMain2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row) { std::array<TCell, 1> cells; cells[0] = TCell::Make(parent); auto pk = TSerializedCellVec::Serialize(cells); @@ -36,7 +36,7 @@ void AddRowMain2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> k buffer.AddRow(TSerializedCellVec{key}, TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(*row)); } -void AddRowMain2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row, +void AddRowMain2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row, ui32 dataPos) { std::array<TCell, 1> cells; @@ -47,7 +47,7 @@ void AddRowMain2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> TSerializedCellVec::Serialize((*row).Slice(dataPos))); } -void AddRowBuild2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row) { +void AddRowBuild2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row) { std::array<TCell, 1> cells; cells[0] = TCell::Make(parent); auto pk = TSerializedCellVec::Serialize(cells); @@ -55,7 +55,7 @@ void AddRowBuild2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> buffer.AddRow(TSerializedCellVec{key}, TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(*row)); } -void AddRowBuild2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row, +void AddRowBuild2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row, ui32 dataPos) { std::array<TCell, 1> cells; @@ -96,7 +96,7 @@ MakeUploadTypes(const TUserTable& table, NKikimrTxDataShard::TEvLocalKMeansReque uploadTypes->reserve(1 + 1 + std::min(table.KeyColumnTypes.size() + data.size(), types.size())); Ydb::Type type; - type.set_type_id(Ydb::Type::UINT32); + type.set_type_id(NTableIndex::ClusterIdType); uploadTypes->emplace_back(NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type); auto addType = [&](const auto& column) { diff --git a/ydb/core/tx/datashard/kmeans_helper.h b/ydb/core/tx/datashard/kmeans_helper.h index 0d24f857fbf..42449519b4d 100644 --- a/ydb/core/tx/datashard/kmeans_helper.h +++ b/ydb/core/tx/datashard/kmeans_helper.h @@ -1,5 +1,6 @@ #pragma once +#include <ydb/core/base/table_index.h> #include <ydb/core/tx/datashard/buffer_data.h> #include <ydb/core/tx/datashard/datashard_user_table.h> #include <ydb/core/tx/datashard/range_ops.h> @@ -48,7 +49,7 @@ Y_PURE_FUNCTION TTriWayDotProduct<TRes> CosineImpl(const ui8* lhs, const ui8* rh return {static_cast<TRes>(ll), static_cast<TRes>(lr), static_cast<TRes>(rr)}; } -TTableRange CreateRangeFrom(const TUserTable& table, ui32 parent, TCell& from, TCell& to); +TTableRange CreateRangeFrom(const TUserTable& table, NTableIndex::TClusterId parent, TCell& from, TCell& to); NTable::TLead CreateLeadFrom(const TTableRange& range); @@ -200,14 +201,14 @@ ui32 FeedEmbedding(const TCalculation<TMetric>& calculation, std::span<const TSt return calculation.FindClosest(clusters, embedding); } -void AddRowMain2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row); +void AddRowMain2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row); -void AddRowMain2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row, +void AddRowMain2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row, ui32 dataPos); -void AddRowBuild2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row); +void AddRowBuild2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row); -void AddRowBuild2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row, +void AddRowBuild2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row, ui32 dataPos); TTags MakeUploadTags(const TUserTable& table, const TProtoStringType& embedding, diff --git a/ydb/core/tx/datashard/local_kmeans.cpp b/ydb/core/tx/datashard/local_kmeans.cpp index f3d2d2c05c6..7c3cda890bc 100644 --- a/ydb/core/tx/datashard/local_kmeans.cpp +++ b/ydb/core/tx/datashard/local_kmeans.cpp @@ -86,8 +86,8 @@ class TLocalKMeansScanBase: public TActor<TLocalKMeansScanBase>, public NTable:: protected: using EState = NKikimrTxDataShard::TEvLocalKMeansRequest; - ui32 Parent = 0; - ui32 Child = 0; + NTableIndex::TClusterId Parent = 0; + NTableIndex::TClusterId Child = 0; ui32 Round = 0; ui32 MaxRounds = 0; @@ -156,7 +156,7 @@ public: return NKikimrServices::TActivity::LOCAL_KMEANS_SCAN_ACTOR; } - TLocalKMeansScanBase(ui64 buildId, const TUserTable& table, TLead&& lead, ui32 parent, ui32 child, + TLocalKMeansScanBase(ui64 buildId, const TUserTable& table, TLead&& lead, NTableIndex::TClusterId parent, NTableIndex::TClusterId child, const NKikimrTxDataShard::TEvLocalKMeansRequest& request, std::shared_ptr<TResult> result) : TActor{&TThis::StateWork} @@ -180,7 +180,7 @@ public: // upload types if (Ydb::Type type; State <= EState::KMEANS) { TargetTypes = std::make_shared<NTxProxy::TUploadTypes>(3); - type.set_type_id(Ydb::Type::UINT32); + type.set_type_id(NTableIndex::ClusterIdType); (*TargetTypes)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type}; (*TargetTypes)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type}; type.set_type_id(Ydb::Type::STRING); @@ -382,7 +382,7 @@ class TLocalKMeansScan final: public TLocalKMeansScanBase, private TCalculation< std::vector<TAggregatedCluster> AggregatedClusters; public: - TLocalKMeansScan(ui64 buildId, const TUserTable& table, TLead&& lead, ui32 parent, ui32 child, NKikimrTxDataShard::TEvLocalKMeansRequest& request, + TLocalKMeansScan(ui64 buildId, const TUserTable& table, TLead&& lead, NTableIndex::TClusterId parent, NTableIndex::TClusterId child, NKikimrTxDataShard::TEvLocalKMeansRequest& request, std::shared_ptr<TResult> result) : TLocalKMeansScanBase{buildId, table, std::move(lead), parent, child, request, std::move(result)} { diff --git a/ydb/core/tx/datashard/reshuffle_kmeans.cpp b/ydb/core/tx/datashard/reshuffle_kmeans.cpp index 2b2c86abf5f..a86d6f83f13 100644 --- a/ydb/core/tx/datashard/reshuffle_kmeans.cpp +++ b/ydb/core/tx/datashard/reshuffle_kmeans.cpp @@ -27,8 +27,8 @@ class TReshuffleKMeansScanBase: public TActor<TReshuffleKMeansScanBase>, public protected: using EState = NKikimrTxDataShard::TEvLocalKMeansRequest; - ui32 Parent = 0; - ui32 Child = 0; + NTableIndex::TClusterId Parent = 0; + NTableIndex::TClusterId Child = 0; ui32 K = 0; diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 486e4ca35f2..87be404573f 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -4523,20 +4523,20 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { // read kmeans tree state { - auto rowset = db.Table<Schema::KMeansTreeState>().Range().Select(); + auto rowset = db.Table<Schema::KMeansTreeProgress>().Range().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - TIndexBuildId id = rowset.GetValue<Schema::KMeansTreeState::Id>(); + TIndexBuildId id = rowset.GetValue<Schema::KMeansTreeProgress::Id>(); const auto* buildInfoPtr = Self->IndexBuilds.FindPtr(id); Y_VERIFY_S(buildInfoPtr, "BuildIndex not found: id# " << id); auto& buildInfo = *buildInfoPtr->Get(); buildInfo.KMeans.Set( - rowset.GetValue<Schema::KMeansTreeState::Level>(), - rowset.GetValue<Schema::KMeansTreeState::Parent>(), - rowset.GetValue<Schema::KMeansTreeState::State>() + rowset.GetValue<Schema::KMeansTreeProgress::Level>(), + rowset.GetValue<Schema::KMeansTreeProgress::Parent>(), + rowset.GetValue<Schema::KMeansTreeProgress::State>() ); buildInfo.Sample.Rows.reserve(buildInfo.KMeans.K * 2); diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp index cd8e43b1102..90a93627e86 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp @@ -293,7 +293,7 @@ void TSchemeShard::PersistBuildIndexForget(NIceDb::TNiceDb& db, const TIndexBuil } if (info.IsBuildVectorIndex()) { - db.Table<Schema::KMeansTreeState>().Key(info.Id).Delete(); + db.Table<Schema::KMeansTreeProgress>().Key(info.Id).Delete(); PersistBuildIndexSampleForget(db, info); } } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index c58daae72cc..e124b34d69c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -61,11 +61,11 @@ static constexpr const char* Name(TIndexBuildInfo::EState state) noexcept { } // return count, parts, step -static std::tuple<ui32, ui32, ui32> ComputeKMeansBoundaries(const NSchemeShard::TTableInfo& tableInfo, const TIndexBuildInfo& buildInfo) { +static std::tuple<NTableIndex::TClusterId, NTableIndex::TClusterId, NTableIndex::TClusterId> ComputeKMeansBoundaries(const NSchemeShard::TTableInfo& tableInfo, const TIndexBuildInfo& buildInfo) { const auto& kmeans = buildInfo.KMeans; Y_ASSERT(kmeans.K != 0); const auto count = TIndexBuildInfo::TKMeans::BinPow(kmeans.K, kmeans.Level); - ui32 step = 1; + NTableIndex::TClusterId step = 1; auto parts = count; auto shards = tableInfo.GetShard2PartitionIdx().size(); if (!buildInfo.KMeans.NeedsAnotherLevel() || count <= 1 || shards <= 1) { @@ -97,8 +97,8 @@ protected: TActorId Uploader; ui32 RetryCount = 0; ui32 RowsBytes = 0; - ui32 Parent = 0; - ui32 Child = 0; + NTableIndex::TClusterId Parent = 0; + NTableIndex::TClusterId Child = 0; NDataShard::TUploadStatus UploadStatus; @@ -108,8 +108,8 @@ public: const TActorId& responseActorId, ui64 buildIndexId, TIndexBuildInfo::TSample::TRows init, - ui32 parent, - ui32 child) + NTableIndex::TClusterId parent, + NTableIndex::TClusterId child) : TargetTable(std::move(targetTable)) , ResponseActorId(responseActorId) , BuildIndexId(buildIndexId) @@ -159,7 +159,7 @@ public: Types = std::make_shared<NTxProxy::TUploadTypes>(3); Ydb::Type type; - type.set_type_id(Ydb::Type::UINT32); + type.set_type_id(NTableIndex::ClusterIdType); (*Types)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type}; (*Types)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type}; type.set_type_id(Ydb::Type::STRING); @@ -766,7 +766,7 @@ private: InitMultiKMeans(buildInfo); return false; } - std::array<NScheme::TTypeInfo, 1> typeInfos{NScheme::NTypeIds::Uint32}; + std::array<NScheme::TTypeInfo, 1> typeInfos{ClusterIdTypeId}; auto range = ParentRange(buildInfo.KMeans.Parent); auto addRestricted = [&] (const auto& idx) { const auto& status = buildInfo.Shards.at(idx); @@ -858,10 +858,10 @@ private: void PersistKMeansState(TTransactionContext& txc, TIndexBuildInfo& buildInfo) { NIceDb::TNiceDb db{txc.DB}; - db.Table<Schema::KMeansTreeState>().Key(buildInfo.Id).Update( - NIceDb::TUpdate<Schema::KMeansTreeState::Level>(buildInfo.KMeans.Level), - NIceDb::TUpdate<Schema::KMeansTreeState::Parent>(buildInfo.KMeans.Parent), - NIceDb::TUpdate<Schema::KMeansTreeState::State>(buildInfo.KMeans.State) + db.Table<Schema::KMeansTreeProgress>().Key(buildInfo.Id).Update( + NIceDb::TUpdate<Schema::KMeansTreeProgress::Level>(buildInfo.KMeans.Level), + NIceDb::TUpdate<Schema::KMeansTreeProgress::State>(buildInfo.KMeans.State), + NIceDb::TUpdate<Schema::KMeansTreeProgress::Parent>(buildInfo.KMeans.Parent) ); } @@ -1184,7 +1184,7 @@ public: return TSerializedTableRange(TSerializedCellVec::Serialize(cells), "", true, false); } - static TSerializedTableRange ParentRange(ui32 parent) { + static TSerializedTableRange ParentRange(NTableIndex::TClusterId parent) { if (parent == 0) { return {}; // empty } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 32f68f1f74a..dd3668c56e1 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3082,12 +3082,12 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { }; ui32 Level = 1; - ui32 Parent = 0; - ui32 ParentEnd = 0; // included - EState State = Sample; - ui32 ChildBegin = 1; // included + NTableIndex::TClusterId Parent = 0; + NTableIndex::TClusterId ParentEnd = 0; // included + + NTableIndex::TClusterId ChildBegin = 1; // included TString ToStr() const { return TStringBuilder() @@ -3097,8 +3097,8 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { << ", State = " << State << " }"; } - static ui32 BinPow(ui32 k, ui32 l) { - ui32 r = 1; + static NTableIndex::TClusterId BinPow(NTableIndex::TClusterId k, ui32 l) { + NTableIndex::TClusterId r = 1; while (l != 0) { if (l % 2 != 0) { r *= k; @@ -3149,7 +3149,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { return true; } - void Set(ui32 level, ui32 parent, ui32 state) { + void Set(ui32 level, NTableIndex::TClusterId parent, ui32 state) { // TODO(mbkkt) make it without cycles while (Level < level) { NextLevel(); @@ -3192,24 +3192,24 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { return name; } - std::pair<ui32, ui32> RangeToBorders(const TSerializedTableRange& range) const { + std::pair<NTableIndex::TClusterId, NTableIndex::TClusterId> RangeToBorders(const TSerializedTableRange& range) const { Y_ASSERT(ParentEnd != 0); - const ui32 maxParent = ParentEnd; - const ui32 levelSize = TKMeans::BinPow(K, Level - 1); + const NTableIndex::TClusterId maxParent = ParentEnd; + const NTableIndex::TClusterId levelSize = TKMeans::BinPow(K, Level - 1); Y_ASSERT(levelSize <= maxParent); - const ui32 minParent = maxParent - levelSize + 1; - const ui32 parentFrom = [&, from = range.From.GetCells()] { + const NTableIndex::TClusterId minParent = maxParent - levelSize + 1; + const NTableIndex::TClusterId parentFrom = [&, from = range.From.GetCells()] { if (!from.empty()) { if (!from[0].IsNull()) { - return from[0].AsValue<ui32>() + static_cast<ui32>(from.size() == 1); + return from[0].AsValue<NTableIndex::TClusterId>() + static_cast<NTableIndex::TClusterId>(from.size() == 1); } } return minParent; }(); - const ui32 parentTo = [&, to = range.To.GetCells()] { + const NTableIndex::TClusterId parentTo = [&, to = range.To.GetCells()] { if (!to.empty()) { if (!to[0].IsNull()) { - return to[0].AsValue<ui32>() - static_cast<ui32>(to.size() != 1 && to[1].IsNull()); + return to[0].AsValue<NTableIndex::TClusterId>() - static_cast<NTableIndex::TClusterId>(to.size() != 1 && to[1].IsNull()); } } return maxParent; @@ -3232,7 +3232,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { auto str = TStringBuilder{} << "{ count: " << cells.size(); if (Parent != 0) { Y_ASSERT(Level != 0); - str << ", parent: " << cells[0].AsValue<ui32>(); + str << ", parent: " << cells[0].AsValue<NTableIndex::TClusterId>(); if (cells.size() != 1 && cells[1].IsNull()) { str << ", pk: null"; } @@ -3408,11 +3408,11 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { } struct TClusterShards { - ui32 From = std::numeric_limits<ui32>::max(); + NTableIndex::TClusterId From = std::numeric_limits<NTableIndex::TClusterId>::max(); TShardIdx Local = InvalidShardIdx; std::vector<TShardIdx> Global; }; - TMap<ui32, TClusterShards> Cluster2Shards; + TMap<NTableIndex::TClusterId, TClusterShards> Cluster2Shards; void AddParent(const TSerializedTableRange& range, TShardIdx shard); diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index 05206d334ea..69d65b30f0a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -2,6 +2,7 @@ #include "schemeshard_types.h" +#include <ydb/core/base/table_index.h> #include <ydb/core/scheme/scheme_pathid.h> #include <ydb/core/protos/tx_datashard.pb.h> #include <ydb/core/protos/tx.pb.h> @@ -11,6 +12,8 @@ namespace NKikimr::NSchemeShard { +inline constexpr auto ClusterIdTypeId = NScheme::NTypeIds::Uint64; + struct Schema : NIceDb::Schema { struct Paths : Table<1> { struct Id : Column<1, NScheme::NTypeIds::Uint64> { using Type = TLocalPathId; }; @@ -1899,18 +1902,20 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns<OwnerPathId, LocalPathId, AlterVersion, Description>; }; - struct KMeansTreeState : Table<112> { + // struct KMeansTreeState : Table<112> -- already was in trunk some time ago, + // it was replaced with KMeansTreeProgress, before anyone really used it + struct KMeansTreeProgress : Table<114> { struct Id : Column<1, NScheme::NTypeIds::Uint64> { using Type = TIndexBuildId; }; struct Level : Column<2, NScheme::NTypeIds::Uint32> {}; - struct Parent : Column<3, NScheme::NTypeIds::Uint32> {}; - struct State : Column<4, NScheme::NTypeIds::Uint32> {}; + struct State : Column<3, NScheme::NTypeIds::Uint32> {}; + struct Parent : Column<4, ClusterIdTypeId> {}; using TKey = TableKey<Id>; using TColumns = TableColumns< Id, Level, - Parent, - State + State, + Parent >; }; @@ -2040,7 +2045,7 @@ struct Schema : NIceDb::Schema { BackgroundSessions, ResourcePool, BackupCollection, - KMeansTreeState, + KMeansTreeProgress, KMeansTreeSample >; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 9805e9e7090..3d3b99bc7af 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -268,8 +268,8 @@ auto CalcVectorKmeansTreePostingImplTableDescImpl( { auto parentColumn = implTableDesc.AddColumns(); parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn); - parentColumn->SetType("Uint32"); - parentColumn->SetTypeId(NScheme::NTypeIds::Uint32); + parentColumn->SetType(NTableIndex::ClusterIdTypeName); + parentColumn->SetTypeId(NSchemeShard::ClusterIdTypeId); parentColumn->SetNotNull(true); } implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::ParentColumn); @@ -304,8 +304,8 @@ auto CalcVectorKmeansTreePrefixImplTableDescImpl( { auto idColumn = implTableDesc.AddColumns(); idColumn->SetName(NTableVectorKmeansTreeIndex::IdColumn); - idColumn->SetType("Uint32"); - idColumn->SetTypeId(NScheme::NTypeIds::Uint32); + idColumn->SetType(NTableIndex::ClusterIdTypeName); + idColumn->SetTypeId(NSchemeShard::ClusterIdTypeId); idColumn->SetNotNull(true); } implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::IdColumn); @@ -346,15 +346,15 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreeLevelImplTableDesc( { auto parentColumn = implTableDesc.AddColumns(); parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn); - parentColumn->SetType("Uint32"); - parentColumn->SetTypeId(NScheme::NTypeIds::Uint32); + parentColumn->SetType(NTableIndex::ClusterIdTypeName); + parentColumn->SetTypeId(NSchemeShard::ClusterIdTypeId); parentColumn->SetNotNull(true); } { auto idColumn = implTableDesc.AddColumns(); idColumn->SetName(NTableVectorKmeansTreeIndex::IdColumn); - idColumn->SetType("Uint32"); - idColumn->SetTypeId(NScheme::NTypeIds::Uint32); + idColumn->SetType(NTableIndex::ClusterIdTypeName); + idColumn->SetTypeId(NSchemeShard::ClusterIdTypeId); idColumn->SetNotNull(true); } { diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 25804068e46..42cfda9f4f4 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -1403,7 +1403,7 @@ TCheckFunc SplitBoundaries(TVector<T>&& expectedBoundaries) { }; } -template TCheckFunc SplitBoundaries<ui32>(TVector<ui32>&&); +template TCheckFunc SplitBoundaries<ui64>(TVector<ui64>&&); TCheckFunc ServerlessComputeResourcesMode(NKikimrSubDomains::EServerlessComputeResourcesMode serverlessComputeResourcesMode) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp index 73d0902c1b8..263621f48f0 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp +++ b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp @@ -250,12 +250,12 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) { UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"( partition_at_keys { split_points { - type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } } - value { items { uint32_value: 12345 } } + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 12345 } } } split_points { - type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } } - value { items { uint32_value: 54321 } } + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 54321 } } } } partitioning_settings { @@ -310,14 +310,14 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) { NLs::PartitionCount(3), NLs::MinPartitionsCountEqual(3), NLs::MaxPartitionsCountEqual(3), - NLs::SplitBoundaries<ui32>({12345, 54321}) + NLs::SplitBoundaries<ui64>({12345, 54321}) }); TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", PostingTable), true, true), { NLs::IsTable, NLs::PartitionCount(3), NLs::MinPartitionsCountEqual(3), NLs::MaxPartitionsCountEqual(3), - NLs::SplitBoundaries<ui32>({12345, 54321}) + NLs::SplitBoundaries<ui64>({12345, 54321}) }); for (size_t i = 0; i != 3; ++i) { diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema index 38b3b510e95..91c9f955e6d 100644 --- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema +++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema @@ -8036,10 +8036,11 @@ } }, { - "TableId": 112, - "TableName": "KMeansTreeState", + "TableId": 113, + "TableName": "KMeansTreeSample", "TableKey": [ - 1 + 1, + 2 ], "ColumnsAdded": [ { @@ -8049,18 +8050,18 @@ }, { "ColumnId": 2, - "ColumnName": "Level", + "ColumnName": "Row", "ColumnType": "Uint32" }, { "ColumnId": 3, - "ColumnName": "Parent", - "ColumnType": "Uint32" + "ColumnName": "Probability", + "ColumnType": "Uint64" }, { "ColumnId": 4, - "ColumnName": "State", - "ColumnType": "Uint32" + "ColumnName": "Data", + "ColumnType": "String" } ], "ColumnsDropped": [], @@ -8092,11 +8093,10 @@ } }, { - "TableId": 113, - "TableName": "KMeansTreeSample", + "TableId": 114, + "TableName": "KMeansTreeProgress", "TableKey": [ - 1, - 2 + 1 ], "ColumnsAdded": [ { @@ -8106,18 +8106,18 @@ }, { "ColumnId": 2, - "ColumnName": "Row", + "ColumnName": "Level", "ColumnType": "Uint32" }, { "ColumnId": 3, - "ColumnName": "Probability", - "ColumnType": "Uint64" + "ColumnName": "State", + "ColumnType": "Uint32" }, { "ColumnId": 4, - "ColumnName": "Data", - "ColumnType": "String" + "ColumnName": "Parent", + "ColumnType": "Uint64" } ], "ColumnsDropped": [], |