summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Valery Mironov <[email protected]> 2025-02-19 16:53:37 +0300
committer GitHub <[email protected]> 2025-02-19 16:53:37 +0300
commit 02fadfaefecfaa8fe7772bd4f73bb62cb7acf16d (patch)
tree 54ab5b485341b66cca2df9feb69902d203fa4917
parent 78556e0c96d2ea5e796c08d855a2c2e648d534ac (diff)
Replace uint32 with uint64 for cluster id in vector index (#14576)
-rw-r--r-- ydb/core/base/table_index.h 7
-rw-r--r-- ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp 4
-rw-r--r-- ydb/core/protos/tx_datashard.proto 10
-rw-r--r-- ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp 11
-rw-r--r-- ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp 7
-rw-r--r-- ydb/core/tx/datashard/kmeans_helper.cpp 12
-rw-r--r-- ydb/core/tx/datashard/kmeans_helper.h 11
-rw-r--r-- ydb/core/tx/datashard/local_kmeans.cpp 10
-rw-r--r-- ydb/core/tx/datashard/reshuffle_kmeans.cpp 4
-rw-r--r-- ydb/core/tx/schemeshard/schemeshard__init.cpp 10
-rw-r--r-- ydb/core/tx/schemeshard/schemeshard_build_index.cpp 2
-rw-r--r-- ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp 26
-rw-r--r-- ydb/core/tx/schemeshard/schemeshard_info_types.h 36
-rw-r--r-- ydb/core/tx/schemeshard/schemeshard_schema.h 17
-rw-r--r-- ydb/core/tx/schemeshard/schemeshard_utils.cpp 16
-rw-r--r-- ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp 2
-rw-r--r-- ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp 12
-rw-r--r-- ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema 34
18 files changed, 123 insertions, 108 deletions
diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h
index ebad4a127bd..55652ac78d5 100644
--- a/ydb/core/base/table_index.h
+++ b/ydb/core/base/table_index.h
@@ -1,5 +1,7 @@
#pragma once
+#include <ydb/public/api/protos/ydb_value.pb.h>
+#include <ydb/public/lib/scheme_types/scheme_type_id.h>
#include <ydb/core/protos/flat_scheme_op.pb.h>
#include <util/generic/hash_set.h>
@@ -35,5 +37,10 @@ std::span<const std::string_view> GetImplTables(NKikimrSchemeOp::EIndexType inde
bool IsImplTable(std::string_view tableName);
bool IsBuildImplTable(std::string_view tableName);
+using TClusterId = ui64;
+
+inline constexpr auto ClusterIdType = Ydb::Type::UINT64;
+inline constexpr const char* ClusterIdTypeName = "Uint64";
+
}
}
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp
index d6e32bab338..258012939b0 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp
@@ -425,10 +425,10 @@ TExprBase DoRewriteTopSortOverKMeansTree(
// TODO(mbkkt) How to inline construction of these constants to construction of readLevel0?
auto fromValues = ctx.Builder(pos)
- .Callable("Uint32").Atom(0, "0", TNodeFlags::Default).Seal()
+ .Callable(NTableIndex::ClusterIdTypeName).Atom(0, "0", TNodeFlags::Default).Seal()
.Build();
auto toValues = ctx.Builder(pos)
- .Callable("Uint32").Atom(0, "1", TNodeFlags::Default).Seal()
+ .Callable(NTableIndex::ClusterIdTypeName).Atom(0, "1", TNodeFlags::Default).Seal()
.Build();
auto levelLambda = [&] {
diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto
index 863ea2340d2..708492d91dc 100644
--- a/ydb/core/protos/tx_datashard.proto
+++ b/ydb/core/protos/tx_datashard.proto
@@ -1548,10 +1548,10 @@ message TEvLocalKMeansRequest {
optional uint32 NeedsRounds = 14;
// id of parent cluster
- optional uint32 ParentFrom = 15;
- optional uint32 ParentTo = 21;
+ optional uint64 ParentFrom = 15;
+ optional uint64 ParentTo = 21;
// [Child ... Child + K * (ParentTo - ParentFrom + 1)) ids reserved for these kmeans clusters
- optional uint32 Child = 16;
+ optional uint64 Child = 16;
optional string LevelName = 17;
optional string PostingName = 18;
@@ -1599,9 +1599,9 @@ message TEvReshuffleKMeansRequest {
optional TEvLocalKMeansRequest.EState Upload = 9;
// id of parent cluster
- optional uint32 Parent = 10;
+ optional uint64 Parent = 10;
// [Child ... Child + ClustersSize) ids of these kmeans clusters
- optional uint32 Child = 11;
+ optional uint64 Child = 11;
// centroids of clusters
repeated string Clusters = 12;
diff --git a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp
index b0a5b582297..df2e8509447 100644
--- a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp
+++ b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp
@@ -1,3 +1,4 @@
+#include <ydb/core/base/table_index.h>
#include <ydb/core/testlib/test_client.h>
#include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h>
#include <ydb/core/tx/schemeshard/schemeshard.h>
@@ -91,7 +92,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
}
static std::tuple<TString, TString> DoLocalKMeans(
- Tests::TServer::TPtr server, TActorId sender, ui32 parent, ui64 seed, ui64 k,
+ Tests::TServer::TPtr server, TActorId sender, NTableIndex::TClusterId parent, ui64 seed, ui64 k,
NKikimrTxDataShard::TEvLocalKMeansRequest::EState upload, VectorIndexSettings::VectorType type,
VectorIndexSettings::Metric metric)
{
@@ -185,8 +186,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {ParentColumn, "Uint32", true, true},
- {IdColumn, "Uint32", true, true},
+ {ParentColumn, NTableIndex::ClusterIdTypeName, true, true},
+ {IdColumn, NTableIndex::ClusterIdTypeName, true, true},
{CentroidColumn, "String", false, true},
});
CreateShardedTable(server, sender, "/Root", "table-level", options);
@@ -196,7 +197,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {ParentColumn, "Uint32", true, true},
+ {ParentColumn, NTableIndex::ClusterIdTypeName, true, true},
{"key", "Uint32", true, true},
{"data", "String", false, false},
});
@@ -208,7 +209,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {ParentColumn, "Uint32", true, true},
+ {ParentColumn, NTableIndex::ClusterIdTypeName, true, true},
{"key", "Uint32", true, true},
{"embedding", "String", false, false},
{"data", "String", false, false},
diff --git a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp
index 63f16142b68..cc455de7fad 100644
--- a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp
+++ b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp
@@ -1,3 +1,4 @@
+#include <ydb/core/base/table_index.h>
#include <ydb/core/testlib/test_client.h>
#include <ydb/core/tx/datashard/ut_common/datashard_ut_common.h>
#include <ydb/core/tx/schemeshard/schemeshard.h>
@@ -84,7 +85,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
}
}
- static TString DoReshuffleKMeans(Tests::TServer::TPtr server, TActorId sender, ui32 parent,
+ static TString DoReshuffleKMeans(Tests::TServer::TPtr server, TActorId sender, NTableIndex::TClusterId parent,
const std::vector<TString>& level,
NKikimrTxDataShard::TEvLocalKMeansRequest::EState upload,
VectorIndexSettings::VectorType type, VectorIndexSettings::Metric metric)
@@ -171,7 +172,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {ParentColumn, "Uint32", true, true},
+ {ParentColumn, NTableIndex::ClusterIdTypeName, true, true},
{"key", "Uint32", true, true},
{"data", "String", false, false},
});
@@ -183,7 +184,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {ParentColumn, "Uint32", true, true},
+ {ParentColumn, NTableIndex::ClusterIdTypeName, true, true},
{"key", "Uint32", true, true},
{"embedding", "String", false, false},
{"data", "String", false, false},
diff --git a/ydb/core/tx/datashard/kmeans_helper.cpp b/ydb/core/tx/datashard/kmeans_helper.cpp
index 842b583b524..080771c20f2 100644
--- a/ydb/core/tx/datashard/kmeans_helper.cpp
+++ b/ydb/core/tx/datashard/kmeans_helper.cpp
@@ -5,7 +5,7 @@
namespace NKikimr::NDataShard::NKMeans {
-TTableRange CreateRangeFrom(const TUserTable& table, ui32 parent, TCell& from, TCell& to) {
+TTableRange CreateRangeFrom(const TUserTable& table, NTableIndex::TClusterId parent, TCell& from, TCell& to) {
if (parent == 0) {
return table.GetTableRange();
}
@@ -28,7 +28,7 @@ NTable::TLead CreateLeadFrom(const TTableRange& range) {
return lead;
}
-void AddRowMain2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row) {
+void AddRowMain2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row) {
std::array<TCell, 1> cells;
cells[0] = TCell::Make(parent);
auto pk = TSerializedCellVec::Serialize(cells);
@@ -36,7 +36,7 @@ void AddRowMain2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> k
buffer.AddRow(TSerializedCellVec{key}, TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(*row));
}
-void AddRowMain2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
+void AddRowMain2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
ui32 dataPos)
{
std::array<TCell, 1> cells;
@@ -47,7 +47,7 @@ void AddRowMain2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell>
TSerializedCellVec::Serialize((*row).Slice(dataPos)));
}
-void AddRowBuild2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row) {
+void AddRowBuild2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row) {
std::array<TCell, 1> cells;
cells[0] = TCell::Make(parent);
auto pk = TSerializedCellVec::Serialize(cells);
@@ -55,7 +55,7 @@ void AddRowBuild2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell>
buffer.AddRow(TSerializedCellVec{key}, TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(*row));
}
-void AddRowBuild2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
+void AddRowBuild2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
ui32 dataPos)
{
std::array<TCell, 1> cells;
@@ -96,7 +96,7 @@ MakeUploadTypes(const TUserTable& table, NKikimrTxDataShard::TEvLocalKMeansReque
uploadTypes->reserve(1 + 1 + std::min(table.KeyColumnTypes.size() + data.size(), types.size()));
Ydb::Type type;
- type.set_type_id(Ydb::Type::UINT32);
+ type.set_type_id(NTableIndex::ClusterIdType);
uploadTypes->emplace_back(NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type);
auto addType = [&](const auto& column) {
diff --git a/ydb/core/tx/datashard/kmeans_helper.h b/ydb/core/tx/datashard/kmeans_helper.h
index 0d24f857fbf..42449519b4d 100644
--- a/ydb/core/tx/datashard/kmeans_helper.h
+++ b/ydb/core/tx/datashard/kmeans_helper.h
@@ -1,5 +1,6 @@
#pragma once
+#include <ydb/core/base/table_index.h>
#include <ydb/core/tx/datashard/buffer_data.h>
#include <ydb/core/tx/datashard/datashard_user_table.h>
#include <ydb/core/tx/datashard/range_ops.h>
@@ -48,7 +49,7 @@ Y_PURE_FUNCTION TTriWayDotProduct<TRes> CosineImpl(const ui8* lhs, const ui8* rh
return {static_cast<TRes>(ll), static_cast<TRes>(lr), static_cast<TRes>(rr)};
}
-TTableRange CreateRangeFrom(const TUserTable& table, ui32 parent, TCell& from, TCell& to);
+TTableRange CreateRangeFrom(const TUserTable& table, NTableIndex::TClusterId parent, TCell& from, TCell& to);
NTable::TLead CreateLeadFrom(const TTableRange& range);
@@ -200,14 +201,14 @@ ui32 FeedEmbedding(const TCalculation<TMetric>& calculation, std::span<const TSt
return calculation.FindClosest(clusters, embedding);
}
-void AddRowMain2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row);
+void AddRowMain2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row);
-void AddRowMain2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
+void AddRowMain2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
ui32 dataPos);
-void AddRowBuild2Build(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row);
+void AddRowBuild2Build(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row);
-void AddRowBuild2Posting(TBufferData& buffer, ui32 parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
+void AddRowBuild2Posting(TBufferData& buffer, NTableIndex::TClusterId parent, TArrayRef<const TCell> key, const NTable::TRowState& row,
ui32 dataPos);
TTags MakeUploadTags(const TUserTable& table, const TProtoStringType& embedding,
diff --git a/ydb/core/tx/datashard/local_kmeans.cpp b/ydb/core/tx/datashard/local_kmeans.cpp
index f3d2d2c05c6..7c3cda890bc 100644
--- a/ydb/core/tx/datashard/local_kmeans.cpp
+++ b/ydb/core/tx/datashard/local_kmeans.cpp
@@ -86,8 +86,8 @@ class TLocalKMeansScanBase: public TActor<TLocalKMeansScanBase>, public NTable::
protected:
using EState = NKikimrTxDataShard::TEvLocalKMeansRequest;
- ui32 Parent = 0;
- ui32 Child = 0;
+ NTableIndex::TClusterId Parent = 0;
+ NTableIndex::TClusterId Child = 0;
ui32 Round = 0;
ui32 MaxRounds = 0;
@@ -156,7 +156,7 @@ public:
return NKikimrServices::TActivity::LOCAL_KMEANS_SCAN_ACTOR;
}
- TLocalKMeansScanBase(ui64 buildId, const TUserTable& table, TLead&& lead, ui32 parent, ui32 child,
+ TLocalKMeansScanBase(ui64 buildId, const TUserTable& table, TLead&& lead, NTableIndex::TClusterId parent, NTableIndex::TClusterId child,
const NKikimrTxDataShard::TEvLocalKMeansRequest& request,
std::shared_ptr<TResult> result)
: TActor{&TThis::StateWork}
@@ -180,7 +180,7 @@ public:
// upload types
if (Ydb::Type type; State <= EState::KMEANS) {
TargetTypes = std::make_shared<NTxProxy::TUploadTypes>(3);
- type.set_type_id(Ydb::Type::UINT32);
+ type.set_type_id(NTableIndex::ClusterIdType);
(*TargetTypes)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type};
(*TargetTypes)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type};
type.set_type_id(Ydb::Type::STRING);
@@ -382,7 +382,7 @@ class TLocalKMeansScan final: public TLocalKMeansScanBase, private TCalculation<
std::vector<TAggregatedCluster> AggregatedClusters;
public:
- TLocalKMeansScan(ui64 buildId, const TUserTable& table, TLead&& lead, ui32 parent, ui32 child, NKikimrTxDataShard::TEvLocalKMeansRequest& request,
+ TLocalKMeansScan(ui64 buildId, const TUserTable& table, TLead&& lead, NTableIndex::TClusterId parent, NTableIndex::TClusterId child, NKikimrTxDataShard::TEvLocalKMeansRequest& request,
std::shared_ptr<TResult> result)
: TLocalKMeansScanBase{buildId, table, std::move(lead), parent, child, request, std::move(result)}
{
diff --git a/ydb/core/tx/datashard/reshuffle_kmeans.cpp b/ydb/core/tx/datashard/reshuffle_kmeans.cpp
index 2b2c86abf5f..a86d6f83f13 100644
--- a/ydb/core/tx/datashard/reshuffle_kmeans.cpp
+++ b/ydb/core/tx/datashard/reshuffle_kmeans.cpp
@@ -27,8 +27,8 @@ class TReshuffleKMeansScanBase: public TActor<TReshuffleKMeansScanBase>, public
protected:
using EState = NKikimrTxDataShard::TEvLocalKMeansRequest;
- ui32 Parent = 0;
- ui32 Child = 0;
+ NTableIndex::TClusterId Parent = 0;
+ NTableIndex::TClusterId Child = 0;
ui32 K = 0;
diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp
index 486e4ca35f2..87be404573f 100644
--- a/ydb/core/tx/schemeshard/schemeshard__init.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp
@@ -4523,20 +4523,20 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
// read kmeans tree state
{
- auto rowset = db.Table<Schema::KMeansTreeState>().Range().Select();
+ auto rowset = db.Table<Schema::KMeansTreeProgress>().Range().Select();
if (!rowset.IsReady()) {
return false;
}
while (!rowset.EndOfSet()) {
- TIndexBuildId id = rowset.GetValue<Schema::KMeansTreeState::Id>();
+ TIndexBuildId id = rowset.GetValue<Schema::KMeansTreeProgress::Id>();
const auto* buildInfoPtr = Self->IndexBuilds.FindPtr(id);
Y_VERIFY_S(buildInfoPtr, "BuildIndex not found: id# " << id);
auto& buildInfo = *buildInfoPtr->Get();
buildInfo.KMeans.Set(
- rowset.GetValue<Schema::KMeansTreeState::Level>(),
- rowset.GetValue<Schema::KMeansTreeState::Parent>(),
- rowset.GetValue<Schema::KMeansTreeState::State>()
+ rowset.GetValue<Schema::KMeansTreeProgress::Level>(),
+ rowset.GetValue<Schema::KMeansTreeProgress::Parent>(),
+ rowset.GetValue<Schema::KMeansTreeProgress::State>()
);
buildInfo.Sample.Rows.reserve(buildInfo.KMeans.K * 2);
diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp
index cd8e43b1102..90a93627e86 100644
--- a/ydb/core/tx/schemeshard/schemeshard_build_index.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_build_index.cpp
@@ -293,7 +293,7 @@ void TSchemeShard::PersistBuildIndexForget(NIceDb::TNiceDb& db, const TIndexBuil
}
if (info.IsBuildVectorIndex()) {
- db.Table<Schema::KMeansTreeState>().Key(info.Id).Delete();
+ db.Table<Schema::KMeansTreeProgress>().Key(info.Id).Delete();
PersistBuildIndexSampleForget(db, info);
}
}
diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
index c58daae72cc..e124b34d69c 100644
--- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
@@ -61,11 +61,11 @@ static constexpr const char* Name(TIndexBuildInfo::EState state) noexcept {
}
// return count, parts, step
-static std::tuple<ui32, ui32, ui32> ComputeKMeansBoundaries(const NSchemeShard::TTableInfo& tableInfo, const TIndexBuildInfo& buildInfo) {
+static std::tuple<NTableIndex::TClusterId, NTableIndex::TClusterId, NTableIndex::TClusterId> ComputeKMeansBoundaries(const NSchemeShard::TTableInfo& tableInfo, const TIndexBuildInfo& buildInfo) {
const auto& kmeans = buildInfo.KMeans;
Y_ASSERT(kmeans.K != 0);
const auto count = TIndexBuildInfo::TKMeans::BinPow(kmeans.K, kmeans.Level);
- ui32 step = 1;
+ NTableIndex::TClusterId step = 1;
auto parts = count;
auto shards = tableInfo.GetShard2PartitionIdx().size();
if (!buildInfo.KMeans.NeedsAnotherLevel() || count <= 1 || shards <= 1) {
@@ -97,8 +97,8 @@ protected:
TActorId Uploader;
ui32 RetryCount = 0;
ui32 RowsBytes = 0;
- ui32 Parent = 0;
- ui32 Child = 0;
+ NTableIndex::TClusterId Parent = 0;
+ NTableIndex::TClusterId Child = 0;
NDataShard::TUploadStatus UploadStatus;
@@ -108,8 +108,8 @@ public:
const TActorId& responseActorId,
ui64 buildIndexId,
TIndexBuildInfo::TSample::TRows init,
- ui32 parent,
- ui32 child)
+ NTableIndex::TClusterId parent,
+ NTableIndex::TClusterId child)
: TargetTable(std::move(targetTable))
, ResponseActorId(responseActorId)
, BuildIndexId(buildIndexId)
@@ -159,7 +159,7 @@ public:
Types = std::make_shared<NTxProxy::TUploadTypes>(3);
Ydb::Type type;
- type.set_type_id(Ydb::Type::UINT32);
+ type.set_type_id(NTableIndex::ClusterIdType);
(*Types)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type};
(*Types)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type};
type.set_type_id(Ydb::Type::STRING);
@@ -766,7 +766,7 @@ private:
InitMultiKMeans(buildInfo);
return false;
}
- std::array<NScheme::TTypeInfo, 1> typeInfos{NScheme::NTypeIds::Uint32};
+ std::array<NScheme::TTypeInfo, 1> typeInfos{ClusterIdTypeId};
auto range = ParentRange(buildInfo.KMeans.Parent);
auto addRestricted = [&] (const auto& idx) {
const auto& status = buildInfo.Shards.at(idx);
@@ -858,10 +858,10 @@ private:
void PersistKMeansState(TTransactionContext& txc, TIndexBuildInfo& buildInfo) {
NIceDb::TNiceDb db{txc.DB};
- db.Table<Schema::KMeansTreeState>().Key(buildInfo.Id).Update(
- NIceDb::TUpdate<Schema::KMeansTreeState::Level>(buildInfo.KMeans.Level),
- NIceDb::TUpdate<Schema::KMeansTreeState::Parent>(buildInfo.KMeans.Parent),
- NIceDb::TUpdate<Schema::KMeansTreeState::State>(buildInfo.KMeans.State)
+ db.Table<Schema::KMeansTreeProgress>().Key(buildInfo.Id).Update(
+ NIceDb::TUpdate<Schema::KMeansTreeProgress::Level>(buildInfo.KMeans.Level),
+ NIceDb::TUpdate<Schema::KMeansTreeProgress::State>(buildInfo.KMeans.State),
+ NIceDb::TUpdate<Schema::KMeansTreeProgress::Parent>(buildInfo.KMeans.Parent)
);
}
@@ -1184,7 +1184,7 @@ public:
return TSerializedTableRange(TSerializedCellVec::Serialize(cells), "", true, false);
}
- static TSerializedTableRange ParentRange(ui32 parent) {
+ static TSerializedTableRange ParentRange(NTableIndex::TClusterId parent) {
if (parent == 0) {
return {}; // empty
}
diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h
index 32f68f1f74a..dd3668c56e1 100644
--- a/ydb/core/tx/schemeshard/schemeshard_info_types.h
+++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h
@@ -3082,12 +3082,12 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
};
ui32 Level = 1;
- ui32 Parent = 0;
- ui32 ParentEnd = 0; // included
-
EState State = Sample;
- ui32 ChildBegin = 1; // included
+ NTableIndex::TClusterId Parent = 0;
+ NTableIndex::TClusterId ParentEnd = 0; // included
+
+ NTableIndex::TClusterId ChildBegin = 1; // included
TString ToStr() const {
return TStringBuilder()
@@ -3097,8 +3097,8 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
<< ", State = " << State << " }";
}
- static ui32 BinPow(ui32 k, ui32 l) {
- ui32 r = 1;
+ static NTableIndex::TClusterId BinPow(NTableIndex::TClusterId k, ui32 l) {
+ NTableIndex::TClusterId r = 1;
while (l != 0) {
if (l % 2 != 0) {
r *= k;
@@ -3149,7 +3149,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
return true;
}
- void Set(ui32 level, ui32 parent, ui32 state) {
+ void Set(ui32 level, NTableIndex::TClusterId parent, ui32 state) {
// TODO(mbkkt) make it without cycles
while (Level < level) {
NextLevel();
@@ -3192,24 +3192,24 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
return name;
}
- std::pair<ui32, ui32> RangeToBorders(const TSerializedTableRange& range) const {
+ std::pair<NTableIndex::TClusterId, NTableIndex::TClusterId> RangeToBorders(const TSerializedTableRange& range) const {
Y_ASSERT(ParentEnd != 0);
- const ui32 maxParent = ParentEnd;
- const ui32 levelSize = TKMeans::BinPow(K, Level - 1);
+ const NTableIndex::TClusterId maxParent = ParentEnd;
+ const NTableIndex::TClusterId levelSize = TKMeans::BinPow(K, Level - 1);
Y_ASSERT(levelSize <= maxParent);
- const ui32 minParent = maxParent - levelSize + 1;
- const ui32 parentFrom = [&, from = range.From.GetCells()] {
+ const NTableIndex::TClusterId minParent = maxParent - levelSize + 1;
+ const NTableIndex::TClusterId parentFrom = [&, from = range.From.GetCells()] {
if (!from.empty()) {
if (!from[0].IsNull()) {
- return from[0].AsValue<ui32>() + static_cast<ui32>(from.size() == 1);
+ return from[0].AsValue<NTableIndex::TClusterId>() + static_cast<NTableIndex::TClusterId>(from.size() == 1);
}
}
return minParent;
}();
- const ui32 parentTo = [&, to = range.To.GetCells()] {
+ const NTableIndex::TClusterId parentTo = [&, to = range.To.GetCells()] {
if (!to.empty()) {
if (!to[0].IsNull()) {
- return to[0].AsValue<ui32>() - static_cast<ui32>(to.size() != 1 && to[1].IsNull());
+ return to[0].AsValue<NTableIndex::TClusterId>() - static_cast<NTableIndex::TClusterId>(to.size() != 1 && to[1].IsNull());
}
}
return maxParent;
@@ -3232,7 +3232,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
auto str = TStringBuilder{} << "{ count: " << cells.size();
if (Parent != 0) {
Y_ASSERT(Level != 0);
- str << ", parent: " << cells[0].AsValue<ui32>();
+ str << ", parent: " << cells[0].AsValue<NTableIndex::TClusterId>();
if (cells.size() != 1 && cells[1].IsNull()) {
str << ", pk: null";
}
@@ -3408,11 +3408,11 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
}
struct TClusterShards {
- ui32 From = std::numeric_limits<ui32>::max();
+ NTableIndex::TClusterId From = std::numeric_limits<NTableIndex::TClusterId>::max();
TShardIdx Local = InvalidShardIdx;
std::vector<TShardIdx> Global;
};
- TMap<ui32, TClusterShards> Cluster2Shards;
+ TMap<NTableIndex::TClusterId, TClusterShards> Cluster2Shards;
void AddParent(const TSerializedTableRange& range, TShardIdx shard);
diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h
index 05206d334ea..69d65b30f0a 100644
--- a/ydb/core/tx/schemeshard/schemeshard_schema.h
+++ b/ydb/core/tx/schemeshard/schemeshard_schema.h
@@ -2,6 +2,7 @@
#include "schemeshard_types.h"
+#include <ydb/core/base/table_index.h>
#include <ydb/core/scheme/scheme_pathid.h>
#include <ydb/core/protos/tx_datashard.pb.h>
#include <ydb/core/protos/tx.pb.h>
@@ -11,6 +12,8 @@
namespace NKikimr::NSchemeShard {
+inline constexpr auto ClusterIdTypeId = NScheme::NTypeIds::Uint64;
+
struct Schema : NIceDb::Schema {
struct Paths : Table<1> {
struct Id : Column<1, NScheme::NTypeIds::Uint64> { using Type = TLocalPathId; };
@@ -1899,18 +1902,20 @@ struct Schema : NIceDb::Schema {
using TColumns = TableColumns<OwnerPathId, LocalPathId, AlterVersion, Description>;
};
- struct KMeansTreeState : Table<112> {
+ // struct KMeansTreeState : Table<112> -- was already in trunk some time ago;
+ // it was replaced with KMeansTreeProgress before anyone really used it
+ struct KMeansTreeProgress : Table<114> {
struct Id : Column<1, NScheme::NTypeIds::Uint64> { using Type = TIndexBuildId; };
struct Level : Column<2, NScheme::NTypeIds::Uint32> {};
- struct Parent : Column<3, NScheme::NTypeIds::Uint32> {};
- struct State : Column<4, NScheme::NTypeIds::Uint32> {};
+ struct State : Column<3, NScheme::NTypeIds::Uint32> {};
+ struct Parent : Column<4, ClusterIdTypeId> {};
using TKey = TableKey<Id>;
using TColumns = TableColumns<
Id,
Level,
- Parent,
- State
+ State,
+ Parent
>;
};
@@ -2040,7 +2045,7 @@ struct Schema : NIceDb::Schema {
BackgroundSessions,
ResourcePool,
BackupCollection,
- KMeansTreeState,
+ KMeansTreeProgress,
KMeansTreeSample
>;
diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp
index 9805e9e7090..3d3b99bc7af 100644
--- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp
@@ -268,8 +268,8 @@ auto CalcVectorKmeansTreePostingImplTableDescImpl(
{
auto parentColumn = implTableDesc.AddColumns();
parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn);
- parentColumn->SetType("Uint32");
- parentColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ parentColumn->SetType(NTableIndex::ClusterIdTypeName);
+ parentColumn->SetTypeId(NSchemeShard::ClusterIdTypeId);
parentColumn->SetNotNull(true);
}
implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::ParentColumn);
@@ -304,8 +304,8 @@ auto CalcVectorKmeansTreePrefixImplTableDescImpl(
{
auto idColumn = implTableDesc.AddColumns();
idColumn->SetName(NTableVectorKmeansTreeIndex::IdColumn);
- idColumn->SetType("Uint32");
- idColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ idColumn->SetType(NTableIndex::ClusterIdTypeName);
+ idColumn->SetTypeId(NSchemeShard::ClusterIdTypeId);
idColumn->SetNotNull(true);
}
implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::IdColumn);
@@ -346,15 +346,15 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreeLevelImplTableDesc(
{
auto parentColumn = implTableDesc.AddColumns();
parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn);
- parentColumn->SetType("Uint32");
- parentColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ parentColumn->SetType(NTableIndex::ClusterIdTypeName);
+ parentColumn->SetTypeId(NSchemeShard::ClusterIdTypeId);
parentColumn->SetNotNull(true);
}
{
auto idColumn = implTableDesc.AddColumns();
idColumn->SetName(NTableVectorKmeansTreeIndex::IdColumn);
- idColumn->SetType("Uint32");
- idColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ idColumn->SetType(NTableIndex::ClusterIdTypeName);
+ idColumn->SetTypeId(NSchemeShard::ClusterIdTypeId);
idColumn->SetNotNull(true);
}
{
diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
index 25804068e46..42cfda9f4f4 100644
--- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
+++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp
@@ -1403,7 +1403,7 @@ TCheckFunc SplitBoundaries(TVector<T>&& expectedBoundaries) {
};
}
-template TCheckFunc SplitBoundaries<ui32>(TVector<ui32>&&);
+template TCheckFunc SplitBoundaries<ui64>(TVector<ui64>&&);
TCheckFunc ServerlessComputeResourcesMode(NKikimrSubDomains::EServerlessComputeResourcesMode serverlessComputeResourcesMode) {
return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp
index 73d0902c1b8..263621f48f0 100644
--- a/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp
+++ b/ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp
@@ -250,12 +250,12 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
partition_at_keys {
split_points {
- type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } }
- value { items { uint32_value: 12345 } }
+ type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } }
+ value { items { uint64_value: 12345 } }
}
split_points {
- type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } }
- value { items { uint32_value: 54321 } }
+ type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } }
+ value { items { uint64_value: 54321 } }
}
}
partitioning_settings {
@@ -310,14 +310,14 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
NLs::PartitionCount(3),
NLs::MinPartitionsCountEqual(3),
NLs::MaxPartitionsCountEqual(3),
- NLs::SplitBoundaries<ui32>({12345, 54321})
+ NLs::SplitBoundaries<ui64>({12345, 54321})
});
TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", PostingTable), true, true), {
NLs::IsTable,
NLs::PartitionCount(3),
NLs::MinPartitionsCountEqual(3),
NLs::MaxPartitionsCountEqual(3),
- NLs::SplitBoundaries<ui32>({12345, 54321})
+ NLs::SplitBoundaries<ui64>({12345, 54321})
});
for (size_t i = 0; i != 3; ++i) {
diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema
index 38b3b510e95..91c9f955e6d 100644
--- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema
+++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema
@@ -8036,10 +8036,11 @@
}
},
{
- "TableId": 112,
- "TableName": "KMeansTreeState",
+ "TableId": 113,
+ "TableName": "KMeansTreeSample",
"TableKey": [
- 1
+ 1,
+ 2
],
"ColumnsAdded": [
{
@@ -8049,18 +8050,18 @@
},
{
"ColumnId": 2,
- "ColumnName": "Level",
+ "ColumnName": "Row",
"ColumnType": "Uint32"
},
{
"ColumnId": 3,
- "ColumnName": "Parent",
- "ColumnType": "Uint32"
+ "ColumnName": "Probability",
+ "ColumnType": "Uint64"
},
{
"ColumnId": 4,
- "ColumnName": "State",
- "ColumnType": "Uint32"
+ "ColumnName": "Data",
+ "ColumnType": "String"
}
],
"ColumnsDropped": [],
@@ -8092,11 +8093,10 @@
}
},
{
- "TableId": 113,
- "TableName": "KMeansTreeSample",
+ "TableId": 114,
+ "TableName": "KMeansTreeProgress",
"TableKey": [
- 1,
- 2
+ 1
],
"ColumnsAdded": [
{
@@ -8106,18 +8106,18 @@
},
{
"ColumnId": 2,
- "ColumnName": "Row",
+ "ColumnName": "Level",
"ColumnType": "Uint32"
},
{
"ColumnId": 3,
- "ColumnName": "Probability",
- "ColumnType": "Uint64"
+ "ColumnName": "State",
+ "ColumnType": "Uint32"
},
{
"ColumnId": 4,
- "ColumnName": "Data",
- "ColumnType": "String"
+ "ColumnName": "Parent",
+ "ColumnType": "Uint64"
}
],
"ColumnsDropped": [],