summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ydb/core/kqp/ut/indexes/kqp_indexes_prefixed_vector_ut.cpp77
-rw-r--r--ydb/core/protos/tx_datashard.proto2
-rw-r--r--ydb/core/tx/datashard/build_index/kmeans_helper.cpp64
-rw-r--r--ydb/core/tx/datashard/build_index/kmeans_helper.h13
-rw-r--r--ydb/core/tx/datashard/build_index/local_kmeans.cpp8
-rw-r--r--ydb/core/tx/datashard/build_index/prefix_kmeans.cpp31
-rw-r--r--ydb/core/tx/datashard/build_index/reshuffle_kmeans.cpp8
-rw-r--r--ydb/core/tx/datashard/build_index/ut/ut_prefix_kmeans.cpp8
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp4
9 files changed, 141 insertions, 74 deletions
diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_prefixed_vector_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_prefixed_vector_ut.cpp
index 33279262c0a..7c11f618454 100644
--- a/ydb/core/kqp/ut/indexes/kqp_indexes_prefixed_vector_ut.cpp
+++ b/ydb/core/kqp/ut/indexes/kqp_indexes_prefixed_vector_ut.cpp
@@ -173,7 +173,7 @@ Y_UNIT_TEST_SUITE(KqpPrefixedVectorIndexes) {
DoPositiveQueriesPrefixedVectorIndexOrderBy(session, "CosineSimilarity", "DESC", covered);
}
- TSession DoCreateTableForPrefixedVectorIndex(TTableClient& db, bool nullable) {
+ TSession DoCreateTableForPrefixedVectorIndex(TTableClient& db, bool nullable, bool suffixPk = false) {
auto session = db.CreateSession().GetValueSync().GetSession();
{
@@ -191,14 +191,25 @@ Y_UNIT_TEST_SUITE(KqpPrefixedVectorIndexes) {
.AddNonNullableColumn("emb", EPrimitiveType::String)
.AddNonNullableColumn("data", EPrimitiveType::String);
}
- tableBuilder.SetPrimaryKeyColumns({"pk"});
+ if (suffixPk) {
+ tableBuilder.SetPrimaryKeyColumns({"pk", "user"});
+ } else {
+ tableBuilder.SetPrimaryKeyColumns({"pk"});
+ }
tableBuilder.BeginPartitioningSettings()
.SetMinPartitionsCount(3)
- .EndPartitioningSettings();
- auto partitions = TExplicitPartitions{}
- .AppendSplitPoints(TValueBuilder{}.BeginTuple().AddElement().OptionalInt64(40).EndTuple().Build())
- .AppendSplitPoints(TValueBuilder{}.BeginTuple().AddElement().OptionalInt64(60).EndTuple().Build());
- tableBuilder.SetPartitionAtKeys(partitions);
+ .EndPartitioningSettings();
+ if (suffixPk) {
+ auto partitions = TExplicitPartitions{}
+ .AppendSplitPoints(TValueBuilder{}.BeginTuple().AddElement().OptionalInt64(40).AddElement().OptionalString("").EndTuple().Build())
+ .AppendSplitPoints(TValueBuilder{}.BeginTuple().AddElement().OptionalInt64(60).AddElement().OptionalString("").EndTuple().Build());
+ tableBuilder.SetPartitionAtKeys(partitions);
+ } else {
+ auto partitions = TExplicitPartitions{}
+ .AppendSplitPoints(TValueBuilder{}.BeginTuple().AddElement().OptionalInt64(40).EndTuple().Build())
+ .AppendSplitPoints(TValueBuilder{}.BeginTuple().AddElement().OptionalInt64(60).EndTuple().Build());
+ tableBuilder.SetPartitionAtKeys(partitions);
+ }
auto result = session.CreateTable("/Root/TestTable", tableBuilder.Build()).ExtractValueSync();
UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false);
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString());
@@ -488,6 +499,58 @@ Y_UNIT_TEST_SUITE(KqpPrefixedVectorIndexes) {
DoPositiveQueriesPrefixedVectorIndexOrderByCosine(session, true /*covered*/);
}
+ Y_UNIT_TEST_QUAD(CosineDistanceWithPkPrefix, Nullable, Covered) {
+ NKikimrConfig::TFeatureFlags featureFlags;
+ featureFlags.SetEnableVectorIndex(true);
+ auto setting = NKikimrKqp::TKqpSetting();
+ auto serverSettings = TKikimrSettings()
+ .SetFeatureFlags(featureFlags)
+ .SetKqpSettings({setting});
+
+ TKikimrRunner kikimr(serverSettings);
+ kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::BUILD_INDEX, NActors::NLog::PRI_TRACE);
+ kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_TRACE);
+
+ auto db = kikimr.GetTableClient();
+
+ auto session = DoCreateTableForPrefixedVectorIndex(db, Nullable, true);
+ {
+ const TString createIndex(Q_(Sprintf(R"(
+ ALTER TABLE `/Root/TestTable`
+ ADD INDEX index
+ GLOBAL USING vector_kmeans_tree
+ ON (user, emb) %s
+ WITH (distance=cosine, vector_type="uint8", vector_dimension=2, levels=2, clusters=2);
+ )", (Covered ? "COVER (emb, data)" : ""))));
+
+ auto result = session.ExecuteSchemeQuery(createIndex)
+ .ExtractValueSync();
+
+ UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
+ }
+ {
+ auto result = session.DescribeTable("/Root/TestTable").ExtractValueSync();
+ UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS);
+ const auto& indexes = result.GetTableDescription().GetIndexDescriptions();
+ UNIT_ASSERT_EQUAL(indexes.size(), 1);
+ UNIT_ASSERT_EQUAL(indexes[0].GetIndexName(), "index");
+ std::vector<std::string> indexKeyColumns{"user", "emb"};
+ UNIT_ASSERT_EQUAL(indexes[0].GetIndexColumns(), indexKeyColumns);
+ std::vector<std::string> indexDataColumns;
+ if (Covered) {
+ indexDataColumns = {"emb", "data"};
+ }
+ UNIT_ASSERT_EQUAL(indexes[0].GetDataColumns(), indexDataColumns);
+ const auto& settings = std::get<TKMeansTreeSettings>(indexes[0].GetIndexSettings());
+ UNIT_ASSERT_EQUAL(settings.Settings.Metric, NYdb::NTable::TVectorIndexSettings::EMetric::CosineDistance);
+ UNIT_ASSERT_EQUAL(settings.Settings.VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Uint8);
+ UNIT_ASSERT_EQUAL(settings.Settings.VectorDimension, 2);
+ UNIT_ASSERT_EQUAL(settings.Levels, 2);
+ UNIT_ASSERT_EQUAL(settings.Clusters, 2);
+ }
+ DoPositiveQueriesPrefixedVectorIndexOrderByCosine(session, Covered);
+ }
+
}
}
diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto
index 58480a322d5..ef8d32617f8 100644
--- a/ydb/core/protos/tx_datashard.proto
+++ b/ydb/core/protos/tx_datashard.proto
@@ -1668,6 +1668,8 @@ message TEvPrefixKMeansRequest {
optional uint32 PrefixColumns = 17;
optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 18;
+
+ repeated string SourcePrimaryKeyColumns = 19;
}
message TEvPrefixKMeansResponse {
diff --git a/ydb/core/tx/datashard/build_index/kmeans_helper.cpp b/ydb/core/tx/datashard/build_index/kmeans_helper.cpp
index 383fc938d57..6fa2c4d84c6 100644
--- a/ydb/core/tx/datashard/build_index/kmeans_helper.cpp
+++ b/ydb/core/tx/datashard/build_index/kmeans_helper.cpp
@@ -45,51 +45,22 @@ void AddRowToLevel(TBufferData& buffer, TClusterId parent, TClusterId child, con
buffer.AddRow(TSerializedCellVec{pk}, TSerializedCellVec::Serialize(data));
}
-void AddRowMainToBuild(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row) {
- EnsureNoPostingParentFlag(parent);
-
- std::array<TCell, 1> cells;
- cells[0] = TCell::Make(parent);
- auto pk = TSerializedCellVec::Serialize(cells);
- TSerializedCellVec::UnsafeAppendCells(key, pk);
- buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row),
- TSerializedCellVec{key});
-}
-
-void AddRowMainToPosting(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos)
-{
- parent = SetPostingParentFlag(parent);
-
- std::array<TCell, 1> cells;
- cells[0] = TCell::Make(parent);
- auto pk = TSerializedCellVec::Serialize(cells);
- TSerializedCellVec::UnsafeAppendCells(key, pk);
- buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row.Slice(dataPos)),
- TSerializedCellVec{key});
-}
-
-void AddRowBuildToBuild(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 prefixColumns)
-{
- EnsureNoPostingParentFlag(parent);
+void AddRowToData(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> sourcePk,
+ TArrayRef<const TCell> dataColumns, TArrayRef<const TCell> origKey, bool isPostingLevel) {
+ if (isPostingLevel) {
+ parent = SetPostingParentFlag(parent);
+ } else {
+ EnsureNoPostingParentFlag(parent);
+ }
std::array<TCell, 1> cells;
cells[0] = TCell::Make(parent);
auto pk = TSerializedCellVec::Serialize(cells);
- TSerializedCellVec::UnsafeAppendCells(key.Slice(prefixColumns), pk);
- buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row),
- TSerializedCellVec{key});
-}
-
-void AddRowBuildToPosting(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos, ui32 prefixColumns)
-{
- parent = SetPostingParentFlag(parent);
+ TSerializedCellVec::UnsafeAppendCells(sourcePk, pk);
- std::array<TCell, 1> cells;
- cells[0] = TCell::Make(parent);
- auto pk = TSerializedCellVec::Serialize(cells);
- TSerializedCellVec::UnsafeAppendCells(key.Slice(prefixColumns), pk);
- buffer.AddRow(TSerializedCellVec{std::move(pk)}, TSerializedCellVec::Serialize(row.Slice(dataPos)),
- TSerializedCellVec{key});
+ buffer.AddRow(TSerializedCellVec{std::move(pk)},
+ TSerializedCellVec::Serialize(dataColumns),
+ TSerializedCellVec{origKey});
}
TTags MakeScanTags(const TUserTable& table, const TProtoStringType& embedding,
@@ -114,12 +85,11 @@ TTags MakeScanTags(const TUserTable& table, const TProtoStringType& embedding,
std::shared_ptr<NTxProxy::TUploadTypes> MakeOutputTypes(const TUserTable& table, NKikimrTxDataShard::EKMeansState uploadState,
const TProtoStringType& embedding, const google::protobuf::RepeatedPtrField<TProtoStringType>& data,
- ui32 prefixColumns)
+ const google::protobuf::RepeatedPtrField<TProtoStringType>& pkColumns)
{
auto types = GetAllTypes(table);
auto result = std::make_shared<NTxProxy::TUploadTypes>();
- result->reserve(1 + 1 + std::min((table.KeyColumnTypes.size() - prefixColumns) + data.size(), types.size()));
Ydb::Type type;
type.set_type_id(NTableIndex::ClusterIdType);
@@ -133,8 +103,14 @@ std::shared_ptr<NTxProxy::TUploadTypes> MakeOutputTypes(const TUserTable& table,
types.erase(it);
}
};
- for (const auto& column : table.KeyColumnIds | std::views::drop(prefixColumns)) {
- addType(table.Columns.at(column).Name);
+ if (pkColumns.size()) {
+ for (const auto& column : pkColumns) {
+ addType(column);
+ }
+ } else {
+ for (const auto& column : table.KeyColumnIds) {
+ addType(table.Columns.at(column).Name);
+ }
}
switch (uploadState) {
case NKikimrTxDataShard::EKMeansState::UPLOAD_MAIN_TO_BUILD:
diff --git a/ydb/core/tx/datashard/build_index/kmeans_helper.h b/ydb/core/tx/datashard/build_index/kmeans_helper.h
index 3630050a7b5..6d2c4926a0f 100644
--- a/ydb/core/tx/datashard/build_index/kmeans_helper.h
+++ b/ydb/core/tx/datashard/build_index/kmeans_helper.h
@@ -140,21 +140,16 @@ struct TMaxInnerProductSimilarity : TMetric<TCoord> {
void AddRowToLevel(TBufferData& buffer, TClusterId parent, TClusterId child, const TString& embedding, bool isPostingLevel);
-void AddRowMainToBuild(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row);
+void AddRowToData(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> sourcePk,
+ TArrayRef<const TCell> dataColumns, TArrayRef<const TCell> origKey, bool isPostingLevel);
-void AddRowMainToPosting(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos);
-
-void AddRowBuildToBuild(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 prefixColumns = 1);
-
-void AddRowBuildToPosting(TBufferData& buffer, TClusterId parent, TArrayRef<const TCell> key, TArrayRef<const TCell> row, ui32 dataPos, ui32 prefixColumns = 1);
-
-TTags MakeScanTags(const TUserTable& table, const TProtoStringType& embedding,
+TTags MakeScanTags(const TUserTable& table, const TProtoStringType& embedding,
const google::protobuf::RepeatedPtrField<TProtoStringType>& data, ui32& embeddingPos,
ui32& dataPos, NTable::TTag& embeddingTag);
std::shared_ptr<NTxProxy::TUploadTypes> MakeOutputTypes(const TUserTable& table, NKikimrTxDataShard::EKMeansState uploadState,
const TProtoStringType& embedding, const google::protobuf::RepeatedPtrField<TProtoStringType>& data,
- ui32 prefixColumns = 0);
+ const google::protobuf::RepeatedPtrField<TProtoStringType>& pkColumns = {});
void MakeScan(auto& record, const auto& createScan, const auto& badRequest)
{
diff --git a/ydb/core/tx/datashard/build_index/local_kmeans.cpp b/ydb/core/tx/datashard/build_index/local_kmeans.cpp
index 6e79fec57d8..210419bce7d 100644
--- a/ydb/core/tx/datashard/build_index/local_kmeans.cpp
+++ b/ydb/core/tx/datashard/build_index/local_kmeans.cpp
@@ -462,28 +462,28 @@ private:
void FeedMainToBuild(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowMainToBuild(*OutputBuf, Child + *pos, key, row);
+ AddRowToData(*OutputBuf, Child + *pos, key, row, key, false);
}
}
void FeedMainToPosting(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowMainToPosting(*OutputBuf, Child + *pos, key, row, DataPos);
+ AddRowToData(*OutputBuf, Child + *pos, key, row.Slice(DataPos), key, true);
}
}
void FeedBuildToBuild(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowBuildToBuild(*OutputBuf, Child + *pos, key, row);
+ AddRowToData(*OutputBuf, Child + *pos, key.Slice(1), row, key, false);
}
}
void FeedBuildToPosting(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowBuildToPosting(*OutputBuf, Child + *pos, key, row, DataPos);
+ AddRowToData(*OutputBuf, Child + *pos, key.Slice(1), row.Slice(DataPos), key, true);
}
}
diff --git a/ydb/core/tx/datashard/build_index/prefix_kmeans.cpp b/ydb/core/tx/datashard/build_index/prefix_kmeans.cpp
index 469bdf2b49c..73e05b457ba 100644
--- a/ydb/core/tx/datashard/build_index/prefix_kmeans.cpp
+++ b/ydb/core/tx/datashard/build_index/prefix_kmeans.cpp
@@ -29,7 +29,7 @@ using namespace NKMeans;
*
* Request:
* - The client sends TEvPrefixKMeansRequest with:
-* - Child: base ID from which new cluster IDs are assigned within this request.
+ * - Child: base ID from which new cluster IDs are assigned within this request.
* - Each prefix group processed will be assigned cluster IDs starting at Child + 1.
* - For a request with K clusters per prefix, the IDs used for the first prefix group are
* (Child + 1) to (Child + K), and the parent ID for these is Child.
@@ -93,6 +93,9 @@ protected:
// FIXME: save PrefixRows as std::vector<std::pair<TSerializedCellVec, TSerializedCellVec>> to avoid parsing
const ui32 PrefixColumns;
+ // for PrefixKMeans, original table's primary key columns are passed separately,
+ // because the prefix table contains them in a different order if they are both in PK and in the prefix
+ const ui32 DataColumnCount;
TSerializedCellVec Prefix;
TBufferData PrefixRows;
bool IsFirstPrefixFeed = true;
@@ -126,10 +129,14 @@ public:
, ResponseActorId{responseActorId}
, Response{std::move(response)}
, PrefixColumns{request.GetPrefixColumns()}
+ , DataColumnCount{(ui32)request.GetDataColumns().size()}
{
const auto& embedding = request.GetEmbeddingColumn();
- const auto& data = request.GetDataColumns();
- ScanTags = MakeScanTags(table, embedding, data, EmbeddingPos, DataPos, EmbeddingTag);
+ TVector<TString> data{request.GetDataColumns().begin(), request.GetDataColumns().end()};
+ for (auto & col: request.GetSourcePrimaryKeyColumns()) {
+ data.push_back(col);
+ }
+ ScanTags = MakeScanTags(table, embedding, {data.begin(), data.end()}, EmbeddingPos, DataPos, EmbeddingTag);
Lead.To(ScanTags, {}, NTable::ESeek::Lower);
{
Ydb::Type type;
@@ -141,7 +148,11 @@ public:
(*levelTypes)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn, type};
LevelBuf = Uploader.AddDestination(request.GetLevelName(), std::move(levelTypes));
}
- OutputBuf = Uploader.AddDestination(request.GetOutputName(), MakeOutputTypes(table, UploadState, embedding, data, PrefixColumns));
+ {
+ auto outputTypes = MakeOutputTypes(table, UploadState, embedding,
+ {data.begin(), data.begin()+request.GetDataColumns().size()}, request.GetSourcePrimaryKeyColumns());
+ OutputBuf = Uploader.AddDestination(request.GetOutputName(), outputTypes);
+ }
{
auto types = GetAllTypes(table);
@@ -480,14 +491,14 @@ private:
void FeedBuildToBuild(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowBuildToBuild(*OutputBuf, Child + *pos, key, row, PrefixColumns);
+ AddRowToData(*OutputBuf, Child + *pos, row.Slice(DataPos+DataColumnCount), row.Slice(0, DataPos+DataColumnCount), key, false);
}
}
void FeedBuildToPosting(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowBuildToPosting(*OutputBuf, Child + *pos, key, row, DataPos, PrefixColumns);
+ AddRowToData(*OutputBuf, Child + *pos, row.Slice(DataPos+DataColumnCount), row.Slice(DataPos, DataColumnCount), key, true);
}
}
@@ -624,6 +635,14 @@ void TDataShard::HandleSafe(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, cons
if (request.GetPrefixColumns() > userTable.KeyColumnIds.size()) {
badRequest(TStringBuilder() << "Should not be requested on more than " << userTable.KeyColumnIds.size() << " prefix columns");
}
+ if (request.GetSourcePrimaryKeyColumns().size() == 0) {
+ badRequest("Request should include source primary key columns");
+ }
+ for (auto pkColumn : request.GetSourcePrimaryKeyColumns()) {
+ if (!tags.contains(pkColumn)) {
+ badRequest(TStringBuilder() << "Unknown source primary key column: " << pkColumn);
+ }
+ }
if (trySendBadRequest()) {
return;
diff --git a/ydb/core/tx/datashard/build_index/reshuffle_kmeans.cpp b/ydb/core/tx/datashard/build_index/reshuffle_kmeans.cpp
index cdf1e59bee4..7328a4e0279 100644
--- a/ydb/core/tx/datashard/build_index/reshuffle_kmeans.cpp
+++ b/ydb/core/tx/datashard/build_index/reshuffle_kmeans.cpp
@@ -308,28 +308,28 @@ private:
void FeedMainToBuild(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowMainToBuild(*OutputBuf, Child + *pos, key, row);
+ AddRowToData(*OutputBuf, Child + *pos, key, row, key, false);
}
}
void FeedMainToPosting(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowMainToPosting(*OutputBuf, Child + *pos, key, row, DataPos);
+ AddRowToData(*OutputBuf, Child + *pos, key, row.Slice(DataPos), key, true);
}
}
void FeedBuildToBuild(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowBuildToBuild(*OutputBuf, Child + *pos, key, row);
+ AddRowToData(*OutputBuf, Child + *pos, key.Slice(1), row, key, false);
}
}
void FeedBuildToPosting(TArrayRef<const TCell> key, TArrayRef<const TCell> row)
{
if (auto pos = Clusters.FindCluster(row, EmbeddingPos); pos) {
- AddRowBuildToPosting(*OutputBuf, Child + *pos, key, row, DataPos);
+ AddRowToData(*OutputBuf, Child + *pos, key.Slice(1), row.Slice(DataPos), key, true);
}
}
};
diff --git a/ydb/core/tx/datashard/build_index/ut/ut_prefix_kmeans.cpp b/ydb/core/tx/datashard/build_index/ut/ut_prefix_kmeans.cpp
index 1f31222c61a..e05f4d235b5 100644
--- a/ydb/core/tx/datashard/build_index/ut/ut_prefix_kmeans.cpp
+++ b/ydb/core/tx/datashard/build_index/ut/ut_prefix_kmeans.cpp
@@ -65,6 +65,9 @@ Y_UNIT_TEST_SUITE (TTxDataShardPrefixKMeansScan) {
rec.SetEmbeddingColumn("embedding");
rec.SetPrefixColumns(1);
+ rec.AddSourcePrimaryKeyColumns("key");
+
+ rec.SetPrefixName(kPrefixTable);
rec.SetLevelName(kLevelTable);
rec.SetOutputName(kPostingTable);
@@ -130,6 +133,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardPrefixKMeansScan) {
rec.SetEmbeddingColumn("embedding");
rec.AddDataColumns("data");
rec.SetPrefixColumns(1);
+ rec.AddSourcePrimaryKeyColumns("key");
rec.SetPrefixName(kPrefixTable);
rec.SetLevelName(kLevelTable);
@@ -252,6 +256,10 @@ Y_UNIT_TEST_SUITE (TTxDataShardPrefixKMeansScan) {
}, "{ <main>: Error: Unknown table id: 0 }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvPrefixKMeansRequest& request) {
+ request.ClearSourcePrimaryKeyColumns();
+ }, "{ <main>: Error: Request should include source primary key columns }");
+
+ DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvPrefixKMeansRequest& request) {
request.MutableSettings()->set_vector_type(VectorIndexSettings::VECTOR_TYPE_UNSPECIFIED);
}, "{ <main>: Error: Wrong vector type }");
DoBadRequest(server, sender, [](NKikimrTxDataShard::TEvPrefixKMeansRequest& request) {
diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
index 0a4b7b82574..539b6016a75 100644
--- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
@@ -692,6 +692,10 @@ private:
*ev->Record.MutableDataColumns() = {
buildInfo.DataColumns.begin(), buildInfo.DataColumns.end()
};
+ const auto& tableInfo = *Self->Tables.at(buildInfo.TablePathId);
+ for (ui32 keyPos: tableInfo.KeyColumnIds) {
+ ev->Record.AddSourcePrimaryKeyColumns(tableInfo.Columns.at(keyPos).Name);
+ }
auto shardId = CommonFillRecord<false>(ev->Record, shardIdx, buildInfo);
ev->Record.SetSeed(ui64(shardId));