aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValery Mironov <mbkkt@ydb.tech>2024-12-18 22:59:41 +0300
committerGitHub <noreply@github.com>2024-12-18 22:59:41 +0300
commitdb42db4322443721201debbfd11ef19e48e2d4f5 (patch)
tree763868396fef7be09af926d983548cb587b0f949
parentf3c9a60838fb5b413885d9894985861533874f25 (diff)
downloadydb-db42db4322443721201debbfd11ef19e48e2d4f5.tar.gz
Some fixes in vector index schema (#12727)
* Rename vector index impl table column constants to make them more convenient and less confusing (e.g. a single constant for ParentColumn, which is important for future KQP rewrite rules) * Rename the `__ydb_embedding` column to `__ydb_centroid`, because it makes more sense * Add the missing NOT NULL property for vector index impl table columns
-rw-r--r--ydb/core/base/table_index.cpp12
-rw-r--r--ydb/core/base/table_vector_index.h13
-rw-r--r--ydb/core/base/ut/table_index_ut.cpp22
-rw-r--r--ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp50
-rw-r--r--ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp20
-rw-r--r--ydb/core/tx/datashard/kmeans_helper.cpp2
-rw-r--r--ydb/core/tx/datashard/local_kmeans.cpp6
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp4
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp8
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_info_types.h4
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_utils.cpp18
-rw-r--r--ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp18
12 files changed, 91 insertions, 86 deletions
diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp
index c1d827ea519..fb2e136c480 100644
--- a/ydb/core/base/table_index.cpp
+++ b/ydb/core/base/table_index.cpp
@@ -115,17 +115,17 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn
return false;
}
- if (Contains(table.Keys, NTableVectorKmeansTreeIndex::PostingTable_ParentColumn)) {
- explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn;
+ if (Contains(table.Keys, NTableVectorKmeansTreeIndex::ParentColumn)) {
+ explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
return false;
}
- if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentColumn)) {
+ if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::ParentColumn)) {
// This isn't really needed, but it will be really strange to have column with such name but different meaning
- explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn;
+ explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
return false;
}
- if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentColumn)) {
- explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn;
+ if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::ParentColumn)) {
+ explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn;
return false;
}
}
diff --git a/ydb/core/base/table_vector_index.h b/ydb/core/base/table_vector_index.h
index 4878ed11bf0..e4f29bf0248 100644
--- a/ydb/core/base/table_vector_index.h
+++ b/ydb/core/base/table_vector_index.h
@@ -4,17 +4,18 @@ namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex {
// Vector KmeansTree index tables description
+// Level and Posting tables
+inline constexpr const char* ParentColumn = "__ydb_parent";
+
// Level table
inline constexpr const char* LevelTable = "indexImplLevelTable";
-inline constexpr const char* LevelTable_ParentColumn = "__ydb_parent";
-inline constexpr const char* LevelTable_IdColumn = "__ydb_id";
-inline constexpr const char* LevelTable_EmbeddingColumn = "__ydb_embedding";
+inline constexpr const char* IdColumn = "__ydb_id";
+inline constexpr const char* CentroidColumn = "__ydb_centroid";
// Posting table
inline constexpr const char* PostingTable = "indexImplPostingTable";
-inline constexpr const char* PostingTable_ParentColumn = LevelTable_ParentColumn;
-inline constexpr const char* BuildPostingTableSuffix0 = "0build";
-inline constexpr const char* BuildPostingTableSuffix1 = "1build";
+inline constexpr const char* BuildSuffix0 = "0build";
+inline constexpr const char* BuildSuffix1 = "1build";
}
diff --git a/ydb/core/base/ut/table_index_ut.cpp b/ydb/core/base/ut/table_index_ut.cpp
index ccac3e26a84..932c43007c9 100644
--- a/ydb/core/base/ut/table_index_ut.cpp
+++ b/ydb/core/base/ut/table_index_ut.cpp
@@ -27,16 +27,16 @@ Y_UNIT_TEST_SUITE (TableIndex) {
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
{
- const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {"PK"}};
+ const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {"PK"}};
- UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {}}, explain));
+ UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::ParentColumn}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
- UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}}, explain));
+ UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::ParentColumn}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
}
{
- const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}};
+ const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {NTableVectorKmeansTreeIndex::ParentColumn}};
UNIT_ASSERT(IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
UNIT_ASSERT_STRINGS_EQUAL(explain, "");
@@ -118,19 +118,19 @@ Y_UNIT_TEST_SUITE (TableIndex) {
UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example PK2");
{
- const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {"PK"}};
+ const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {"PK"}};
- UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {}}, explain));
- UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn);
+ UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::ParentColumn}, {}}, explain));
+ UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
- UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}}, explain));
- UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn);
+ UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::ParentColumn}}, explain));
+ UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
}
{
- const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}};
+ const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {NTableVectorKmeansTreeIndex::ParentColumn}};
UNIT_ASSERT(!IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain));
- UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn);
+ UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn);
}
}
}
diff --git a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp
index 3db8d4c1bed..ee7a32ab266 100644
--- a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp
+++ b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp
@@ -183,9 +183,9 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {LevelTable_ParentColumn, "Uint32", true, true},
- {LevelTable_IdColumn, "Uint32", true, true},
- {LevelTable_EmbeddingColumn, "String", false, true},
+ {ParentColumn, "Uint32", true, true},
+ {IdColumn, "Uint32", true, true},
+ {CentroidColumn, "String", false, true},
});
CreateShardedTable(server, sender, "/Root", "table-level", options);
}
@@ -194,7 +194,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {PostingTable_ParentColumn, "Uint32", true, true},
+ {ParentColumn, "Uint32", true, true},
{"key", "Uint32", true, true},
{"data", "String", false, false},
});
@@ -206,7 +206,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {PostingTable_ParentColumn, "Uint32", true, true},
+ {ParentColumn, "Uint32", true, true},
{"key", "Uint32", true, true},
{"embedding", "String", false, false},
{"data", "String", false, false},
@@ -350,8 +350,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_POSTING,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = mm\3\n"
- "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = 11\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = mm\3\n"
+ "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = 11\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 4, data = four\n"
"__ydb_parent = 1, key = 5, data = five\n"
"__ydb_parent = 2, key = 1, data = one\n"
@@ -365,8 +365,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_POSTING,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = 11\3\n"
- "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = mm\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = 11\3\n"
+ "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = mm\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, data = one\n"
"__ydb_parent = 1, key = 2, data = two\n"
"__ydb_parent = 1, key = 3, data = three\n"
@@ -381,7 +381,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_POSTING,
VectorIndexSettings::VECTOR_TYPE_UINT8, similarity);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = II\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = II\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, data = one\n"
"__ydb_parent = 1, key = 2, data = two\n"
"__ydb_parent = 1, key = 3, data = three\n"
@@ -440,8 +440,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_BUILD,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = mm\3\n"
- "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = 11\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = mm\3\n"
+ "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = 11\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 4, embedding = \x65\x65\3, data = four\n"
"__ydb_parent = 1, key = 5, embedding = \x75\x75\3, data = five\n"
"__ydb_parent = 2, key = 1, embedding = \x30\x30\3, data = one\n"
@@ -455,8 +455,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_BUILD,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = 11\3\n"
- "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = mm\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = 11\3\n"
+ "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = mm\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, embedding = \x30\x30\3, data = one\n"
"__ydb_parent = 1, key = 2, embedding = \x31\x31\3, data = two\n"
"__ydb_parent = 1, key = 3, embedding = \x32\x32\3, data = three\n"
@@ -471,7 +471,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_BUILD,
VectorIndexSettings::VECTOR_TYPE_UINT8, similarity);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = II\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = II\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, embedding = \x30\x30\3, data = one\n"
"__ydb_parent = 1, key = 2, embedding = \x31\x31\3, data = two\n"
"__ydb_parent = 1, key = 3, embedding = \x32\x32\3, data = three\n"
@@ -532,8 +532,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_POSTING,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = mm\3\n"
- "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = 11\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = mm\3\n"
+ "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = 11\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 4, data = four\n"
"__ydb_parent = 41, key = 5, data = five\n"
"__ydb_parent = 42, key = 1, data = one\n"
@@ -547,8 +547,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_POSTING,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = 11\3\n"
- "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = mm\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = 11\3\n"
+ "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = mm\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, data = one\n"
"__ydb_parent = 41, key = 2, data = two\n"
"__ydb_parent = 41, key = 3, data = three\n"
@@ -563,7 +563,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_POSTING,
VectorIndexSettings::VECTOR_TYPE_UINT8, similarity);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = II\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = II\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, data = one\n"
"__ydb_parent = 41, key = 2, data = two\n"
"__ydb_parent = 41, key = 3, data = three\n"
@@ -624,8 +624,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_BUILD,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = mm\3\n"
- "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = 11\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = mm\3\n"
+ "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = 11\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 4, embedding = \x65\x65\3, data = four\n"
"__ydb_parent = 41, key = 5, embedding = \x75\x75\3, data = five\n"
"__ydb_parent = 42, key = 1, embedding = \x30\x30\3, data = one\n"
@@ -639,8 +639,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_BUILD,
VectorIndexSettings::VECTOR_TYPE_UINT8, distance);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = 11\3\n"
- "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = mm\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = 11\3\n"
+ "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = mm\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, embedding = \x30\x30\3, data = one\n"
"__ydb_parent = 41, key = 2, embedding = \x31\x31\3, data = two\n"
"__ydb_parent = 41, key = 3, embedding = \x32\x32\3, data = three\n"
@@ -655,7 +655,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) {
auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k,
NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_BUILD,
VectorIndexSettings::VECTOR_TYPE_UINT8, similarity);
- UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = II\3\n");
+ UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = II\3\n");
UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, embedding = \x30\x30\3, data = one\n"
"__ydb_parent = 41, key = 2, embedding = \x31\x31\3, data = two\n"
"__ydb_parent = 41, key = 3, embedding = \x32\x32\3, data = three\n"
diff --git a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp
index 6267ebdeeba..f358b393d9f 100644
--- a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp
+++ b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp
@@ -171,7 +171,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {PostingTable_ParentColumn, "Uint32", true, true},
+ {ParentColumn, "Uint32", true, true},
{"key", "Uint32", true, true},
{"data", "String", false, false},
});
@@ -183,7 +183,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
{
options.AllowSystemColumnNames(true);
options.Columns({
- {PostingTable_ParentColumn, "Uint32", true, true},
+ {ParentColumn, "Uint32", true, true},
{"key", "Uint32", true, true},
{"embedding", "String", false, false},
{"data", "String", false, false},
@@ -292,8 +292,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
// Upsert some initial values
ExecSQL(server, sender,
R"(
- UPSERT INTO `/Root/table-main`
- (key, embedding, data)
+ UPSERT INTO `/Root/table-main`
+ (key, embedding, data)
VALUES )"
"(1, \"\x30\x30\3\", \"one\"),"
"(2, \"\x31\x31\3\", \"two\"),"
@@ -377,8 +377,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
// Upsert some initial values
ExecSQL(server, sender,
R"(
- UPSERT INTO `/Root/table-main`
- (key, embedding, data)
+ UPSERT INTO `/Root/table-main`
+ (key, embedding, data)
VALUES )"
"(1, \"\x30\x30\3\", \"one\"),"
"(2, \"\x31\x31\3\", \"two\"),"
@@ -462,8 +462,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
// Upsert some initial values
ExecSQL(server, sender,
R"(
- UPSERT INTO `/Root/table-main`
- (__ydb_parent, key, embedding, data)
+ UPSERT INTO `/Root/table-main`
+ (__ydb_parent, key, embedding, data)
VALUES )"
"(39, 1, \"\x30\x30\3\", \"one\"),"
"(40, 1, \"\x30\x30\3\", \"one\"),"
@@ -549,8 +549,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) {
// Upsert some initial values
ExecSQL(server, sender,
R"(
- UPSERT INTO `/Root/table-main`
- (__ydb_parent, key, embedding, data)
+ UPSERT INTO `/Root/table-main`
+ (__ydb_parent, key, embedding, data)
VALUES )"
"(39, 1, \"\x30\x30\3\", \"one\"),"
"(40, 1, \"\x30\x30\3\", \"one\"),"
diff --git a/ydb/core/tx/datashard/kmeans_helper.cpp b/ydb/core/tx/datashard/kmeans_helper.cpp
index e755d09c5ce..842b583b524 100644
--- a/ydb/core/tx/datashard/kmeans_helper.cpp
+++ b/ydb/core/tx/datashard/kmeans_helper.cpp
@@ -97,7 +97,7 @@ MakeUploadTypes(const TUserTable& table, NKikimrTxDataShard::TEvLocalKMeansReque
Ydb::Type type;
type.set_type_id(Ydb::Type::UINT32);
- uploadTypes->emplace_back(NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn, type);
+ uploadTypes->emplace_back(NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type);
auto addType = [&](const auto& column) {
auto it = types.find(column);
diff --git a/ydb/core/tx/datashard/local_kmeans.cpp b/ydb/core/tx/datashard/local_kmeans.cpp
index d038319c69a..2962f2d050f 100644
--- a/ydb/core/tx/datashard/local_kmeans.cpp
+++ b/ydb/core/tx/datashard/local_kmeans.cpp
@@ -145,10 +145,10 @@ public:
if (Ydb::Type type; State <= EState::KMEANS) {
TargetTypes = std::make_shared<NTxProxy::TUploadTypes>(3);
type.set_type_id(Ydb::Type::UINT32);
- (*TargetTypes)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_ParentColumn, type};
- (*TargetTypes)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_IdColumn, type};
+ (*TargetTypes)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type};
+ (*TargetTypes)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type};
type.set_type_id(Ydb::Type::STRING);
- (*TargetTypes)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn, type};
+ (*TargetTypes)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn, type};
}
NextTypes = MakeUploadTypes(table, UploadState, embedding, data);
}
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp
index 6f21fab6423..d9d80078be1 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp
@@ -133,8 +133,8 @@ TVector<ISubOperation::TPtr> CreateBuildIndex(TOperationId opId, const TTxTransa
result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc)));
// TODO Maybe better to use partition from main table
// This tables are temporary and handled differently in apply_build_index
- result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildPostingTableSuffix0)));
- result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildPostingTableSuffix1)));
+ result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildSuffix0)));
+ result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildSuffix1)));
} else {
NKikimrSchemeOp::TTableDescription indexTableDesc;
// TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS
diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
index b56852d17d2..b8958e4468c 100644
--- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
@@ -161,10 +161,10 @@ public:
Types = std::make_shared<NTxProxy::TUploadTypes>(3);
Ydb::Type type;
type.set_type_id(Ydb::Type::UINT32);
- (*Types)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_ParentColumn, type};
- (*Types)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_IdColumn, type};
+ (*Types)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type};
+ (*Types)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type};
type.set_type_id(Ydb::Type::STRING);
- (*Types)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn, type};
+ (*Types)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn, type};
Become(&TThis::StateWork);
@@ -337,7 +337,7 @@ THolder<TEvSchemeShard::TEvModifySchemeTransaction> CreateBuildPropose(
modifyScheme.SetWorkingDir(path.Dive(buildInfo.IndexName).PathString());
modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpInitiateBuildIndexImplTable);
auto& op = *modifyScheme.MutableCreateTable();
- const char* suffix = buildInfo.KMeans.Level % 2 != 0 ? BuildPostingTableSuffix0 : BuildPostingTableSuffix1;
+ const char* suffix = buildInfo.KMeans.Level % 2 != 0 ? BuildSuffix0 : BuildSuffix1;
op = CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, {}, suffix);
const auto [count, parts, step] = ComputeKMeansBoundaries(*tableInfo, buildInfo);
diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h
index 63a7e9d1a1d..a6623c63e54 100644
--- a/ydb/core/tx/schemeshard/schemeshard_info_types.h
+++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h
@@ -3126,7 +3126,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
using namespace NTableIndex::NTableVectorKmeansTreeIndex;
TString name = PostingTable;
if (needsBuildTable || NeedsAnotherLevel()) {
- name += Level % 2 != 0 ? BuildPostingTableSuffix0 : BuildPostingTableSuffix1;
+ name += Level % 2 != 0 ? BuildSuffix0 : BuildSuffix1;
}
return name;
}
@@ -3134,7 +3134,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> {
Y_ASSERT(Parent != 0);
using namespace NTableIndex::NTableVectorKmeansTreeIndex;
TString name = PostingTable;
- name += Level % 2 != 0 ? BuildPostingTableSuffix1 : BuildPostingTableSuffix0;
+ name += Level % 2 != 0 ? BuildSuffix1 : BuildSuffix0;
return name;
}
};
diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp
index 1a51dc7a421..9113662cbd7 100644
--- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp
@@ -265,11 +265,12 @@ auto CalcVectorKmeansTreePostingImplTableDescImpl(
SetImplTablePartitionConfig(baseTablePartitionConfig, indexTableDesc, implTableDesc);
{
auto parentColumn = implTableDesc.AddColumns();
- parentColumn->SetName(NTableVectorKmeansTreeIndex::PostingTable_ParentColumn);
+ parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn);
parentColumn->SetType("Uint32");
parentColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ parentColumn->SetNotNull(true);
}
- implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::PostingTable_ParentColumn);
+ implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::ParentColumn);
FillIndexImplTableColumns(GetColumns(baseTable), implTableColumns, implTableDesc);
implTableDesc.SetSystemColumnNamesAllowed(true);
@@ -307,25 +308,28 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreeLevelImplTableDesc(
{
auto parentColumn = implTableDesc.AddColumns();
- parentColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_ParentColumn);
+ parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn);
parentColumn->SetType("Uint32");
parentColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ parentColumn->SetNotNull(true);
}
{
auto idColumn = implTableDesc.AddColumns();
- idColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_IdColumn);
+ idColumn->SetName(NTableVectorKmeansTreeIndex::IdColumn);
idColumn->SetType("Uint32");
idColumn->SetTypeId(NScheme::NTypeIds::Uint32);
+ idColumn->SetNotNull(true);
}
{
auto centroidColumn = implTableDesc.AddColumns();
- centroidColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn);
+ centroidColumn->SetName(NTableVectorKmeansTreeIndex::CentroidColumn);
centroidColumn->SetType("String");
centroidColumn->SetTypeId(NScheme::NTypeIds::String);
+ centroidColumn->SetNotNull(true);
}
- implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::LevelTable_ParentColumn);
- implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::LevelTable_IdColumn);
+ implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::ParentColumn);
+ implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::IdColumn);
implTableDesc.SetSystemColumnNamesAllowed(true);
diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp
index dd16a3481af..3cca2d7575a 100644
--- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp
+++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp
@@ -55,11 +55,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) {
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"),
{ NLs::PathExist,
- NLs::CheckColumns(LevelTable, {LevelTable_ParentColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentColumn, LevelTable_IdColumn}, true) });
+ NLs::CheckColumns(LevelTable, {ParentColumn, IdColumn, CentroidColumn}, {}, {ParentColumn, IdColumn}, true) });
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"),
{ NLs::PathExist,
- NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id", "covered"}, {}, {PostingTable_ParentColumn, "id"}, true) });
+ NLs::CheckColumns(PostingTable, {ParentColumn, "id", "covered"}, {}, {ParentColumn, "id"}, true) });
TVector<ui64> dropTxIds;
@@ -106,11 +106,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) {
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"),
{ NLs::PathExist,
- NLs::CheckColumns(LevelTable, {LevelTable_ParentColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentColumn, LevelTable_IdColumn}, true) });
+ NLs::CheckColumns(LevelTable, {ParentColumn, IdColumn, CentroidColumn}, {}, {ParentColumn, IdColumn}, true) });
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"),
{ NLs::PathExist,
- NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id", "embedding"}, {}, {PostingTable_ParentColumn, "id"}, true) });
+ NLs::CheckColumns(PostingTable, {ParentColumn, "id", "embedding"}, {}, {ParentColumn, "id"}, true) });
}
Y_UNIT_TEST(CreateTableMultiColumn) {
@@ -150,11 +150,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) {
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"),
{ NLs::PathExist,
- NLs::CheckColumns(LevelTable, {LevelTable_ParentColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentColumn, LevelTable_IdColumn}, true) });
+ NLs::CheckColumns(LevelTable, {ParentColumn, IdColumn, CentroidColumn}, {}, {ParentColumn, IdColumn}, true) });
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"),
{ NLs::PathExist,
- NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id1", "id2", "covered1", "covered2"}, {}, {PostingTable_ParentColumn, "id1", "id2"}, true) });
+ NLs::CheckColumns(PostingTable, {ParentColumn, "id1", "id2", "covered1", "covered2"}, {}, {ParentColumn, "id1", "id2"}, true) });
}
Y_UNIT_TEST(VectorKmeansTreePostingImplTable) {
@@ -178,7 +178,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) {
}
NTableIndex::TTableColumns implTableColumns = {{"data2", "data1"}, {}};
auto desc = CalcVectorKmeansTreePostingImplTableDesc(baseTableDescr, baseTablePartitionConfig, implTableColumns, indexTableDesc, "something");
- std::string_view expected[] = {NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn, "data1", "data2"};
+ std::string_view expected[] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, "data1", "data2"};
for (size_t i = 0; auto& column : desc.GetColumns()) {
UNIT_ASSERT_STRINGS_EQUAL(column.GetName(), expected[i]);
++i;
@@ -190,7 +190,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) {
TTestEnv env(runtime);
ui64 txId = 100;
- // base table column should not contains reserved name ParentIdColumn
+ // base table column should not contains reserved name ParentColumn
TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"(
TableDescription {
Name: "vectors"
@@ -204,7 +204,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) {
Type: EIndexTypeGlobalVectorKmeansTree
VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } }
}
- )", NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn, NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn), {NKikimrScheme::StatusInvalidParameter});
+ )", NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn), {NKikimrScheme::StatusInvalidParameter});
// pk should not be covered
TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"(