diff options
author | Valery Mironov <mbkkt@ydb.tech> | 2024-12-18 22:59:41 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-18 22:59:41 +0300 |
commit | db42db4322443721201debbfd11ef19e48e2d4f5 (patch) | |
tree | 763868396fef7be09af926d983548cb587b0f949 | |
parent | f3c9a60838fb5b413885d9894985861533874f25 (diff) | |
download | ydb-db42db4322443721201debbfd11ef19e48e2d4f5.tar.gz |
Some fixes in vector index schema (#12727)
* Rename vector index impl table column constants to make them more convenient and less confusing (e.g. a single constant for ParentColumn, which is important for future kqp rewrite rules)
* Rename `__ydb_embedding` column to `__ydb_centroid` column, because it makes more sense
* Add the missing NOT NULL property to vector index impl table columns
-rw-r--r-- | ydb/core/base/table_index.cpp | 12 | ||||
-rw-r--r-- | ydb/core/base/table_vector_index.h | 13 | ||||
-rw-r--r-- | ydb/core/base/ut/table_index_ut.cpp | 22 | ||||
-rw-r--r-- | ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp | 50 | ||||
-rw-r--r-- | ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp | 20 | ||||
-rw-r--r-- | ydb/core/tx/datashard/kmeans_helper.cpp | 2 | ||||
-rw-r--r-- | ydb/core/tx/datashard/local_kmeans.cpp | 6 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp | 4 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp | 8 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_info_types.h | 4 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/schemeshard_utils.cpp | 18 | ||||
-rw-r--r-- | ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp | 18 |
12 files changed, 91 insertions, 86 deletions
diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index c1d827ea519..fb2e136c480 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -115,17 +115,17 @@ bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumn return false; } - if (Contains(table.Keys, NTableVectorKmeansTreeIndex::PostingTable_ParentColumn)) { - explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn; + if (Contains(table.Keys, NTableVectorKmeansTreeIndex::ParentColumn)) { + explain = TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn; return false; } - if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentColumn)) { + if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::ParentColumn)) { // This isn't really needed, but it will be really strange to have column with such name but different meaning - explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn; + explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn; return false; } - if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentColumn)) { - explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn; + if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::ParentColumn)) { + explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn; return false; } } diff --git a/ydb/core/base/table_vector_index.h b/ydb/core/base/table_vector_index.h index 4878ed11bf0..e4f29bf0248 100644 --- a/ydb/core/base/table_vector_index.h +++ b/ydb/core/base/table_vector_index.h @@ 
-4,17 +4,18 @@ namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex { // Vector KmeansTree index tables description +// Level and Posting tables +inline constexpr const char* ParentColumn = "__ydb_parent"; + // Level table inline constexpr const char* LevelTable = "indexImplLevelTable"; -inline constexpr const char* LevelTable_ParentColumn = "__ydb_parent"; -inline constexpr const char* LevelTable_IdColumn = "__ydb_id"; -inline constexpr const char* LevelTable_EmbeddingColumn = "__ydb_embedding"; +inline constexpr const char* IdColumn = "__ydb_id"; +inline constexpr const char* CentroidColumn = "__ydb_centroid"; // Posting table inline constexpr const char* PostingTable = "indexImplPostingTable"; -inline constexpr const char* PostingTable_ParentColumn = LevelTable_ParentColumn; -inline constexpr const char* BuildPostingTableSuffix0 = "0build"; -inline constexpr const char* BuildPostingTableSuffix1 = "1build"; +inline constexpr const char* BuildSuffix0 = "0build"; +inline constexpr const char* BuildSuffix1 = "1build"; } diff --git a/ydb/core/base/ut/table_index_ut.cpp b/ydb/core/base/ut/table_index_ut.cpp index ccac3e26a84..932c43007c9 100644 --- a/ydb/core/base/ut/table_index_ut.cpp +++ b/ydb/core/base/ut/table_index_ut.cpp @@ -27,16 +27,16 @@ Y_UNIT_TEST_SUITE (TableIndex) { UNIT_ASSERT_STRINGS_EQUAL(explain, ""); { - const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {"PK"}}; + const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {"PK"}}; - UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {}}, explain)); + UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::ParentColumn}, {}}, explain)); UNIT_ASSERT_STRINGS_EQUAL(explain, ""); - UNIT_ASSERT(IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}}, explain)); + UNIT_ASSERT(IsCompatibleIndex(type, Table2, 
{{"DATA"}, {NTableVectorKmeansTreeIndex::ParentColumn}}, explain)); UNIT_ASSERT_STRINGS_EQUAL(explain, ""); } { - const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}}; + const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {NTableVectorKmeansTreeIndex::ParentColumn}}; UNIT_ASSERT(IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain)); UNIT_ASSERT_STRINGS_EQUAL(explain, ""); @@ -118,19 +118,19 @@ Y_UNIT_TEST_SUITE (TableIndex) { UNIT_ASSERT_STRINGS_EQUAL(explain, "the same column can't be used as key and data column for one index, for example PK2"); { - const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {"PK"}}; + const TTableColumns Table2{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {"PK"}}; - UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {}}, explain)); - UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn); + UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{NTableVectorKmeansTreeIndex::ParentColumn}, {}}, explain)); + UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn); - UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}}, explain)); - UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn); + UNIT_ASSERT(!IsCompatibleIndex(type, Table2, {{"DATA"}, {NTableVectorKmeansTreeIndex::ParentColumn}}, explain)); + UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "index data column shouldn't have a reserved name: " << 
NTableVectorKmeansTreeIndex::ParentColumn); } { - const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}, {NTableVectorKmeansTreeIndex::PostingTable_ParentColumn}}; + const TTableColumns Table3{{"PK", "DATA", NTableVectorKmeansTreeIndex::ParentColumn}, {NTableVectorKmeansTreeIndex::ParentColumn}}; UNIT_ASSERT(!IsCompatibleIndex(type, Table3, {{"DATA"}, {}}, explain)); - UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentColumn); + UNIT_ASSERT_STRINGS_EQUAL(explain, TStringBuilder() << "table key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::ParentColumn); } } } diff --git a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp index 3db8d4c1bed..ee7a32ab266 100644 --- a/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp +++ b/ydb/core/tx/datashard/datashard_ut_local_kmeans.cpp @@ -183,9 +183,9 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {LevelTable_ParentColumn, "Uint32", true, true}, - {LevelTable_IdColumn, "Uint32", true, true}, - {LevelTable_EmbeddingColumn, "String", false, true}, + {ParentColumn, "Uint32", true, true}, + {IdColumn, "Uint32", true, true}, + {CentroidColumn, "String", false, true}, }); CreateShardedTable(server, sender, "/Root", "table-level", options); } @@ -194,7 +194,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {PostingTable_ParentColumn, "Uint32", true, true}, + {ParentColumn, "Uint32", true, true}, {"key", "Uint32", true, true}, {"data", "String", false, false}, }); @@ -206,7 +206,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {PostingTable_ParentColumn, "Uint32", true, true}, + {ParentColumn, "Uint32", true, 
true}, {"key", "Uint32", true, true}, {"embedding", "String", false, false}, {"data", "String", false, false}, @@ -350,8 +350,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_POSTING, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = mm\3\n" - "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = 11\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = mm\3\n" + "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = 11\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 4, data = four\n" "__ydb_parent = 1, key = 5, data = five\n" "__ydb_parent = 2, key = 1, data = one\n" @@ -365,8 +365,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_POSTING, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = 11\3\n" - "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = mm\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = 11\3\n" + "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = mm\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, data = one\n" "__ydb_parent = 1, key = 2, data = two\n" "__ydb_parent = 1, key = 3, data = three\n" @@ -381,7 +381,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_POSTING, VectorIndexSettings::VECTOR_TYPE_UINT8, similarity); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = II\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = 
II\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, data = one\n" "__ydb_parent = 1, key = 2, data = two\n" "__ydb_parent = 1, key = 3, data = three\n" @@ -440,8 +440,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_BUILD, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = mm\3\n" - "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = 11\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = mm\3\n" + "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = 11\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 4, embedding = \x65\x65\3, data = four\n" "__ydb_parent = 1, key = 5, embedding = \x75\x75\3, data = five\n" "__ydb_parent = 2, key = 1, embedding = \x30\x30\3, data = one\n" @@ -455,8 +455,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_BUILD, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = 11\3\n" - "__ydb_parent = 0, __ydb_id = 2, __ydb_embedding = mm\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = 11\3\n" + "__ydb_parent = 0, __ydb_id = 2, __ydb_centroid = mm\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, embedding = \x30\x30\3, data = one\n" "__ydb_parent = 1, key = 2, embedding = \x31\x31\3, data = two\n" "__ydb_parent = 1, key = 3, embedding = \x32\x32\3, data = three\n" @@ -471,7 +471,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 0, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_MAIN_TO_BUILD, 
VectorIndexSettings::VECTOR_TYPE_UINT8, similarity); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_embedding = II\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 0, __ydb_id = 1, __ydb_centroid = II\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 1, key = 1, embedding = \x30\x30\3, data = one\n" "__ydb_parent = 1, key = 2, embedding = \x31\x31\3, data = two\n" "__ydb_parent = 1, key = 3, embedding = \x32\x32\3, data = three\n" @@ -532,8 +532,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_POSTING, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = mm\3\n" - "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = 11\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = mm\3\n" + "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = 11\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 4, data = four\n" "__ydb_parent = 41, key = 5, data = five\n" "__ydb_parent = 42, key = 1, data = one\n" @@ -547,8 +547,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_POSTING, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = 11\3\n" - "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = mm\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = 11\3\n" + "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = mm\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, data = one\n" "__ydb_parent = 41, key = 2, data = two\n" "__ydb_parent = 41, key = 3, data = three\n" @@ -563,7 +563,7 @@ Y_UNIT_TEST_SUITE 
(TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_POSTING, VectorIndexSettings::VECTOR_TYPE_UINT8, similarity); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = II\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = II\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, data = one\n" "__ydb_parent = 41, key = 2, data = two\n" "__ydb_parent = 41, key = 3, data = three\n" @@ -624,8 +624,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_BUILD, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = mm\3\n" - "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = 11\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = mm\3\n" + "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = 11\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 4, embedding = \x65\x65\3, data = four\n" "__ydb_parent = 41, key = 5, embedding = \x75\x75\3, data = five\n" "__ydb_parent = 42, key = 1, embedding = \x30\x30\3, data = one\n" @@ -639,8 +639,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_BUILD, VectorIndexSettings::VECTOR_TYPE_UINT8, distance); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = 11\3\n" - "__ydb_parent = 40, __ydb_id = 42, __ydb_embedding = mm\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = 11\3\n" + "__ydb_parent = 40, __ydb_id = 42, __ydb_centroid = mm\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, 
"__ydb_parent = 41, key = 1, embedding = \x30\x30\3, data = one\n" "__ydb_parent = 41, key = 2, embedding = \x31\x31\3, data = two\n" "__ydb_parent = 41, key = 3, embedding = \x32\x32\3, data = three\n" @@ -655,7 +655,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardLocalKMeansScan) { auto [level, posting] = DoLocalKMeans(server, sender, 40, seed, k, NKikimrTxDataShard::TEvLocalKMeansRequest::UPLOAD_BUILD_TO_BUILD, VectorIndexSettings::VECTOR_TYPE_UINT8, similarity); - UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_embedding = II\3\n"); + UNIT_ASSERT_VALUES_EQUAL(level, "__ydb_parent = 40, __ydb_id = 41, __ydb_centroid = II\3\n"); UNIT_ASSERT_VALUES_EQUAL(posting, "__ydb_parent = 41, key = 1, embedding = \x30\x30\3, data = one\n" "__ydb_parent = 41, key = 2, embedding = \x31\x31\3, data = two\n" "__ydb_parent = 41, key = 3, embedding = \x32\x32\3, data = three\n" diff --git a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp index 6267ebdeeba..f358b393d9f 100644 --- a/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp +++ b/ydb/core/tx/datashard/datashard_ut_reshuffle_kmeans.cpp @@ -171,7 +171,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {PostingTable_ParentColumn, "Uint32", true, true}, + {ParentColumn, "Uint32", true, true}, {"key", "Uint32", true, true}, {"data", "String", false, false}, }); @@ -183,7 +183,7 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { { options.AllowSystemColumnNames(true); options.Columns({ - {PostingTable_ParentColumn, "Uint32", true, true}, + {ParentColumn, "Uint32", true, true}, {"key", "Uint32", true, true}, {"embedding", "String", false, false}, {"data", "String", false, false}, @@ -292,8 +292,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { // Upsert some initial values ExecSQL(server, sender, R"( - UPSERT INTO `/Root/table-main` - (key, embedding, data) + UPSERT 
INTO `/Root/table-main` + (key, embedding, data) VALUES )" "(1, \"\x30\x30\3\", \"one\")," "(2, \"\x31\x31\3\", \"two\")," @@ -377,8 +377,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { // Upsert some initial values ExecSQL(server, sender, R"( - UPSERT INTO `/Root/table-main` - (key, embedding, data) + UPSERT INTO `/Root/table-main` + (key, embedding, data) VALUES )" "(1, \"\x30\x30\3\", \"one\")," "(2, \"\x31\x31\3\", \"two\")," @@ -462,8 +462,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { // Upsert some initial values ExecSQL(server, sender, R"( - UPSERT INTO `/Root/table-main` - (__ydb_parent, key, embedding, data) + UPSERT INTO `/Root/table-main` + (__ydb_parent, key, embedding, data) VALUES )" "(39, 1, \"\x30\x30\3\", \"one\")," "(40, 1, \"\x30\x30\3\", \"one\")," @@ -549,8 +549,8 @@ Y_UNIT_TEST_SUITE (TTxDataShardReshuffleKMeansScan) { // Upsert some initial values ExecSQL(server, sender, R"( - UPSERT INTO `/Root/table-main` - (__ydb_parent, key, embedding, data) + UPSERT INTO `/Root/table-main` + (__ydb_parent, key, embedding, data) VALUES )" "(39, 1, \"\x30\x30\3\", \"one\")," "(40, 1, \"\x30\x30\3\", \"one\")," diff --git a/ydb/core/tx/datashard/kmeans_helper.cpp b/ydb/core/tx/datashard/kmeans_helper.cpp index e755d09c5ce..842b583b524 100644 --- a/ydb/core/tx/datashard/kmeans_helper.cpp +++ b/ydb/core/tx/datashard/kmeans_helper.cpp @@ -97,7 +97,7 @@ MakeUploadTypes(const TUserTable& table, NKikimrTxDataShard::TEvLocalKMeansReque Ydb::Type type; type.set_type_id(Ydb::Type::UINT32); - uploadTypes->emplace_back(NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn, type); + uploadTypes->emplace_back(NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type); auto addType = [&](const auto& column) { auto it = types.find(column); diff --git a/ydb/core/tx/datashard/local_kmeans.cpp b/ydb/core/tx/datashard/local_kmeans.cpp index d038319c69a..2962f2d050f 100644 --- a/ydb/core/tx/datashard/local_kmeans.cpp +++ 
b/ydb/core/tx/datashard/local_kmeans.cpp @@ -145,10 +145,10 @@ public: if (Ydb::Type type; State <= EState::KMEANS) { TargetTypes = std::make_shared<NTxProxy::TUploadTypes>(3); type.set_type_id(Ydb::Type::UINT32); - (*TargetTypes)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_ParentColumn, type}; - (*TargetTypes)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_IdColumn, type}; + (*TargetTypes)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type}; + (*TargetTypes)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type}; type.set_type_id(Ydb::Type::STRING); - (*TargetTypes)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn, type}; + (*TargetTypes)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn, type}; } NextTypes = MakeUploadTypes(table, UploadState, embedding, data); } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp index 6f21fab6423..d9d80078be1 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_build_index.cpp @@ -133,8 +133,8 @@ TVector<ISubOperation::TPtr> CreateBuildIndex(TOperationId opId, const TTxTransa result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc))); // TODO Maybe better to use partition from main table // This tables are temporary and handled differently in apply_build_index - result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildPostingTableSuffix0))); - result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, 
NTableVectorKmeansTreeIndex::BuildPostingTableSuffix1))); + result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildSuffix0))); + result.push_back(createImplTable(CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, indexPostingTableDesc, NTableVectorKmeansTreeIndex::BuildSuffix1))); } else { NKikimrSchemeOp::TTableDescription indexTableDesc; // TODO After IndexImplTableDescriptions are persisted, this should be replaced with Y_ABORT_UNLESS diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index b56852d17d2..b8958e4468c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -161,10 +161,10 @@ public: Types = std::make_shared<NTxProxy::TUploadTypes>(3); Ydb::Type type; type.set_type_id(Ydb::Type::UINT32); - (*Types)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_ParentColumn, type}; - (*Types)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_IdColumn, type}; + (*Types)[0] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, type}; + (*Types)[1] = {NTableIndex::NTableVectorKmeansTreeIndex::IdColumn, type}; type.set_type_id(Ydb::Type::STRING); - (*Types)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn, type}; + (*Types)[2] = {NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn, type}; Become(&TThis::StateWork); @@ -337,7 +337,7 @@ THolder<TEvSchemeShard::TEvModifySchemeTransaction> CreateBuildPropose( modifyScheme.SetWorkingDir(path.Dive(buildInfo.IndexName).PathString()); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpInitiateBuildIndexImplTable); auto& op = *modifyScheme.MutableCreateTable(); - const char* suffix = buildInfo.KMeans.Level % 2 != 
0 ? BuildPostingTableSuffix0 : BuildPostingTableSuffix1; + const char* suffix = buildInfo.KMeans.Level % 2 != 0 ? BuildSuffix0 : BuildSuffix1; op = CalcVectorKmeansTreePostingImplTableDesc(tableInfo, tableInfo->PartitionConfig(), implTableColumns, {}, suffix); const auto [count, parts, step] = ComputeKMeansBoundaries(*tableInfo, buildInfo); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 63a7e9d1a1d..a6623c63e54 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -3126,7 +3126,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { using namespace NTableIndex::NTableVectorKmeansTreeIndex; TString name = PostingTable; if (needsBuildTable || NeedsAnotherLevel()) { - name += Level % 2 != 0 ? BuildPostingTableSuffix0 : BuildPostingTableSuffix1; + name += Level % 2 != 0 ? BuildSuffix0 : BuildSuffix1; } return name; } @@ -3134,7 +3134,7 @@ struct TIndexBuildInfo: public TSimpleRefCount<TIndexBuildInfo> { Y_ASSERT(Parent != 0); using namespace NTableIndex::NTableVectorKmeansTreeIndex; TString name = PostingTable; - name += Level % 2 != 0 ? BuildPostingTableSuffix1 : BuildPostingTableSuffix0; + name += Level % 2 != 0 ? 
BuildSuffix1 : BuildSuffix0; return name; } }; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 1a51dc7a421..9113662cbd7 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -265,11 +265,12 @@ auto CalcVectorKmeansTreePostingImplTableDescImpl( SetImplTablePartitionConfig(baseTablePartitionConfig, indexTableDesc, implTableDesc); { auto parentColumn = implTableDesc.AddColumns(); - parentColumn->SetName(NTableVectorKmeansTreeIndex::PostingTable_ParentColumn); + parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn); parentColumn->SetType("Uint32"); parentColumn->SetTypeId(NScheme::NTypeIds::Uint32); + parentColumn->SetNotNull(true); } - implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::PostingTable_ParentColumn); + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::ParentColumn); FillIndexImplTableColumns(GetColumns(baseTable), implTableColumns, implTableDesc); implTableDesc.SetSystemColumnNamesAllowed(true); @@ -307,25 +308,28 @@ NKikimrSchemeOp::TTableDescription CalcVectorKmeansTreeLevelImplTableDesc( { auto parentColumn = implTableDesc.AddColumns(); - parentColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_ParentColumn); + parentColumn->SetName(NTableVectorKmeansTreeIndex::ParentColumn); parentColumn->SetType("Uint32"); parentColumn->SetTypeId(NScheme::NTypeIds::Uint32); + parentColumn->SetNotNull(true); } { auto idColumn = implTableDesc.AddColumns(); - idColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_IdColumn); + idColumn->SetName(NTableVectorKmeansTreeIndex::IdColumn); idColumn->SetType("Uint32"); idColumn->SetTypeId(NScheme::NTypeIds::Uint32); + idColumn->SetNotNull(true); } { auto centroidColumn = implTableDesc.AddColumns(); - centroidColumn->SetName(NTableVectorKmeansTreeIndex::LevelTable_EmbeddingColumn); + centroidColumn->SetName(NTableVectorKmeansTreeIndex::CentroidColumn); 
centroidColumn->SetType("String"); centroidColumn->SetTypeId(NScheme::NTypeIds::String); + centroidColumn->SetNotNull(true); } - implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::LevelTable_ParentColumn); - implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::LevelTable_IdColumn); + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::ParentColumn); + implTableDesc.AddKeyColumnNames(NTableVectorKmeansTreeIndex::IdColumn); implTableDesc.SetSystemColumnNamesAllowed(true); diff --git a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp index dd16a3481af..3cca2d7575a 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_vector_index.cpp @@ -55,11 +55,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), { NLs::PathExist, - NLs::CheckColumns(LevelTable, {LevelTable_ParentColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentColumn, LevelTable_IdColumn}, true) }); + NLs::CheckColumns(LevelTable, {ParentColumn, IdColumn, CentroidColumn}, {}, {ParentColumn, IdColumn}, true) }); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), { NLs::PathExist, - NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id", "covered"}, {}, {PostingTable_ParentColumn, "id"}, true) }); + NLs::CheckColumns(PostingTable, {ParentColumn, "id", "covered"}, {}, {ParentColumn, "id"}, true) }); TVector<ui64> dropTxIds; @@ -106,11 +106,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), { NLs::PathExist, - NLs::CheckColumns(LevelTable, {LevelTable_ParentColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentColumn, LevelTable_IdColumn}, true) }); + NLs::CheckColumns(LevelTable, 
{ParentColumn, IdColumn, CentroidColumn}, {}, {ParentColumn, IdColumn}, true) }); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), { NLs::PathExist, - NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id", "embedding"}, {}, {PostingTable_ParentColumn, "id"}, true) }); + NLs::CheckColumns(PostingTable, {ParentColumn, "id", "embedding"}, {}, {ParentColumn, "id"}, true) }); } Y_UNIT_TEST(CreateTableMultiColumn) { @@ -150,11 +150,11 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplLevelTable"), { NLs::PathExist, - NLs::CheckColumns(LevelTable, {LevelTable_ParentColumn, LevelTable_IdColumn, LevelTable_EmbeddingColumn}, {}, {LevelTable_ParentColumn, LevelTable_IdColumn}, true) }); + NLs::CheckColumns(LevelTable, {ParentColumn, IdColumn, CentroidColumn}, {}, {ParentColumn, IdColumn}, true) }); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/idx_vector/indexImplPostingTable"), { NLs::PathExist, - NLs::CheckColumns(PostingTable, {PostingTable_ParentColumn, "id1", "id2", "covered1", "covered2"}, {}, {PostingTable_ParentColumn, "id1", "id2"}, true) }); + NLs::CheckColumns(PostingTable, {ParentColumn, "id1", "id2", "covered1", "covered2"}, {}, {ParentColumn, "id1", "id2"}, true) }); } Y_UNIT_TEST(VectorKmeansTreePostingImplTable) { @@ -178,7 +178,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { } NTableIndex::TTableColumns implTableColumns = {{"data2", "data1"}, {}}; auto desc = CalcVectorKmeansTreePostingImplTableDesc(baseTableDescr, baseTablePartitionConfig, implTableColumns, indexTableDesc, "something"); - std::string_view expected[] = {NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn, "data1", "data2"}; + std::string_view expected[] = {NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, "data1", "data2"}; for (size_t i = 0; auto& column : desc.GetColumns()) { 
UNIT_ASSERT_STRINGS_EQUAL(column.GetName(), expected[i]); ++i; @@ -190,7 +190,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { TTestEnv env(runtime); ui64 txId = 100; - // base table column should not contains reserved name ParentIdColumn + // base table column should not contains reserved name ParentColumn TestCreateIndexedTable(runtime, ++txId, "/MyRoot", Sprintf(R"( TableDescription { Name: "vectors" @@ -204,7 +204,7 @@ Y_UNIT_TEST_SUITE(TVectorIndexTests) { Type: EIndexTypeGlobalVectorKmeansTree VectorIndexKmeansTreeDescription: { Settings: { settings: { metric: DISTANCE_COSINE, vector_type: VECTOR_TYPE_FLOAT, vector_dimension: 1024 } } } } - )", NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn, NTableIndex::NTableVectorKmeansTreeIndex::PostingTable_ParentColumn), {NKikimrScheme::StatusInvalidParameter}); + )", NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn, NTableIndex::NTableVectorKmeansTreeIndex::ParentColumn), {NKikimrScheme::StatusInvalidParameter}); // pk should not be covered TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( |