diff options
author | kungasc <[email protected]> | 2025-09-15 10:27:10 +0300 |
---|---|---|
committer | kungasc <[email protected]> | 2025-09-15 10:45:35 +0300 |
commit | 814f11de7acf376c60d6943c27b6d2565cf9cec5 (patch) | |
tree | 264f92648bb63c770211bcc2c05e68c4a288989c /yql/essentials/sql | |
parent | e2627eae787b33c23ad5c80d0a722db7dea1f85a (diff) |
Pass index settings as is
It seems that there is not much sense in parsing (vector) index settings, and it is much easier to pass them as is
This will help a lot when adding a fulltext index with dozens of parameters
commit_hash:ba3d7b32d60f54dd6c8f0aba116a10450455d68b
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- | yql/essentials/sql/v1/node.h | 37 | ||||
-rw-r--r-- | yql/essentials/sql/v1/query.cpp | 28 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_translation.cpp | 120 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_translation.h | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/sql_ut_common.h | 17 |
5 files changed, 46 insertions, 160 deletions
diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h index 5bef15e5e2f..4c62fd339a1 100644 --- a/yql/essentials/sql/v1/node.h +++ b/yql/essentials/sql/v1/node.h @@ -1,6 +1,7 @@ #pragma once #include <google/protobuf/message.h> +#include <yql/essentials/public/issue/yql_issue.h> #include <yql/essentials/utils/resetable_setting.h> #include <yql/essentials/parser/proto_ast/common.h> #include <yql/essentials/public/udf/udf_data_type.h> @@ -1187,33 +1188,6 @@ namespace NSQLTranslationV1 { TNodePtr CacheMode; }; - struct TVectorIndexSettings { - enum class EDistance { - Cosine /* "cosine" */ - , Manhattan /* "manhattan" */ - , Euclidean /* "euclidean" */ - }; - - enum class ESimilarity { - Cosine /* "cosine" */ - , InnerProduct /* "inner_product" */ - }; - - enum class EVectorType { - Float /* "float" */ - , Uint8 /* "uint8" */ - , Int8 /* "int8" */ - , Bit /* "bit" */ - }; - - std::optional<EDistance> Distance; - std::optional<ESimilarity> Similarity; - std::optional<EVectorType> VectorType; - std::optional<ui32> VectorDimension; - std::optional<ui32> Clusters; - std::optional<ui32> Levels; - }; - struct TIndexDescription { enum class EType { GlobalSync, @@ -1222,6 +1196,13 @@ namespace NSQLTranslationV1 { GlobalVectorKmeansTree, }; + struct TIndexSetting { + TString Name; + TPosition NamePosition; + TString Value; + TPosition ValuePosition; + }; + TIndexDescription(const TIdentifier& name, EType type = EType::GlobalSync) : Name(name) , Type(type) @@ -1233,7 +1214,7 @@ namespace NSQLTranslationV1 { TVector<TIdentifier> DataColumns; TTableSettings TableSettings; - using TIndexSettings = std::variant<std::monostate, TVectorIndexSettings>; + using TIndexSettings = TMap<TString, TIndexSetting>; TIndexSettings IndexSettings; }; diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp index bab822823ca..6fed44cadc0 100644 --- a/yql/essentials/sql/v1/query.cpp +++ b/yql/essentials/sql/v1/query.cpp @@ -9,6 +9,7 @@ #include 
<library/cpp/charset/ci_string.h> #include <util/digest/fnv.h> +#include <yql/essentials/public/issue/yql_issue.h> using namespace NYql; @@ -288,7 +289,7 @@ static INode::TPtr CreateTableSettings(const TTableSettings& tableSettings, ETab return settings; } -static INode::TPtr CreateVectorIndexSettings(const TVectorIndexSettings& vectorIndexSettings, const INode& node) { +static INode::TPtr CreateIndexSettings(const TIndexDescription::TIndexSettings& indexSettings, const INode& node) { // short aliases for member function calls auto Y = [&node](auto&&... args) { return node.Y(std::forward<decltype(args)>(args)...); }; auto Q = [&node](auto&&... args) { return node.Q(std::forward<decltype(args)>(args)...); }; @@ -296,23 +297,10 @@ static INode::TPtr CreateVectorIndexSettings(const TVectorIndexSettings& vectorI auto settings = Y(); - if (vectorIndexSettings.Distance) { - settings = L(settings, Q(Y(Q("distance"), Q(ToString(*vectorIndexSettings.Distance))))); - } - if (vectorIndexSettings.Similarity) { - settings = L(settings, Q(Y(Q("similarity"), Q(ToString(*vectorIndexSettings.Similarity))))); - } - if (vectorIndexSettings.VectorType) { - settings = L(settings, Q(Y(Q("vector_type"), Q(ToString(*vectorIndexSettings.VectorType))))); - } - if (vectorIndexSettings.VectorDimension) { - settings = L(settings, Q(Y(Q("vector_dimension"), Q(ToString(*vectorIndexSettings.VectorDimension))))); - } - if (vectorIndexSettings.Clusters) { - settings = L(settings, Q(Y(Q("clusters"), Q(ToString(*vectorIndexSettings.Clusters))))); - } - if (vectorIndexSettings.Levels) { - settings = L(settings, Q(Y(Q("levels"), Q(ToString(*vectorIndexSettings.Levels))))); + for (const auto& [_, indexSetting] : indexSettings) { + settings = L(settings, Q(Y( + BuildQuotedAtom(indexSetting.NamePosition, indexSetting.Name), + BuildQuotedAtom(indexSetting.ValuePosition, indexSetting.Value)))); } return settings; @@ -342,10 +330,10 @@ static INode::TPtr CreateIndexDesc(const TIndexDescription& index, 
ETableSetting )); indexNode = node.L(indexNode, tableSettings); } - if (const auto* indexSettingsPtr = std::get_if<TVectorIndexSettings>(&index.IndexSettings)) { + if (index.IndexSettings) { const auto& indexSettings = node.Q(node.Y( node.Q("indexSettings"), - node.Q(CreateVectorIndexSettings(*indexSettingsPtr, node)))); + node.Q(CreateIndexSettings(index.IndexSettings, node)))); indexNode = node.L(indexNode, indexSettings); } return indexNode; diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp index 8e4bfcd7761..4375aac31f5 100644 --- a/yql/essentials/sql/v1/sql_translation.cpp +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -717,11 +717,9 @@ bool TSqlTranslation::CreateTableIndex(const TRule_table_index& node, TVector<TI //const auto& with = node.GetBlock4(); auto& index = indexes.back(); if (index.Type == TIndexDescription::EType::GlobalVectorKmeansTree) { - index.IndexSettings.emplace<TVectorIndexSettings>(); - if (!CreateIndexSettings(node.GetBlock10().GetRule_with_index_settings1(), index.Type, index.IndexSettings)) { + if (!FillIndexSettings(node.GetBlock10().GetRule_with_index_settings1(), index.IndexSettings)) { return false; } - } else { AltNotImplemented("with", indexType); return false; @@ -826,16 +824,15 @@ bool TSqlTranslation::ParseDatabaseSetting(const TRule_database_setting& in, THa return true; } -bool TSqlTranslation::CreateIndexSettings(const TRule_with_index_settings& settingsNode, - TIndexDescription::EType indexType, +bool TSqlTranslation::FillIndexSettings(const TRule_with_index_settings& settingsNode, TIndexDescription::TIndexSettings& indexSettings) { const auto& firstEntry = settingsNode.GetRule_index_setting_entry3(); - if (!CreateIndexSettingEntry(IdEx(firstEntry.GetRule_an_id1(), *this), firstEntry.GetRule_index_setting_value3(), indexType, indexSettings)) { + if (!AddIndexSetting(IdEx(firstEntry.GetRule_an_id1(), *this), firstEntry.GetRule_index_setting_value3(), indexSettings)) { 
return false; } for (auto& block : settingsNode.GetBlock4()) { const auto& entry = block.GetRule_index_setting_entry2(); - if (!CreateIndexSettingEntry(IdEx(entry.GetRule_an_id1(), *this), entry.GetRule_index_setting_value3(), indexType, indexSettings)) { + if (!AddIndexSetting(IdEx(entry.GetRule_an_id1(), *this), entry.GetRule_index_setting_value3(), indexSettings)) { return false; } } @@ -860,109 +857,26 @@ TString TSqlTranslation::GetIndexSettingStringValue(const TRule_index_setting_va } } -template<typename T> -std::tuple<bool, T, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) { - T value{}; - const TString stringValue = GetIndexSettingStringValue(node); - if (node.GetAltCase() != NSQLv1Generated::TRule_index_setting_value::kAltIndexSettingValue1 - && node.GetAltCase() != NSQLv1Generated::TRule_index_setting_value::kAltIndexSettingValue2 - || stringValue.empty()) - { - return {false, value, stringValue}; - } - if (!TryFromString<T>(to_lower(stringValue), value)) { - return {false, value, stringValue}; - } - return {true, value, stringValue}; -} - -template<> -std::tuple<bool, ui32, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) { - ui32 value = 0; - const TString stringValue = GetIndexSettingStringValue(node); - if (node.GetAltCase() != NSQLv1Generated::TRule_index_setting_value::kAltIndexSettingValue3 || stringValue.empty()) { - return {false, value, stringValue}; - } - TString suffix; - ui64 value64; - if (!ParseNumbers(Ctx_, stringValue, value64, suffix) || value64 > Max<ui32>()) { - return {false, value, stringValue}; - } - return {true, value = static_cast<ui32>(value64), stringValue}; -} - -template<> -std::tuple<bool, bool, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) { - bool value = false; - const TString stringValue = GetIndexSettingStringValue(node); - if (node.GetAltCase() != 
NSQLv1Generated::TRule_index_setting_value::kAltIndexSettingValue4 || stringValue.empty()) { - return {false, value, stringValue}; - } - if (!TryFromString<bool>(to_lower(stringValue), value)) { - return {false, value, stringValue}; - } - return {true, value, stringValue}; -} - -bool TSqlTranslation::CreateIndexSettingEntry(const TIdentifier &id, +bool TSqlTranslation::AddIndexSetting(const TIdentifier &id, const TRule_index_setting_value& node, - TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings) { + // TODO: remove to_lower transformation after the next release to keep backward compatibility + const auto name = to_lower(id.Name); + const auto value = to_lower(GetIndexSettingStringValue(node)); - if (indexType == TIndexDescription::EType::GlobalVectorKmeansTree) { - TVectorIndexSettings &vectorIndexSettings = std::get<TVectorIndexSettings>(indexSettings); + TIndexDescription::TIndexSetting indexSetting { + .Name = name, + .NamePosition = id.Pos, + .Value = value, + .ValuePosition = Ctx_.Pos() + }; - if (to_lower(id.Name) == "distance") { - const auto [success, value, stringValue] = GetIndexSettingValue<TVectorIndexSettings::EDistance>(node); - if (!success) { - Ctx_.Error() << "Invalid distance: " << stringValue; - return false; - } - vectorIndexSettings.Distance = value; - } else if (to_lower(id.Name) == "similarity") { - const auto [success, value, stringValue] = GetIndexSettingValue<TVectorIndexSettings::ESimilarity>(node); - if (!success) { - Ctx_.Error() << "Invalid similarity: " << stringValue; - return false; - } - vectorIndexSettings.Similarity = value; - } else if (to_lower(id.Name) == "vector_type") { - const auto [success, value, stringValue] = GetIndexSettingValue<TVectorIndexSettings::EVectorType>(node); - if (!success) { - Ctx_.Error() << "Invalid vector_type: " << stringValue; - return false; - } - vectorIndexSettings.VectorType = value; - } else if (to_lower(id.Name) == "vector_dimension") { - const auto [success, 
value, stringValue] = GetIndexSettingValue<ui32>(node); - if (!success) { - Ctx_.Error() << "Invalid vector_dimension: " << stringValue; - return false; - } - vectorIndexSettings.VectorDimension = value; - } else if (to_lower(id.Name) == "clusters") { - const auto [success, value, stringValue] = GetIndexSettingValue<ui32>(node); - if (!success) { - Ctx_.Error() << "Invalid clusters: " << stringValue; - return false; - } - vectorIndexSettings.Clusters = value; - } else if (to_lower(id.Name) == "levels") { - const auto [success, value, stringValue] = GetIndexSettingValue<ui32>(node); - if (!success) { - Ctx_.Error() << "Invalid levels: " << stringValue; - return false; - } - vectorIndexSettings.Levels = value; - } else { - Ctx_.Error() << "Unknown index setting: " << id.Name; - return false; - } - } else { - Ctx_.Error() << "Unknown index setting: " << id.Name; + if (!indexSettings.emplace(name, indexSetting).second) { + Ctx_.Error() << "Duplicated " << name; return false; } + return true; } diff --git a/yql/essentials/sql/v1/sql_translation.h b/yql/essentials/sql/v1/sql_translation.h index 9d64e40d7ad..87a489d5afb 100644 --- a/yql/essentials/sql/v1/sql_translation.h +++ b/yql/essentials/sql/v1/sql_translation.h @@ -183,8 +183,8 @@ protected: bool ResetTableSettingsEntry(const TIdentifier& id, TTableSettings& settings, ETableType tableType); bool CreateTableIndex(const TRule_table_index& node, TVector<TIndexDescription>& indexes); - bool CreateIndexSettings(const TRule_with_index_settings& settingsNode, TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings); - bool CreateIndexSettingEntry(const TIdentifier& id, const TRule_index_setting_value& value, TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings); + bool FillIndexSettings(const TRule_with_index_settings& settingsNode, TIndexDescription::TIndexSettings& indexSettings); + bool AddIndexSetting(const TIdentifier& id, const TRule_index_setting_value& 
value, TIndexDescription::TIndexSettings& indexSettings); TString GetIndexSettingStringValue(const TRule_index_setting_value& node); template<typename T> std::tuple<bool, T, TString> GetIndexSettingValue(const TRule_index_setting_value& node); diff --git a/yql/essentials/sql/v1/sql_ut_common.h b/yql/essentials/sql/v1/sql_ut_common.h index e58a074f7e5..f8b15bed2dc 100644 --- a/yql/essentials/sql/v1/sql_ut_common.h +++ b/yql/essentials/sql/v1/sql_ut_common.h @@ -3419,22 +3419,25 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_C(result.IsOk(), result.Issues.ToString()); } - Y_UNIT_TEST(AlterTableAddIndexVectorIsNotCorrect) { - ExpectFailWithError(R"sql(USE plato; + Y_UNIT_TEST(AlterTableAddIndexDifferentSettings) { + // index settings and their types are checked in KQP + const auto result = SqlToYql(R"sql(USE plato; ALTER TABLE table ADD INDEX idx GLOBAL USING vector_kmeans_tree ON (col) COVER (col) - WITH (distance=cosine, vector_type="float", vector_dimension=asdf, levels=3, clusters=10) - )sql", - "<main>:5:78: Error: Invalid vector_dimension: asdf\n"); + WITH (distance=42, vector_type="float", vector_dimension=True, levels=none, clusters=10, asdf=qwerty) + )sql"); + UNIT_ASSERT_C(result.IsOk(), result.Issues.ToString()); + } + Y_UNIT_TEST(AlterTableAddIndexDuplicatedSetting) { ExpectFailWithError(R"sql(USE plato; ALTER TABLE table ADD INDEX idx GLOBAL USING vector_kmeans_tree ON (col) COVER (col) - WITH (distance=42, vector_type="float", vector_dimension=1024, levels=3, clusters=10) + WITH (distance=cosine, distance=42) )sql", - "<main>:5:32: Error: Invalid distance: 42\n"); + "<main>:5:49: Error: Duplicated distance\n"); } Y_UNIT_TEST(AlterTableAddIndexUnknownSubtype) { |