diff options
author | vityaman <[email protected]> | 2025-04-14 13:06:15 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-04-14 13:40:02 +0300 |
commit | 14005fcfba8efa6918e54d823bff6780d9922f8f (patch) | |
tree | a0be3398ab981aed52ac31b4d168d9f8680405a7 /yql/essentials/sql/v1/complete | |
parent | 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (diff) |
YQL-19747 Rank keywords just by plain usages
- [x] Rank keywords just by plain usages
- [x] `LocalSyntaxAnalysis` now returns a mapping `:: Keyword -> [Following Keywords]`.
- [x] Extracted keyword sequence formatting from `syntax/local` to `syntax/format`.
- [x] Extracted token display logic from `syntax/local` to `antlr4/vocabulary` as it is ANTLR dependent.
---
Example
```console
$ ./yql_complete <<< "select "
[Keyword] CAST(
[Keyword] NULL
[Keyword] NOT
[FunctionName] If(
[FunctionName] Yson::ConvertToString(
[FunctionName] Count(
[FunctionName] Sum(
[FunctionName] Unwrap(
[FunctionName] Coalesce(
[Keyword] DISTINCT
[Keyword] ALL
[Keyword] CASE
[FunctionName] Max(
[Keyword] FALSE
[FunctionName] Some(
```
---
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/17
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1197
commit_hash:f42cb4aaffe6de7c9137069c4d9c635ee110a805
Diffstat (limited to 'yql/essentials/sql/v1/complete')
19 files changed, 198 insertions, 91 deletions
diff --git a/yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp b/yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp new file mode 100644 index 00000000000..0938ae3aca0 --- /dev/null +++ b/yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp @@ -0,0 +1,14 @@ +#include "vocabulary.h" + +namespace NSQLComplete { + + std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) { + auto name = vocabulary.getDisplayName(tokenType); + if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) { + name.erase(static_cast<std::string::size_type>(0), 1); + name.pop_back(); + } + return name; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/antlr4/vocabulary.h b/yql/essentials/sql/v1/complete/antlr4/vocabulary.h new file mode 100644 index 00000000000..deb67828800 --- /dev/null +++ b/yql/essentials/sql/v1/complete/antlr4/vocabulary.h @@ -0,0 +1,13 @@ +#pragma once + +#include "defs.h" + +#include <contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h> + +#include <string> + +namespace NSQLComplete { + + std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/antlr4/ya.make b/yql/essentials/sql/v1/complete/antlr4/ya.make index 36145606177..b79ae1b60bb 100644 --- a/yql/essentials/sql/v1/complete/antlr4/ya.make +++ b/yql/essentials/sql/v1/complete/antlr4/ya.make @@ -1,5 +1,9 @@ LIBRARY() +SRCS( + vocabulary.cpp +) + PEERDIR( contrib/libs/antlr4_cpp_runtime contrib/libs/antlr4-c3 diff --git a/yql/essentials/sql/v1/complete/name/name_service.h b/yql/essentials/sql/v1/complete/name/name_service.h index 34a109d0013..665068e1520 100644 --- a/yql/essentials/sql/v1/complete/name/name_service.h +++ b/yql/essentials/sql/v1/complete/name/name_service.h @@ -19,6 +19,10 @@ namespace NSQLComplete { TString Namespace; }; + struct TKeyword { + TString Content; + }; + struct TPragmaName: TIndentifier { struct TConstraints: 
TNamespaced {}; }; @@ -38,12 +42,14 @@ namespace NSQLComplete { }; using TGenericName = std::variant< + TKeyword, TPragmaName, TTypeName, TFunctionName, THintName>; struct TNameRequest { + TVector<TString> Keywords; struct { std::optional<TPragmaName::TConstraints> Pragma; std::optional<TTypeName::TConstraints> Type; @@ -54,7 +60,8 @@ namespace NSQLComplete { size_t Limit = 128; bool IsEmpty() const { - return !Constraints.Pragma && + return Keywords.empty() && + !Constraints.Pragma && !Constraints.Type && !Constraints.Function && !Constraints.Hint; diff --git a/yql/essentials/sql/v1/complete/name/static/frequency.cpp b/yql/essentials/sql/v1/complete/name/static/frequency.cpp index b3707533e69..62997ccff7a 100644 --- a/yql/essentials/sql/v1/complete/name/static/frequency.cpp +++ b/yql/essentials/sql/v1/complete/name/static/frequency.cpp @@ -17,6 +17,7 @@ namespace NSQLComplete { const char* Pragma = "PRAGMA"; const char* Type = "TYPE"; const char* Func = "FUNC"; + const char* Keyword = "KEYWORD"; const char* Module = "MODULE"; const char* ModuleFunc = "MODULE_FUNC"; const char* ReadHint = "READ_HINT"; @@ -59,6 +60,7 @@ namespace NSQLComplete { if (item.Parent == Json.Parent.Pragma || item.Parent == Json.Parent.Type || item.Parent == Json.Parent.Func || + item.Parent == Json.Parent.Keyword || item.Parent == Json.Parent.ModuleFunc || item.Parent == Json.Parent.Module || item.Parent == Json.Parent.ReadHint || @@ -70,6 +72,8 @@ namespace NSQLComplete { data.Pragmas[item.Rule] += item.Sum; } else if (item.Parent == Json.Parent.Type) { data.Types[item.Rule] += item.Sum; + } else if (item.Parent == Json.Parent.Keyword) { + data.Keywords[item.Rule] += item.Sum; } else if (item.Parent == Json.Parent.Module) { // Ignore, unsupported: Modules } else if (item.Parent == Json.Parent.Func || diff --git a/yql/essentials/sql/v1/complete/name/static/frequency.h b/yql/essentials/sql/v1/complete/name/static/frequency.h index 024d93cefcb..6925c99fa5e 100644 --- 
a/yql/essentials/sql/v1/complete/name/static/frequency.h +++ b/yql/essentials/sql/v1/complete/name/static/frequency.h @@ -6,6 +6,7 @@ namespace NSQLComplete { struct TFrequencyData { + THashMap<TString, size_t> Keywords; THashMap<TString, size_t> Pragmas; THashMap<TString, size_t> Types; THashMap<TString, size_t> Functions; diff --git a/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp b/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp index a5fd8fad00a..8f7eafed2ea 100644 --- a/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp +++ b/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp @@ -10,6 +10,7 @@ Y_UNIT_TEST_SUITE(FrequencyTests) { TFrequencyData actual = ParseJsonFrequencyData(R"([ {"parent":"FUNC","rule":"ABC","sum":1}, {"parent":"TYPE","rule":"BIGINT","sum":7101}, + {"parent":"KEYWORD","rule":"UNION","sum":65064443}, {"parent":"MODULE_FUNC","rule":"Compress::BZip2","sum":2}, {"parent":"MODULE","rule":"re2","sum":3094}, {"parent":"READ_HINT","rule":"COLUMNS","sum":826110}, @@ -18,6 +19,9 @@ Y_UNIT_TEST_SUITE(FrequencyTests) { ])"); TFrequencyData expected = { + .Keywords = { + {"union", 65064443}, + }, .Types = { {"bigint", 7101}, }, @@ -31,8 +35,10 @@ Y_UNIT_TEST_SUITE(FrequencyTests) { }, }; + UNIT_ASSERT_VALUES_EQUAL(actual.Keywords, expected.Keywords); UNIT_ASSERT_VALUES_EQUAL(actual.Types, expected.Types); UNIT_ASSERT_VALUES_EQUAL(actual.Functions, expected.Functions); + UNIT_ASSERT_VALUES_EQUAL(actual.Hints, expected.Hints); } Y_UNIT_TEST(FrequencyDataResouce) { diff --git a/yql/essentials/sql/v1/complete/name/static/name_service.cpp b/yql/essentials/sql/v1/complete/name/static/name_service.cpp index 37f5a06785b..3fd33102d61 100644 --- a/yql/essentials/sql/v1/complete/name/static/name_service.cpp +++ b/yql/essentials/sql/v1/complete/name/static/name_service.cpp @@ -2,22 +2,9 @@ #include "ranking.h" -namespace NSQLComplete { - - bool NoCaseCompare(const TString& lhs, const TString& rhs) { - return 
std::lexicographical_compare( - std::begin(lhs), std::end(lhs), - std::begin(rhs), std::end(rhs), - [](const char lhs, const char rhs) { - return ToLower(lhs) < ToLower(rhs); - }); - } +#include <yql/essentials/sql/v1/complete/text/case.h> - auto NoCaseCompareLimit(size_t size) { - return [size](const TString& lhs, const TString& rhs) -> bool { - return strncasecmp(lhs.data(), rhs.data(), size) < 0; - }; - } +namespace NSQLComplete { const TVector<TStringBuf> FilteredByPrefix( const TString& prefix, @@ -28,8 +15,8 @@ namespace NSQLComplete { return TVector<TStringBuf>(first, last); } - template <class T> - void AppendAs(TVector<TGenericName>& target, const TVector<TStringBuf>& source) { + template <class T, class S = TStringBuf> + void AppendAs(TVector<TGenericName>& target, const TVector<S>& source) { for (const auto& element : source) { target.emplace_back(T{TString(element)}); } @@ -82,6 +69,11 @@ namespace NSQLComplete { TFuture<TNameResponse> Lookup(TNameRequest request) override { TNameResponse response; + Sort(request.Keywords, NoCaseCompare); + AppendAs<TKeyword>( + response.RankedNames, + FilteredByPrefix(request.Prefix, request.Keywords)); + if (request.Constraints.Pragma) { auto prefix = Prefixed(request.Prefix, ".", *request.Constraints.Pragma); auto names = FilteredByPrefix(prefix, NameSet_.Pragmas); diff --git a/yql/essentials/sql/v1/complete/name/static/ranking.cpp b/yql/essentials/sql/v1/complete/name/static/ranking.cpp index 79ebbc98003..ee1cbef08f5 100644 --- a/yql/essentials/sql/v1/complete/name/static/ranking.cpp +++ b/yql/essentials/sql/v1/complete/name/static/ranking.cpp @@ -57,28 +57,34 @@ namespace NSQLComplete { return std::visit([this](const auto& name) -> size_t { using T = std::decay_t<decltype(name)>; - auto identifier = ToLowerUTF8(ContentView(name)); + auto content = ToLowerUTF8(ContentView(name)); + + if constexpr (std::is_same_v<T, TKeyword>) { + if (auto weight = Frequency_.Keywords.FindPtr(content)) { + return *weight; + } + } if 
constexpr (std::is_same_v<T, TPragmaName>) { - if (auto weight = Frequency_.Pragmas.FindPtr(identifier)) { + if (auto weight = Frequency_.Pragmas.FindPtr(content)) { return *weight; } } if constexpr (std::is_same_v<T, TFunctionName>) { - if (auto weight = Frequency_.Functions.FindPtr(identifier)) { + if (auto weight = Frequency_.Functions.FindPtr(content)) { return *weight; } } if constexpr (std::is_same_v<T, TTypeName>) { - if (auto weight = Frequency_.Types.FindPtr(identifier)) { + if (auto weight = Frequency_.Types.FindPtr(content)) { return *weight; } } if constexpr (std::is_same_v<T, THintName>) { - if (auto weight = Frequency_.Hints.FindPtr(identifier)) { + if (auto weight = Frequency_.Hints.FindPtr(content)) { return *weight; } } @@ -94,6 +100,9 @@ namespace NSQLComplete { const TStringBuf ContentView(const TGenericName& name Y_LIFETIME_BOUND) const { return std::visit([](const auto& name) -> TStringBuf { using T = std::decay_t<decltype(name)>; + if constexpr (std::is_base_of_v<TKeyword, T>) { + return name.Content; + } if constexpr (std::is_base_of_v<TIndentifier, T>) { return name.Indentifier; } diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index c3581bfc9ea..fe2cde67baa 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -3,6 +3,7 @@ #include <yql/essentials/sql/v1/complete/text/word.h> #include <yql/essentials/sql/v1/complete/name/static/name_service.h> #include <yql/essentials/sql/v1/complete/syntax/local.h> +#include <yql/essentials/sql/v1/complete/syntax/format.h> // FIXME(YQL-19747): unwanted dependency on a lexer implementation #include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h> @@ -40,13 +41,9 @@ namespace NSQLComplete { TStringBuf prefix = input.Text.Head(input.CursorPosition); TCompletedToken completedToken = GetCompletedToken(prefix); - TVector<TCandidate> candidates; - EnrichWithKeywords(candidates, 
std::move(context.Keywords), completedToken); - EnrichWithNames(candidates, context, completedToken); - return { .CompletedToken = std::move(completedToken), - .Candidates = std::move(candidates), + .Candidates = GetCanidates(std::move(context), completedToken), }; } @@ -58,33 +55,16 @@ namespace NSQLComplete { }; } - void EnrichWithKeywords( - TVector<TCandidate>& candidates, - TVector<TString> keywords, - const TCompletedToken& prefix) { - for (auto keyword : keywords) { - candidates.push_back({ - .Kind = ECandidateKind::Keyword, - .Content = std::move(keyword), - }); - } - FilterByContent(candidates, prefix.Content); - candidates.crop(Configuration.Limit); - } - - void EnrichWithNames( - TVector<TCandidate>& candidates, - const TLocalSyntaxContext& context, - const TCompletedToken& prefix) { - if (candidates.size() == Configuration.Limit) { - return; - } - + TVector<TCandidate> GetCanidates(TLocalSyntaxContext context, const TCompletedToken& prefix) { TNameRequest request = { .Prefix = TString(prefix.Content), - .Limit = Configuration.Limit - candidates.size(), + .Limit = Configuration.Limit, }; + for (const auto& [first, _] : context.Keywords) { + request.Keywords.emplace_back(first); + } + if (context.Pragma) { TPragmaName::TConstraints constraints; constraints.Namespace = context.Pragma->Namespace; @@ -108,19 +88,25 @@ namespace NSQLComplete { } if (request.IsEmpty()) { - return; + return {}; } // User should prepare a robust INameService TNameResponse response = Names->Lookup(std::move(request)).ExtractValueSync(); - EnrichWithNames(candidates, std::move(response.RankedNames)); + return Convert(std::move(response.RankedNames), std::move(context.Keywords)); } - void EnrichWithNames(TVector<TCandidate>& candidates, TVector<TGenericName> names) { + TVector<TCandidate> Convert(TVector<TGenericName> names, TLocalSyntaxContext::TKeywords keywords) { + TVector<TCandidate> candidates; for (auto& name : names) { - candidates.emplace_back(std::visit([](auto&& name) -> 
TCandidate { + candidates.emplace_back(std::visit([&](auto&& name) -> TCandidate { using T = std::decay_t<decltype(name)>; + if constexpr (std::is_base_of_v<TKeyword, T>) { + TVector<TString>& seq = keywords[name.Content]; + seq.insert(std::begin(seq), name.Content); + return {ECandidateKind::Keyword, FormatKeywords(seq)}; + } if constexpr (std::is_base_of_v<TPragmaName, T>) { return {ECandidateKind::PragmaName, std::move(name.Indentifier)}; } @@ -136,14 +122,7 @@ namespace NSQLComplete { } }, std::move(name))); } - } - - void FilterByContent(TVector<TCandidate>& candidates, TStringBuf prefix) { - const auto lowerPrefix = ToLowerUTF8(prefix); - auto removed = std::ranges::remove_if(candidates, [&](const auto& candidate) { - return !ToLowerUTF8(candidate.Content).StartsWith(lowerPrefix); - }); - candidates.erase(std::begin(removed), std::end(removed)); + return candidates; } TConfiguration Configuration; diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index dd7145169c1..a0681b1888f 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -324,6 +324,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "CURRENT_TIMESTAMP"}, {Keyword, "DICT<"}, {Keyword, "DISTINCT"}, + {FunctionName, "DateTime::Split("}, {Keyword, "EMPTY_ACTION"}, {Keyword, "ENUM"}, {Keyword, "EXISTS("}, @@ -340,12 +341,11 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "SET<"}, {Keyword, "STREAM"}, {Keyword, "STRUCT"}, + {FunctionName, "StartsWith("}, {Keyword, "TAGGED<"}, {Keyword, "TRUE"}, {Keyword, "TUPLE"}, {Keyword, "VARIANT"}, - {FunctionName, "DateTime::Split("}, - {FunctionName, "StartsWith("}, }; auto engine = MakeSqlCompletionEngineUT(); @@ -362,6 +362,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "CURRENT_TIME"}, {Keyword, "CURRENT_TIMESTAMP"}, {Keyword, "DICT<"}, + {FunctionName, "DateTime::Split("}, {Keyword, "EMPTY_ACTION"}, {Keyword, "ENUM"}, 
{Keyword, "EXISTS("}, @@ -378,12 +379,11 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "SET<"}, {Keyword, "STREAM<"}, {Keyword, "STRUCT"}, + {FunctionName, "StartsWith("}, {Keyword, "TAGGED<"}, {Keyword, "TRUE"}, {Keyword, "TUPLE"}, {Keyword, "VARIANT"}, - {FunctionName, "DateTime::Split("}, - {FunctionName, "StartsWith("}, }; auto engine = MakeSqlCompletionEngineUT(); @@ -415,8 +415,8 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "STRUCT"}, {Keyword, "TAGGED<"}, {Keyword, "TUPLE"}, - {Keyword, "VARIANT<"}, {TypeName, "Uint64"}, + {Keyword, "VARIANT<"}, }; auto engine = MakeSqlCompletionEngineUT(); @@ -505,8 +505,8 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(InsertTableHintName) { TVector<TCandidate> expected = { {Keyword, "COLUMNS"}, - {Keyword, "SCHEMA"}, {HintName, "EXPIRATION"}, + {Keyword, "SCHEMA"}, }; auto engine = MakeSqlCompletionEngineUT(); @@ -614,7 +614,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(OnFailingNameService) { auto service = MakeHolder<TFailingNameService>(); auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(service)); - UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {""})); + UNIT_ASSERT_EXCEPTION(Complete(engine, {""}), TDummyException); UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT OPTIONAL<U"}), TDummyException); UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT CAST (1 AS "}).size(), TDummyException); } @@ -644,6 +644,10 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(Ranking) { TFrequencyData frequency = { + .Keywords = { + {"select", 2}, + {"insert", 4}, + }, .Pragmas = { {"yt.defaultmemorylimit", 16}, {"yt.annotations", 8}, @@ -670,6 +674,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(service)); { TVector<TCandidate> expected = { + {Keyword, "INSERT"}, + {Keyword, "SELECT"}, + }; + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {""}), expected); + } + { + TVector<TCandidate> expected = { {PragmaName, 
"DefaultMemoryLimit"}, {PragmaName, "Annotations"}, }; @@ -701,10 +712,10 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { } { TVector<TCandidate> expected = { - {Keyword, "COLUMNS"}, - {Keyword, "SCHEMA"}, {HintName, "XLOCK"}, {HintName, "UNORDERED"}, + {Keyword, "COLUMNS"}, + {HintName, "FORCEINFERSCHEMA"}, }; UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {"SELECT * FROM a WITH "}), expected); } diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp new file mode 100644 index 00000000000..1c9f146c923 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/format.cpp @@ -0,0 +1,38 @@ +#include "format.h" + +#include "grammar.h" + +#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h> + +#include <util/generic/hash_set.h> + +namespace NSQLComplete { + + TString FormatKeywords(const TVector<TString>& seq) { + static const THashSet<std::string> Keywords = [] { + const auto& grammar = GetSqlGrammar(); + const auto& vocabulary = grammar.GetVocabulary(); + + THashSet<std::string> keywords; + for (auto& token : grammar.GetKeywordTokens()) { + keywords.emplace(Display(vocabulary, token)); + } + return keywords; + }(); + + if (seq.empty()) { + return ""; + } + + TString text = seq[0]; + for (size_t i = 1; i < seq.size(); ++i) { + const auto& token = seq[i]; + if (Keywords.contains(token)) { + text += " "; + } + text += token; + } + return text; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h new file mode 100644 index 00000000000..6c2f1b72ac2 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/format.h @@ -0,0 +1,10 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/vector.h> + +namespace NSQLComplete { + + TString FormatKeywords(const TVector<TString>& seq); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp 
b/yql/essentials/sql/v1/complete/syntax/local.cpp index e6a7430ca27..4b6fac094d5 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.cpp +++ b/yql/essentials/sql/v1/complete/syntax/local.cpp @@ -6,6 +6,7 @@ #include <yql/essentials/sql/v1/complete/antlr4/c3i.h> #include <yql/essentials/sql/v1/complete/antlr4/c3t.h> +#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h> #include <yql/essentials/core/issue/yql_issue.h> @@ -115,34 +116,22 @@ namespace NSQLComplete { return true; } - TVector<TString> SiftedKeywords(const TC3Candidates& candidates) { + TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) { const auto& vocabulary = Grammar->GetVocabulary(); const auto& keywordTokens = Grammar->GetKeywordTokens(); - TVector<TString> keywords; + TLocalSyntaxContext::TKeywords keywords; for (const auto& token : candidates.Tokens) { if (keywordTokens.contains(token.Number)) { - keywords.emplace_back(Display(vocabulary, token.Number)); - for (auto following : token.Following) { - if (keywordTokens.contains(following)) { - keywords.back() += " "; - } - keywords.back() += Display(vocabulary, following); + auto& following = keywords[Display(vocabulary, token.Number)]; + for (auto next : token.Following) { + following.emplace_back(Display(vocabulary, next)); } } } return keywords; } - std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) { - auto name = vocabulary.getDisplayName(tokenType); - if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) { - name.erase(static_cast<std::string::size_type>(0), 1); - name.pop_back(); - } - return name; - } - std::optional<TLocalSyntaxContext::TPragma> PragmaMatch( const NSQLTranslation::TParsedTokenList& tokens, const TC3Candidates& candidates) { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) { diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h index 8d51c54df57..2b926e1296e 100644 
--- a/yql/essentials/sql/v1/complete/syntax/local.h +++ b/yql/essentials/sql/v1/complete/syntax/local.h @@ -5,11 +5,13 @@ #include <yql/essentials/sql/v1/lexer/lexer.h> #include <util/generic/string.h> -#include <util/generic/vector.h> +#include <util/generic/hash.h> namespace NSQLComplete { struct TLocalSyntaxContext { + using TKeywords = THashMap<TString, TVector<TString>>; + struct TPragma { TString Namespace; }; @@ -22,7 +24,7 @@ namespace NSQLComplete { EStatementKind StatementKind; }; - TVector<TString> Keywords; + TKeywords Keywords; std::optional<TPragma> Pragma; bool IsTypeName; std::optional<TFunction> Function; diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make index e9b3ca15bdd..6631a0d9c1d 100644 --- a/yql/essentials/sql/v1/complete/syntax/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( ansi.cpp + format.cpp grammar.cpp local.cpp parser_call_stack.cpp diff --git a/yql/essentials/sql/v1/complete/text/case.cpp b/yql/essentials/sql/v1/complete/text/case.cpp new file mode 100644 index 00000000000..ea1b39e1613 --- /dev/null +++ b/yql/essentials/sql/v1/complete/text/case.cpp @@ -0,0 +1,11 @@ +#include "case.h" + +#include <util/string/ascii.h> + +namespace NSQLComplete { + + bool NoCaseCompare(const TString& lhs, const TString& rhs) { + return AsciiCompareIgnoreCase(lhs, rhs) < 0; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/text/case.h b/yql/essentials/sql/v1/complete/text/case.h new file mode 100644 index 00000000000..883cb7f5048 --- /dev/null +++ b/yql/essentials/sql/v1/complete/text/case.h @@ -0,0 +1,15 @@ +#pragma once + +#include <util/generic/string.h> + +namespace NSQLComplete { + + bool NoCaseCompare(const TString& lhs, const TString& rhs); + + inline auto NoCaseCompareLimit(size_t size) { + return [size](const TString& lhs, const TString& rhs) -> bool { + return strncasecmp(lhs.data(), rhs.data(), size) < 0; + }; + } 
+ +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/text/ya.make b/yql/essentials/sql/v1/complete/text/ya.make index 030e69172ab..3d26b895e52 100644 --- a/yql/essentials/sql/v1/complete/text/ya.make +++ b/yql/essentials/sql/v1/complete/text/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + case.cpp word.cpp ) |