diff options
author | vityaman <vityaman.dev@yandex.ru> | 2025-04-14 13:06:15 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-04-14 13:40:02 +0300 |
commit | 14005fcfba8efa6918e54d823bff6780d9922f8f (patch) | |
tree | a0be3398ab981aed52ac31b4d168d9f8680405a7 /yql/essentials/sql/v1/complete/syntax | |
parent | 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (diff) | |
download | ydb-14005fcfba8efa6918e54d823bff6780d9922f8f.tar.gz |
YQL-19747 Rank keywords just by plain usages
- [x] Rank keywords just by plain usages
- [x] `LocalSyntaxAnalysis` now returns a mapping `:: Keyword -> [Following Keywords]`.
- [x] Extracted keyword sequence formatting from `syntax/local` to `syntax/format`.
- [x] Extracted token display logic from `syntax/local` to `antlr4/vocabulary`, since it is ANTLR-dependent.
---
Example
```console
$ ./yql_complete <<< "select "
[Keyword] CAST(
[Keyword] NULL
[Keyword] NOT
[FunctionName] If(
[FunctionName] Yson::ConvertToString(
[FunctionName] Count(
[FunctionName] Sum(
[FunctionName] Unwrap(
[FunctionName] Coalesce(
[Keyword] DISTINCT
[Keyword] ALL
[Keyword] CASE
[FunctionName] Max(
[Keyword] FALSE
[FunctionName] Some(
```
---
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/17
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1197
commit_hash:f42cb4aaffe6de7c9137069c4d9c635ee110a805
Diffstat (limited to 'yql/essentials/sql/v1/complete/syntax')
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/format.cpp | 38 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/format.h | 10 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/local.cpp | 23 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/local.h | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/ya.make | 1 |
5 files changed, 59 insertions, 19 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp new file mode 100644 index 00000000000..1c9f146c923 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/format.cpp @@ -0,0 +1,38 @@ +#include "format.h" + +#include "grammar.h" + +#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h> + +#include <util/generic/hash_set.h> + +namespace NSQLComplete { + + TString FormatKeywords(const TVector<TString>& seq) { + static const THashSet<std::string> Keywords = [] { + const auto& grammar = GetSqlGrammar(); + const auto& vocabulary = grammar.GetVocabulary(); + + THashSet<std::string> keywords; + for (auto& token : grammar.GetKeywordTokens()) { + keywords.emplace(Display(vocabulary, token)); + } + return keywords; + }(); + + if (seq.empty()) { + return ""; + } + + TString text = seq[0]; + for (size_t i = 1; i < seq.size(); ++i) { + const auto& token = seq[i]; + if (Keywords.contains(token)) { + text += " "; + } + text += token; + } + return text; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h new file mode 100644 index 00000000000..6c2f1b72ac2 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/format.h @@ -0,0 +1,10 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/vector.h> + +namespace NSQLComplete { + + TString FormatKeywords(const TVector<TString>& seq); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp index e6a7430ca27..4b6fac094d5 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.cpp +++ b/yql/essentials/sql/v1/complete/syntax/local.cpp @@ -6,6 +6,7 @@ #include <yql/essentials/sql/v1/complete/antlr4/c3i.h> #include <yql/essentials/sql/v1/complete/antlr4/c3t.h> +#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h> #include 
<yql/essentials/core/issue/yql_issue.h> @@ -115,34 +116,22 @@ namespace NSQLComplete { return true; } - TVector<TString> SiftedKeywords(const TC3Candidates& candidates) { + TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) { const auto& vocabulary = Grammar->GetVocabulary(); const auto& keywordTokens = Grammar->GetKeywordTokens(); - TVector<TString> keywords; + TLocalSyntaxContext::TKeywords keywords; for (const auto& token : candidates.Tokens) { if (keywordTokens.contains(token.Number)) { - keywords.emplace_back(Display(vocabulary, token.Number)); - for (auto following : token.Following) { - if (keywordTokens.contains(following)) { - keywords.back() += " "; - } - keywords.back() += Display(vocabulary, following); + auto& following = keywords[Display(vocabulary, token.Number)]; + for (auto next : token.Following) { + following.emplace_back(Display(vocabulary, next)); } } } return keywords; } - std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) { - auto name = vocabulary.getDisplayName(tokenType); - if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) { - name.erase(static_cast<std::string::size_type>(0), 1); - name.pop_back(); - } - return name; - } - std::optional<TLocalSyntaxContext::TPragma> PragmaMatch( const NSQLTranslation::TParsedTokenList& tokens, const TC3Candidates& candidates) { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) { diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h index 8d51c54df57..2b926e1296e 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.h +++ b/yql/essentials/sql/v1/complete/syntax/local.h @@ -5,11 +5,13 @@ #include <yql/essentials/sql/v1/lexer/lexer.h> #include <util/generic/string.h> -#include <util/generic/vector.h> +#include <util/generic/hash.h> namespace NSQLComplete { struct TLocalSyntaxContext { + using TKeywords = THashMap<TString, TVector<TString>>; + struct TPragma 
{ TString Namespace; }; @@ -22,7 +24,7 @@ namespace NSQLComplete { EStatementKind StatementKind; }; - TVector<TString> Keywords; + TKeywords Keywords; std::optional<TPragma> Pragma; bool IsTypeName; std::optional<TFunction> Function; diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make index e9b3ca15bdd..6631a0d9c1d 100644 --- a/yql/essentials/sql/v1/complete/syntax/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( ansi.cpp + format.cpp grammar.cpp local.cpp parser_call_stack.cpp |