diff options
author | vityaman <vityaman.dev@yandex.ru> | 2025-04-14 13:06:15 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-04-14 13:40:02 +0300 |
commit | 14005fcfba8efa6918e54d823bff6780d9922f8f (patch) | |
tree | a0be3398ab981aed52ac31b4d168d9f8680405a7 /yql/essentials/sql/v1/complete/syntax/format.cpp | |
parent | 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (diff) | |
download | ydb-14005fcfba8efa6918e54d823bff6780d9922f8f.tar.gz |
YQL-19747 Rank keywords just by plain usages
- [x] Rank keywords just by plain usages
- [x] `LocalSyntaxAnalysis` now returns a mapping `:: Keyword -> [Following Keywords]`.
- [x] Extracted keyword sequence formatting from `syntax/local` to `syntax/format`.
- [x] Extracted token display logic from `syntax/local` to `antlr4/vocabulary` as it is ANTLR dependent.
---
Example
```console
$ ./yql_complete <<< "select "
[Keyword] CAST(
[Keyword] NULL
[Keyword] NOT
[FunctionName] If(
[FunctionName] Yson::ConvertToString(
[FunctionName] Count(
[FunctionName] Sum(
[FunctionName] Unwrap(
[FunctionName] Coalesce(
[Keyword] DISTINCT
[Keyword] ALL
[Keyword] CASE
[FunctionName] Max(
[Keyword] FALSE
[FunctionName] Some(
```
---
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/17
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1197
commit_hash:f42cb4aaffe6de7c9137069c4d9c635ee110a805
Diffstat (limited to 'yql/essentials/sql/v1/complete/syntax/format.cpp')
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/format.cpp | 38 |
1 file changed, 38 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp new file mode 100644 index 00000000000..1c9f146c923 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/format.cpp @@ -0,0 +1,38 @@ +#include "format.h" + +#include "grammar.h" + +#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h> + +#include <util/generic/hash_set.h> + +namespace NSQLComplete { + + TString FormatKeywords(const TVector<TString>& seq) { + static const THashSet<std::string> Keywords = [] { + const auto& grammar = GetSqlGrammar(); + const auto& vocabulary = grammar.GetVocabulary(); + + THashSet<std::string> keywords; + for (auto& token : grammar.GetKeywordTokens()) { + keywords.emplace(Display(vocabulary, token)); + } + return keywords; + }(); + + if (seq.empty()) { + return ""; + } + + TString text = seq[0]; + for (size_t i = 1; i < seq.size(); ++i) { + const auto& token = seq[i]; + if (Keywords.contains(token)) { + text += " "; + } + text += token; + } + return text; + } + +} // namespace NSQLComplete |