aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/complete/syntax/format.cpp
diff options
context:
space:
mode:
authorvityaman <vityaman.dev@yandex.ru>2025-04-14 13:06:15 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2025-04-14 13:40:02 +0300
commit14005fcfba8efa6918e54d823bff6780d9922f8f (patch)
treea0be3398ab981aed52ac31b4d168d9f8680405a7 /yql/essentials/sql/v1/complete/syntax/format.cpp
parent0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (diff)
downloadydb-14005fcfba8efa6918e54d823bff6780d9922f8f.tar.gz
YQL-19747 Rank keywords just by plain usages
- [x] Rank keywords just by plain usages - [x] `LocalSyntaxAnalysis` now returns a mapping `:: Keyword -> [Following Keywords]`. - [x] Extracted keyword sequence formatting from `syntax/local` to `syntax/format`. - [x] Extracted token display logic from `syntax/local` to `antlr4/vocabulary` as it is ANTLR dependent. --- Example ```python $ ./yql_complete <<< "select " [Keyword] CAST( [Keyword] NULL [Keyword] NOT [FunctionName] If( [FunctionName] Yson::ConvertToString( [FunctionName] Count( [FunctionName] Sum( [FunctionName] Unwrap( [FunctionName] Coalesce( [Keyword] DISTINCT [Keyword] ALL [Keyword] CASE [FunctionName] Max( [Keyword] FALSE [FunctionName] Some( ``` --- - Related to https://github.com/ydb-platform/ydb/issues/9056 - Related to https://github.com/vityaman/ydb/issues/17 --- Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1197 commit_hash:f42cb4aaffe6de7c9137069c4d9c635ee110a805
Diffstat (limited to 'yql/essentials/sql/v1/complete/syntax/format.cpp')
-rw-r--r--yql/essentials/sql/v1/complete/syntax/format.cpp38
1 files changed, 38 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp
new file mode 100644
index 00000000000..1c9f146c923
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/format.cpp
@@ -0,0 +1,38 @@
+#include "format.h"
+
+#include "grammar.h"
+
+#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h>
+
+#include <util/generic/hash_set.h>
+
+namespace NSQLComplete {
+
+ TString FormatKeywords(const TVector<TString>& seq) { // Concatenates seq into one string; a space is put before an element (other than the first) only when that element is in Keywords.
+ static const THashSet<std::string> Keywords = [] { // Function-local static: the set is built by this lambda once and reused by later calls.
+ const auto& grammar = GetSqlGrammar();
+ const auto& vocabulary = grammar.GetVocabulary();
+
+ THashSet<std::string> keywords;
+ for (auto& token : grammar.GetKeywordTokens()) {
+ keywords.emplace(Display(vocabulary, token)); // Store the Display() text of each keyword token; seq elements are matched against this text below.
+ }
+ return keywords;
+ }();
+
+ if (seq.empty()) { // Guard: seq[0] is read unconditionally below.
+ return "";
+ }
+
+ TString text = seq[0]; // The first element is copied as-is, never preceded by a space.
+ for (size_t i = 1; i < seq.size(); ++i) {
+ const auto& token = seq[i];
+ if (Keywords.contains(token)) { // Only keyword elements get a separating space.
+ text += " ";
+ }
+ text += token; // Non-keyword elements are appended directly after the previous one (presumably punctuation such as "(" -- confirm against callers).
+ }
+ return text;
+ }
+
+} // namespace NSQLComplete