aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/complete/syntax
diff options
context:
space:
mode:
author: vityaman <vityaman.dev@yandex.ru> 2025-04-14 13:06:15 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2025-04-14 13:40:02 +0300
commit: 14005fcfba8efa6918e54d823bff6780d9922f8f (patch)
tree: a0be3398ab981aed52ac31b4d168d9f8680405a7 /yql/essentials/sql/v1/complete/syntax
parent: 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (diff)
download: ydb-14005fcfba8efa6918e54d823bff6780d9922f8f.tar.gz
YQL-19747 Rank keywords just by plain usages
- [x] Rank keywords just by plain usages
- [x] `LocalSyntaxAnalysis` now returns a mapping `:: Keyword -> [Following Keywords]`.
- [x] Extracted keyword sequence formatting from `syntax/local` to `syntax/format`.
- [x] Extracted token display logic from `syntax/local` to `antlr4/vocabulary` as it is ANTLR dependent.

---

Example

```python
$ ./yql_complete <<< "select "
[Keyword] CAST(
[Keyword] NULL
[Keyword] NOT
[FunctionName] If(
[FunctionName] Yson::ConvertToString(
[FunctionName] Count(
[FunctionName] Sum(
[FunctionName] Unwrap(
[FunctionName] Coalesce(
[Keyword] DISTINCT
[Keyword] ALL
[Keyword] CASE
[FunctionName] Max(
[Keyword] FALSE
[FunctionName] Some(
```

---

- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/17

---

Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1197
commit_hash:f42cb4aaffe6de7c9137069c4d9c635ee110a805
Diffstat (limited to 'yql/essentials/sql/v1/complete/syntax')
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/format.cpp 38
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/format.h 10
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/local.cpp 23
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/local.h 6
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/ya.make 1
5 files changed, 59 insertions, 19 deletions
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp
new file mode 100644
index 00000000000..1c9f146c923
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/format.cpp
@@ -0,0 +1,38 @@
+#include "format.h"
+
+#include "grammar.h"
+
+#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h>
+
+#include <util/generic/hash_set.h>
+
+namespace NSQLComplete {
+
+ TString FormatKeywords(const TVector<TString>& seq) {
+ static const THashSet<std::string> Keywords = [] {
+ const auto& grammar = GetSqlGrammar();
+ const auto& vocabulary = grammar.GetVocabulary();
+
+ THashSet<std::string> keywords;
+ for (auto& token : grammar.GetKeywordTokens()) {
+ keywords.emplace(Display(vocabulary, token));
+ }
+ return keywords;
+ }();
+
+ if (seq.empty()) {
+ return "";
+ }
+
+ TString text = seq[0];
+ for (size_t i = 1; i < seq.size(); ++i) {
+ const auto& token = seq[i];
+ if (Keywords.contains(token)) {
+ text += " ";
+ }
+ text += token;
+ }
+ return text;
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h
new file mode 100644
index 00000000000..6c2f1b72ac2
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/format.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+namespace NSQLComplete {
+
+ TString FormatKeywords(const TVector<TString>& seq);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp
index e6a7430ca27..4b6fac094d5 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/local.cpp
@@ -6,6 +6,7 @@
#include <yql/essentials/sql/v1/complete/antlr4/c3i.h>
#include <yql/essentials/sql/v1/complete/antlr4/c3t.h>
+#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h>
#include <yql/essentials/core/issue/yql_issue.h>
@@ -115,34 +116,22 @@ namespace NSQLComplete {
return true;
}
- TVector<TString> SiftedKeywords(const TC3Candidates& candidates) {
+ TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) {
const auto& vocabulary = Grammar->GetVocabulary();
const auto& keywordTokens = Grammar->GetKeywordTokens();
- TVector<TString> keywords;
+ TLocalSyntaxContext::TKeywords keywords;
for (const auto& token : candidates.Tokens) {
if (keywordTokens.contains(token.Number)) {
- keywords.emplace_back(Display(vocabulary, token.Number));
- for (auto following : token.Following) {
- if (keywordTokens.contains(following)) {
- keywords.back() += " ";
- }
- keywords.back() += Display(vocabulary, following);
+ auto& following = keywords[Display(vocabulary, token.Number)];
+ for (auto next : token.Following) {
+ following.emplace_back(Display(vocabulary, next));
}
}
}
return keywords;
}
- std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) {
- auto name = vocabulary.getDisplayName(tokenType);
- if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) {
- name.erase(static_cast<std::string::size_type>(0), 1);
- name.pop_back();
- }
- return name;
- }
-
std::optional<TLocalSyntaxContext::TPragma> PragmaMatch(
const NSQLTranslation::TParsedTokenList& tokens, const TC3Candidates& candidates) {
if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) {
diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h
index 8d51c54df57..2b926e1296e 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.h
+++ b/yql/essentials/sql/v1/complete/syntax/local.h
@@ -5,11 +5,13 @@
#include <yql/essentials/sql/v1/lexer/lexer.h>
#include <util/generic/string.h>
-#include <util/generic/vector.h>
+#include <util/generic/hash.h>
namespace NSQLComplete {
struct TLocalSyntaxContext {
+ using TKeywords = THashMap<TString, TVector<TString>>;
+
struct TPragma {
TString Namespace;
};
@@ -22,7 +24,7 @@ namespace NSQLComplete {
EStatementKind StatementKind;
};
- TVector<TString> Keywords;
+ TKeywords Keywords;
std::optional<TPragma> Pragma;
bool IsTypeName;
std::optional<TFunction> Function;
diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make
index e9b3ca15bdd..6631a0d9c1d 100644
--- a/yql/essentials/sql/v1/complete/syntax/ya.make
+++ b/yql/essentials/sql/v1/complete/syntax/ya.make
@@ -2,6 +2,7 @@ LIBRARY()
SRCS(
ansi.cpp
+ format.cpp
grammar.cpp
local.cpp
parser_call_stack.cpp