summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1
diff options
context:
space:
mode:
author: vityaman <[email protected]> 2025-04-14 13:06:15 +0300
committer: robot-piglet <[email protected]> 2025-04-14 13:40:02 +0300
commit: 14005fcfba8efa6918e54d823bff6780d9922f8f (patch)
tree: a0be3398ab981aed52ac31b4d168d9f8680405a7 /yql/essentials/sql/v1
parent: 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (diff)
YQL-19747 Rank keywords just by plain usages
- [x] Rank keywords just by plain usages
- [x] `LocalSyntaxAnalysis` now returns a mapping `:: Keyword -> [Following Keywords]`.
- [x] Extracted keyword sequence formatting from `syntax/local` to `syntax/format`.
- [x] Extracted token display logic from `syntax/local` to `antlr4/vocabulary` as it is ANTLR dependent.

---

Example

```python
$ ./yql_complete <<< "select "
[Keyword] CAST(
[Keyword] NULL
[Keyword] NOT
[FunctionName] If(
[FunctionName] Yson::ConvertToString(
[FunctionName] Count(
[FunctionName] Sum(
[FunctionName] Unwrap(
[FunctionName] Coalesce(
[Keyword] DISTINCT
[Keyword] ALL
[Keyword] CASE
[FunctionName] Max(
[Keyword] FALSE
[FunctionName] Some(
```

---

- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/17

---

Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1197
commit_hash:f42cb4aaffe6de7c9137069c4d9c635ee110a805
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp 14
-rw-r--r-- yql/essentials/sql/v1/complete/antlr4/vocabulary.h 13
-rw-r--r-- yql/essentials/sql/v1/complete/antlr4/ya.make 4
-rw-r--r-- yql/essentials/sql/v1/complete/name/name_service.h 9
-rw-r--r-- yql/essentials/sql/v1/complete/name/static/frequency.cpp 4
-rw-r--r-- yql/essentials/sql/v1/complete/name/static/frequency.h 1
-rw-r--r-- yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp 6
-rw-r--r-- yql/essentials/sql/v1/complete/name/static/name_service.cpp 26
-rw-r--r-- yql/essentials/sql/v1/complete/name/static/ranking.cpp 19
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete.cpp 59
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete_ut.cpp 29
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/format.cpp 38
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/format.h 10
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/local.cpp 23
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/local.h 6
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/ya.make 1
-rw-r--r-- yql/essentials/sql/v1/complete/text/case.cpp 11
-rw-r--r-- yql/essentials/sql/v1/complete/text/case.h 15
-rw-r--r-- yql/essentials/sql/v1/complete/text/ya.make 1
19 files changed, 198 insertions, 91 deletions
diff --git a/yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp b/yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp
new file mode 100644
index 00000000000..0938ae3aca0
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/antlr4/vocabulary.cpp
@@ -0,0 +1,14 @@
+#include "vocabulary.h"
+
+namespace NSQLComplete {
+
+ std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) { // Returns the vocabulary's display name for tokenType, without surrounding single quotes.
+ auto name = vocabulary.getDisplayName(tokenType);
+ if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) { // Length check keeps a lone "'" from being treated as a quoted pair.
+ name.erase(static_cast<std::string::size_type>(0), 1); // Drop the opening quote.
+ name.pop_back(); // Drop the closing quote.
+ }
+ return name; // Names that are not quote-wrapped are returned unchanged.
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/antlr4/vocabulary.h b/yql/essentials/sql/v1/complete/antlr4/vocabulary.h
new file mode 100644
index 00000000000..deb67828800
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/antlr4/vocabulary.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "defs.h"
+
+#include <contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h>
+
+#include <string>
+
+namespace NSQLComplete {
+
+ std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType); // Display name of tokenType taken from the given ANTLR vocabulary.
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/antlr4/ya.make b/yql/essentials/sql/v1/complete/antlr4/ya.make
index 36145606177..b79ae1b60bb 100644
--- a/yql/essentials/sql/v1/complete/antlr4/ya.make
+++ b/yql/essentials/sql/v1/complete/antlr4/ya.make
@@ -1,5 +1,9 @@
LIBRARY()
+SRCS(
+ vocabulary.cpp
+)
+
PEERDIR(
contrib/libs/antlr4_cpp_runtime
contrib/libs/antlr4-c3
diff --git a/yql/essentials/sql/v1/complete/name/name_service.h b/yql/essentials/sql/v1/complete/name/name_service.h
index 34a109d0013..665068e1520 100644
--- a/yql/essentials/sql/v1/complete/name/name_service.h
+++ b/yql/essentials/sql/v1/complete/name/name_service.h
@@ -19,6 +19,10 @@ namespace NSQLComplete {
TString Namespace;
};
+ struct TKeyword { // A keyword name; one of the alternatives of TGenericName.
+ TString Content; // Keyword text.
+ };
+
struct TPragmaName: TIndentifier {
struct TConstraints: TNamespaced {};
};
@@ -38,12 +42,14 @@ namespace NSQLComplete {
};
using TGenericName = std::variant<
+ TKeyword,
TPragmaName,
TTypeName,
TFunctionName,
THintName>;
struct TNameRequest {
+ TVector<TString> Keywords;
struct {
std::optional<TPragmaName::TConstraints> Pragma;
std::optional<TTypeName::TConstraints> Type;
@@ -54,7 +60,8 @@ namespace NSQLComplete {
size_t Limit = 128;
bool IsEmpty() const {
- return !Constraints.Pragma &&
+ return Keywords.empty() &&
+ !Constraints.Pragma &&
!Constraints.Type &&
!Constraints.Function &&
!Constraints.Hint;
diff --git a/yql/essentials/sql/v1/complete/name/static/frequency.cpp b/yql/essentials/sql/v1/complete/name/static/frequency.cpp
index b3707533e69..62997ccff7a 100644
--- a/yql/essentials/sql/v1/complete/name/static/frequency.cpp
+++ b/yql/essentials/sql/v1/complete/name/static/frequency.cpp
@@ -17,6 +17,7 @@ namespace NSQLComplete {
const char* Pragma = "PRAGMA";
const char* Type = "TYPE";
const char* Func = "FUNC";
+ const char* Keyword = "KEYWORD";
const char* Module = "MODULE";
const char* ModuleFunc = "MODULE_FUNC";
const char* ReadHint = "READ_HINT";
@@ -59,6 +60,7 @@ namespace NSQLComplete {
if (item.Parent == Json.Parent.Pragma ||
item.Parent == Json.Parent.Type ||
item.Parent == Json.Parent.Func ||
+ item.Parent == Json.Parent.Keyword ||
item.Parent == Json.Parent.ModuleFunc ||
item.Parent == Json.Parent.Module ||
item.Parent == Json.Parent.ReadHint ||
@@ -70,6 +72,8 @@ namespace NSQLComplete {
data.Pragmas[item.Rule] += item.Sum;
} else if (item.Parent == Json.Parent.Type) {
data.Types[item.Rule] += item.Sum;
+ } else if (item.Parent == Json.Parent.Keyword) {
+ data.Keywords[item.Rule] += item.Sum;
} else if (item.Parent == Json.Parent.Module) {
// Ignore, unsupported: Modules
} else if (item.Parent == Json.Parent.Func ||
diff --git a/yql/essentials/sql/v1/complete/name/static/frequency.h b/yql/essentials/sql/v1/complete/name/static/frequency.h
index 024d93cefcb..6925c99fa5e 100644
--- a/yql/essentials/sql/v1/complete/name/static/frequency.h
+++ b/yql/essentials/sql/v1/complete/name/static/frequency.h
@@ -6,6 +6,7 @@
namespace NSQLComplete {
struct TFrequencyData {
+ THashMap<TString, size_t> Keywords;
THashMap<TString, size_t> Pragmas;
THashMap<TString, size_t> Types;
THashMap<TString, size_t> Functions;
diff --git a/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp b/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp
index a5fd8fad00a..8f7eafed2ea 100644
--- a/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp
+++ b/yql/essentials/sql/v1/complete/name/static/frequency_ut.cpp
@@ -10,6 +10,7 @@ Y_UNIT_TEST_SUITE(FrequencyTests) {
TFrequencyData actual = ParseJsonFrequencyData(R"([
{"parent":"FUNC","rule":"ABC","sum":1},
{"parent":"TYPE","rule":"BIGINT","sum":7101},
+ {"parent":"KEYWORD","rule":"UNION","sum":65064443},
{"parent":"MODULE_FUNC","rule":"Compress::BZip2","sum":2},
{"parent":"MODULE","rule":"re2","sum":3094},
{"parent":"READ_HINT","rule":"COLUMNS","sum":826110},
@@ -18,6 +19,9 @@ Y_UNIT_TEST_SUITE(FrequencyTests) {
])");
TFrequencyData expected = {
+ .Keywords = {
+ {"union", 65064443},
+ },
.Types = {
{"bigint", 7101},
},
@@ -31,8 +35,10 @@ Y_UNIT_TEST_SUITE(FrequencyTests) {
},
};
+ UNIT_ASSERT_VALUES_EQUAL(actual.Keywords, expected.Keywords);
UNIT_ASSERT_VALUES_EQUAL(actual.Types, expected.Types);
UNIT_ASSERT_VALUES_EQUAL(actual.Functions, expected.Functions);
+ UNIT_ASSERT_VALUES_EQUAL(actual.Hints, expected.Hints);
}
Y_UNIT_TEST(FrequencyDataResouce) {
diff --git a/yql/essentials/sql/v1/complete/name/static/name_service.cpp b/yql/essentials/sql/v1/complete/name/static/name_service.cpp
index 37f5a06785b..3fd33102d61 100644
--- a/yql/essentials/sql/v1/complete/name/static/name_service.cpp
+++ b/yql/essentials/sql/v1/complete/name/static/name_service.cpp
@@ -2,22 +2,9 @@
#include "ranking.h"
-namespace NSQLComplete {
-
- bool NoCaseCompare(const TString& lhs, const TString& rhs) {
- return std::lexicographical_compare(
- std::begin(lhs), std::end(lhs),
- std::begin(rhs), std::end(rhs),
- [](const char lhs, const char rhs) {
- return ToLower(lhs) < ToLower(rhs);
- });
- }
+#include <yql/essentials/sql/v1/complete/text/case.h>
- auto NoCaseCompareLimit(size_t size) {
- return [size](const TString& lhs, const TString& rhs) -> bool {
- return strncasecmp(lhs.data(), rhs.data(), size) < 0;
- };
- }
+namespace NSQLComplete {
const TVector<TStringBuf> FilteredByPrefix(
const TString& prefix,
@@ -28,8 +15,8 @@ namespace NSQLComplete {
return TVector<TStringBuf>(first, last);
}
- template <class T>
- void AppendAs(TVector<TGenericName>& target, const TVector<TStringBuf>& source) {
+ template <class T, class S = TStringBuf>
+ void AppendAs(TVector<TGenericName>& target, const TVector<S>& source) {
for (const auto& element : source) {
target.emplace_back(T{TString(element)});
}
@@ -82,6 +69,11 @@ namespace NSQLComplete {
TFuture<TNameResponse> Lookup(TNameRequest request) override {
TNameResponse response;
+ Sort(request.Keywords, NoCaseCompare);
+ AppendAs<TKeyword>(
+ response.RankedNames,
+ FilteredByPrefix(request.Prefix, request.Keywords));
+
if (request.Constraints.Pragma) {
auto prefix = Prefixed(request.Prefix, ".", *request.Constraints.Pragma);
auto names = FilteredByPrefix(prefix, NameSet_.Pragmas);
diff --git a/yql/essentials/sql/v1/complete/name/static/ranking.cpp b/yql/essentials/sql/v1/complete/name/static/ranking.cpp
index 79ebbc98003..ee1cbef08f5 100644
--- a/yql/essentials/sql/v1/complete/name/static/ranking.cpp
+++ b/yql/essentials/sql/v1/complete/name/static/ranking.cpp
@@ -57,28 +57,34 @@ namespace NSQLComplete {
return std::visit([this](const auto& name) -> size_t {
using T = std::decay_t<decltype(name)>;
- auto identifier = ToLowerUTF8(ContentView(name));
+ auto content = ToLowerUTF8(ContentView(name));
+
+ if constexpr (std::is_same_v<T, TKeyword>) {
+ if (auto weight = Frequency_.Keywords.FindPtr(content)) {
+ return *weight;
+ }
+ }
if constexpr (std::is_same_v<T, TPragmaName>) {
- if (auto weight = Frequency_.Pragmas.FindPtr(identifier)) {
+ if (auto weight = Frequency_.Pragmas.FindPtr(content)) {
return *weight;
}
}
if constexpr (std::is_same_v<T, TFunctionName>) {
- if (auto weight = Frequency_.Functions.FindPtr(identifier)) {
+ if (auto weight = Frequency_.Functions.FindPtr(content)) {
return *weight;
}
}
if constexpr (std::is_same_v<T, TTypeName>) {
- if (auto weight = Frequency_.Types.FindPtr(identifier)) {
+ if (auto weight = Frequency_.Types.FindPtr(content)) {
return *weight;
}
}
if constexpr (std::is_same_v<T, THintName>) {
- if (auto weight = Frequency_.Hints.FindPtr(identifier)) {
+ if (auto weight = Frequency_.Hints.FindPtr(content)) {
return *weight;
}
}
@@ -94,6 +100,9 @@ namespace NSQLComplete {
const TStringBuf ContentView(const TGenericName& name Y_LIFETIME_BOUND) const {
return std::visit([](const auto& name) -> TStringBuf {
using T = std::decay_t<decltype(name)>;
+ if constexpr (std::is_base_of_v<TKeyword, T>) {
+ return name.Content;
+ }
if constexpr (std::is_base_of_v<TIndentifier, T>) {
return name.Indentifier;
}
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp
index c3581bfc9ea..fe2cde67baa 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete.cpp
@@ -3,6 +3,7 @@
#include <yql/essentials/sql/v1/complete/text/word.h>
#include <yql/essentials/sql/v1/complete/name/static/name_service.h>
#include <yql/essentials/sql/v1/complete/syntax/local.h>
+#include <yql/essentials/sql/v1/complete/syntax/format.h>
// FIXME(YQL-19747): unwanted dependency on a lexer implementation
#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
@@ -40,13 +41,9 @@ namespace NSQLComplete {
TStringBuf prefix = input.Text.Head(input.CursorPosition);
TCompletedToken completedToken = GetCompletedToken(prefix);
- TVector<TCandidate> candidates;
- EnrichWithKeywords(candidates, std::move(context.Keywords), completedToken);
- EnrichWithNames(candidates, context, completedToken);
-
return {
.CompletedToken = std::move(completedToken),
- .Candidates = std::move(candidates),
+ .Candidates = GetCanidates(std::move(context), completedToken),
};
}
@@ -58,33 +55,16 @@ namespace NSQLComplete {
};
}
- void EnrichWithKeywords(
- TVector<TCandidate>& candidates,
- TVector<TString> keywords,
- const TCompletedToken& prefix) {
- for (auto keyword : keywords) {
- candidates.push_back({
- .Kind = ECandidateKind::Keyword,
- .Content = std::move(keyword),
- });
- }
- FilterByContent(candidates, prefix.Content);
- candidates.crop(Configuration.Limit);
- }
-
- void EnrichWithNames(
- TVector<TCandidate>& candidates,
- const TLocalSyntaxContext& context,
- const TCompletedToken& prefix) {
- if (candidates.size() == Configuration.Limit) {
- return;
- }
-
+ TVector<TCandidate> GetCanidates(TLocalSyntaxContext context, const TCompletedToken& prefix) {
TNameRequest request = {
.Prefix = TString(prefix.Content),
- .Limit = Configuration.Limit - candidates.size(),
+ .Limit = Configuration.Limit,
};
+ for (const auto& [first, _] : context.Keywords) {
+ request.Keywords.emplace_back(first);
+ }
+
if (context.Pragma) {
TPragmaName::TConstraints constraints;
constraints.Namespace = context.Pragma->Namespace;
@@ -108,19 +88,25 @@ namespace NSQLComplete {
}
if (request.IsEmpty()) {
- return;
+ return {};
}
// User should prepare a robust INameService
TNameResponse response = Names->Lookup(std::move(request)).ExtractValueSync();
- EnrichWithNames(candidates, std::move(response.RankedNames));
+ return Convert(std::move(response.RankedNames), std::move(context.Keywords));
}
- void EnrichWithNames(TVector<TCandidate>& candidates, TVector<TGenericName> names) {
+ TVector<TCandidate> Convert(TVector<TGenericName> names, TLocalSyntaxContext::TKeywords keywords) {
+ TVector<TCandidate> candidates;
for (auto& name : names) {
- candidates.emplace_back(std::visit([](auto&& name) -> TCandidate {
+ candidates.emplace_back(std::visit([&](auto&& name) -> TCandidate {
using T = std::decay_t<decltype(name)>;
+ if constexpr (std::is_base_of_v<TKeyword, T>) {
+ TVector<TString>& seq = keywords[name.Content];
+ seq.insert(std::begin(seq), name.Content);
+ return {ECandidateKind::Keyword, FormatKeywords(seq)};
+ }
if constexpr (std::is_base_of_v<TPragmaName, T>) {
return {ECandidateKind::PragmaName, std::move(name.Indentifier)};
}
@@ -136,14 +122,7 @@ namespace NSQLComplete {
}
}, std::move(name)));
}
- }
-
- void FilterByContent(TVector<TCandidate>& candidates, TStringBuf prefix) {
- const auto lowerPrefix = ToLowerUTF8(prefix);
- auto removed = std::ranges::remove_if(candidates, [&](const auto& candidate) {
- return !ToLowerUTF8(candidate.Content).StartsWith(lowerPrefix);
- });
- candidates.erase(std::begin(removed), std::end(removed));
+ return candidates;
}
TConfiguration Configuration;
diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
index dd7145169c1..a0681b1888f 100644
--- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
@@ -324,6 +324,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "CURRENT_TIMESTAMP"},
{Keyword, "DICT<"},
{Keyword, "DISTINCT"},
+ {FunctionName, "DateTime::Split("},
{Keyword, "EMPTY_ACTION"},
{Keyword, "ENUM"},
{Keyword, "EXISTS("},
@@ -340,12 +341,11 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "SET<"},
{Keyword, "STREAM"},
{Keyword, "STRUCT"},
+ {FunctionName, "StartsWith("},
{Keyword, "TAGGED<"},
{Keyword, "TRUE"},
{Keyword, "TUPLE"},
{Keyword, "VARIANT"},
- {FunctionName, "DateTime::Split("},
- {FunctionName, "StartsWith("},
};
auto engine = MakeSqlCompletionEngineUT();
@@ -362,6 +362,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "CURRENT_TIME"},
{Keyword, "CURRENT_TIMESTAMP"},
{Keyword, "DICT<"},
+ {FunctionName, "DateTime::Split("},
{Keyword, "EMPTY_ACTION"},
{Keyword, "ENUM"},
{Keyword, "EXISTS("},
@@ -378,12 +379,11 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "SET<"},
{Keyword, "STREAM<"},
{Keyword, "STRUCT"},
+ {FunctionName, "StartsWith("},
{Keyword, "TAGGED<"},
{Keyword, "TRUE"},
{Keyword, "TUPLE"},
{Keyword, "VARIANT"},
- {FunctionName, "DateTime::Split("},
- {FunctionName, "StartsWith("},
};
auto engine = MakeSqlCompletionEngineUT();
@@ -415,8 +415,8 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "STRUCT"},
{Keyword, "TAGGED<"},
{Keyword, "TUPLE"},
- {Keyword, "VARIANT<"},
{TypeName, "Uint64"},
+ {Keyword, "VARIANT<"},
};
auto engine = MakeSqlCompletionEngineUT();
@@ -505,8 +505,8 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(InsertTableHintName) {
TVector<TCandidate> expected = {
{Keyword, "COLUMNS"},
- {Keyword, "SCHEMA"},
{HintName, "EXPIRATION"},
+ {Keyword, "SCHEMA"},
};
auto engine = MakeSqlCompletionEngineUT();
@@ -614,7 +614,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(OnFailingNameService) {
auto service = MakeHolder<TFailingNameService>();
auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(service));
- UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {""}));
+ UNIT_ASSERT_EXCEPTION(Complete(engine, {""}), TDummyException);
UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT OPTIONAL<U"}), TDummyException);
UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT CAST (1 AS "}).size(), TDummyException);
}
@@ -644,6 +644,10 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(Ranking) {
TFrequencyData frequency = {
+ .Keywords = {
+ {"select", 2},
+ {"insert", 4},
+ },
.Pragmas = {
{"yt.defaultmemorylimit", 16},
{"yt.annotations", 8},
@@ -670,6 +674,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(service));
{
TVector<TCandidate> expected = {
+ {Keyword, "INSERT"},
+ {Keyword, "SELECT"},
+ };
+ UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {""}), expected);
+ }
+ {
+ TVector<TCandidate> expected = {
{PragmaName, "DefaultMemoryLimit"},
{PragmaName, "Annotations"},
};
@@ -701,10 +712,10 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
}
{
TVector<TCandidate> expected = {
- {Keyword, "COLUMNS"},
- {Keyword, "SCHEMA"},
{HintName, "XLOCK"},
{HintName, "UNORDERED"},
+ {Keyword, "COLUMNS"},
+ {HintName, "FORCEINFERSCHEMA"},
};
UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {"SELECT * FROM a WITH "}), expected);
}
diff --git a/yql/essentials/sql/v1/complete/syntax/format.cpp b/yql/essentials/sql/v1/complete/syntax/format.cpp
new file mode 100644
index 00000000000..1c9f146c923
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/format.cpp
@@ -0,0 +1,38 @@
+#include "format.h"
+
+#include "grammar.h"
+
+#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h>
+
+#include <util/generic/hash_set.h>
+
+namespace NSQLComplete {
+
+ TString FormatKeywords(const TVector<TString>& seq) { // Joins a token sequence into a single display string.
+ static const THashSet<std::string> Keywords = [] { // Built once, on the first call: display names of all grammar keyword tokens.
+ const auto& grammar = GetSqlGrammar();
+ const auto& vocabulary = grammar.GetVocabulary();
+
+ THashSet<std::string> keywords;
+ for (auto& token : grammar.GetKeywordTokens()) {
+ keywords.emplace(Display(vocabulary, token));
+ }
+ return keywords;
+ }();
+
+ if (seq.empty()) { // Guard: seq[0] below requires at least one element.
+ return "";
+ }
+
+ TString text = seq[0]; // The first element is emitted as is, with no leading separator.
+ for (size_t i = 1; i < seq.size(); ++i) {
+ const auto& token = seq[i];
+ if (Keywords.contains(token)) { // Only keyword tokens are preceded by a space; any other token is appended directly.
+ text += " ";
+ }
+ text += token;
+ }
+ return text;
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/format.h b/yql/essentials/sql/v1/complete/syntax/format.h
new file mode 100644
index 00000000000..6c2f1b72ac2
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/syntax/format.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+namespace NSQLComplete {
+
+ TString FormatKeywords(const TVector<TString>& seq); // Formats a token sequence as one string; an empty sequence yields "".
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp
index e6a7430ca27..4b6fac094d5 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/local.cpp
@@ -6,6 +6,7 @@
#include <yql/essentials/sql/v1/complete/antlr4/c3i.h>
#include <yql/essentials/sql/v1/complete/antlr4/c3t.h>
+#include <yql/essentials/sql/v1/complete/antlr4/vocabulary.h>
#include <yql/essentials/core/issue/yql_issue.h>
@@ -115,34 +116,22 @@ namespace NSQLComplete {
return true;
}
- TVector<TString> SiftedKeywords(const TC3Candidates& candidates) {
+ TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) { // Maps each candidate keyword's display name to the display names of the tokens that follow it.
const auto& vocabulary = Grammar->GetVocabulary();
const auto& keywordTokens = Grammar->GetKeywordTokens();
- TVector<TString> keywords;
+ TLocalSyntaxContext::TKeywords keywords;
for (const auto& token : candidates.Tokens) {
if (keywordTokens.contains(token.Number)) {
- keywords.emplace_back(Display(vocabulary, token.Number));
- for (auto following : token.Following) {
- if (keywordTokens.contains(following)) {
- keywords.back() += " ";
- }
- keywords.back() += Display(vocabulary, following);
+ auto& following = keywords[Display(vocabulary, token.Number)]; // operator[] yields the entry for this keyword, so it is present even when nothing follows it.
+ for (auto next : token.Following) {
+ following.emplace_back(Display(vocabulary, next)); // Following tokens are stored unformatted; no spacing is applied here.
}
}
}
return keywords;
}
- std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) {
- auto name = vocabulary.getDisplayName(tokenType);
- if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) {
- name.erase(static_cast<std::string::size_type>(0), 1);
- name.pop_back();
- }
- return name;
- }
-
std::optional<TLocalSyntaxContext::TPragma> PragmaMatch(
const NSQLTranslation::TParsedTokenList& tokens, const TC3Candidates& candidates) {
if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) {
diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h
index 8d51c54df57..2b926e1296e 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.h
+++ b/yql/essentials/sql/v1/complete/syntax/local.h
@@ -5,11 +5,13 @@
#include <yql/essentials/sql/v1/lexer/lexer.h>
#include <util/generic/string.h>
-#include <util/generic/vector.h>
+#include <util/generic/hash.h>
namespace NSQLComplete {
struct TLocalSyntaxContext {
+ using TKeywords = THashMap<TString, TVector<TString>>;
+
struct TPragma {
TString Namespace;
};
@@ -22,7 +24,7 @@ namespace NSQLComplete {
EStatementKind StatementKind;
};
- TVector<TString> Keywords;
+ TKeywords Keywords;
std::optional<TPragma> Pragma;
bool IsTypeName;
std::optional<TFunction> Function;
diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make
index e9b3ca15bdd..6631a0d9c1d 100644
--- a/yql/essentials/sql/v1/complete/syntax/ya.make
+++ b/yql/essentials/sql/v1/complete/syntax/ya.make
@@ -2,6 +2,7 @@ LIBRARY()
SRCS(
ansi.cpp
+ format.cpp
grammar.cpp
local.cpp
parser_call_stack.cpp
diff --git a/yql/essentials/sql/v1/complete/text/case.cpp b/yql/essentials/sql/v1/complete/text/case.cpp
new file mode 100644
index 00000000000..ea1b39e1613
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/text/case.cpp
@@ -0,0 +1,11 @@
+#include "case.h"
+
+#include <util/string/ascii.h>
+
+namespace NSQLComplete {
+
+ bool NoCaseCompare(const TString& lhs, const TString& rhs) { // "Less than" predicate that ignores ASCII case.
+ return AsciiCompareIgnoreCase(lhs, rhs) < 0; // True only when the comparison result is negative, so equal strings yield false.
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/text/case.h b/yql/essentials/sql/v1/complete/text/case.h
new file mode 100644
index 00000000000..883cb7f5048
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/text/case.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+namespace NSQLComplete {
+
+ bool NoCaseCompare(const TString& lhs, const TString& rhs); // Case-insensitive "less than" predicate over two strings.
+
+ inline auto NoCaseCompareLimit(size_t size) { // Builds a "less than" predicate that compares at most `size` leading characters, ignoring case.
+ return [size](const TString& lhs, const TString& rhs) -> bool { // `size` is captured by value, so the predicate may outlive this call.
+ return strncasecmp(lhs.data(), rhs.data(), size) < 0; // NOTE(review): no include for strncasecmp is visible in this header; presumably it arrives transitively - confirm.
+ };
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/text/ya.make b/yql/essentials/sql/v1/complete/text/ya.make
index 030e69172ab..3d26b895e52 100644
--- a/yql/essentials/sql/v1/complete/text/ya.make
+++ b/yql/essentials/sql/v1/complete/text/ya.make
@@ -1,6 +1,7 @@
LIBRARY()
SRCS(
+ case.cpp
word.cpp
)