summary refs log tree commit diff stats
path: root/yql/essentials/sql/v1
diff options
context:
space:
mode:
author vityaman <[email protected]> 2025-04-02 21:25:29 +0300
committer robot-piglet <[email protected]> 2025-04-02 21:36:46 +0300
commit bf8b24c06de4df2df3bf8e40e82caba8f5528301 (patch)
tree c7dac59cb55d4e7d9127dfe7a00ccd5ebbac9d73 /yql/essentials/sql/v1
parent 66b85e2f81db12f8a7086e50bf40b19303b4622d (diff)
YQL-19747 Complete token sequences
Token sequences plan

- [x] [Easy] Support `GROUP BY`, `ORDER BY`.
- [x] [Easy] Support `Optional<`, `List<`, `Dict<`.
- [x] [Easy] Support `Avg(`, `Sum(`.

---

Co-authored-by: Victor Smirnov [[email protected]]
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1173
commit_hash:a443dec666c486fef7f891be04d68a786be83049
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- yql/essentials/sql/v1/complete/antlr4/c3i.h 2
-rw-r--r-- yql/essentials/sql/v1/complete/antlr4/c3t.h 6
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete.cpp 1
-rw-r--r-- yql/essentials/sql/v1/complete/sql_complete_ut.cpp 142
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/grammar.cpp 30
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/grammar.h 1
-rw-r--r-- yql/essentials/sql/v1/complete/syntax/local.cpp 20
7 files changed, 120 insertions, 82 deletions
diff --git a/yql/essentials/sql/v1/complete/antlr4/c3i.h b/yql/essentials/sql/v1/complete/antlr4/c3i.h
index ca91649a547..26c71868051 100644
--- a/yql/essentials/sql/v1/complete/antlr4/c3i.h
+++ b/yql/essentials/sql/v1/complete/antlr4/c3i.h
@@ -10,8 +10,10 @@
namespace NSQLComplete {
+ // std::vector is used to prevent copying a C3 output
struct TSuggestedToken {
TTokenId Number;
+ std::vector<TTokenId> Following;
};
struct TMatchedRule {
diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h
index 9042937678a..750da64229c 100644
--- a/yql/essentials/sql/v1/complete/antlr4/c3t.h
+++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h
@@ -13,8 +13,6 @@
#include <util/generic/string.h>
#include <util/generic/vector.h>
-#include <unordered_set>
-
namespace NSQLComplete {
template <class Lexer, class Parser>
@@ -67,8 +65,8 @@ namespace NSQLComplete {
static TC3Candidates Converted(c3::CandidatesCollection candidates) {
TC3Candidates converted;
- for (const auto& [token, _] : candidates.tokens) {
- converted.Tokens.emplace_back(token);
+ for (auto& [token, following] : candidates.tokens) {
+ converted.Tokens.emplace_back(token, std::move(following));
}
for (auto& [rule, data] : candidates.rules) {
converted.Rules.emplace_back(rule, std::move(data.ruleList));
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp
index b73aafe0a4f..74ddbc04154 100644
--- a/yql/essentials/sql/v1/complete/sql_complete.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete.cpp
@@ -102,6 +102,7 @@ namespace NSQLComplete {
return {ECandidateKind::TypeName, std::move(name.Indentifier)};
}
if constexpr (std::is_base_of_v<TFunctionName, T>) {
+ name.Indentifier += "(";
return {ECandidateKind::FunctionName, std::move(name.Indentifier)};
}
}, std::move(name)));
diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
index 1714ed47471..ade78e81a76 100644
--- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
+++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
@@ -77,7 +77,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "CREATE"},
{Keyword, "DECLARE"},
{Keyword, "DEFINE"},
- {Keyword, "DELETE"},
+ {Keyword, "DELETE FROM"},
{Keyword, "DISCARD"},
{Keyword, "DO"},
{Keyword, "DROP"},
@@ -99,7 +99,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "REVOKE"},
{Keyword, "ROLLBACK"},
{Keyword, "SELECT"},
- {Keyword, "SHOW"},
+ {Keyword, "SHOW CREATE"},
{Keyword, "UPDATE"},
{Keyword, "UPSERT"},
{Keyword, "USE"},
@@ -117,13 +117,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(Alter) {
TVector<TCandidate> expected = {
- {Keyword, "ASYNC"},
- {Keyword, "BACKUP"},
+ {Keyword, "ASYNC REPLICATION"},
+ {Keyword, "BACKUP COLLECTION"},
{Keyword, "DATABASE"},
{Keyword, "EXTERNAL"},
{Keyword, "GROUP"},
{Keyword, "OBJECT"},
- {Keyword, "RESOURCE"},
+ {Keyword, "RESOURCE POOL"},
{Keyword, "SEQUENCE"},
{Keyword, "TABLE"},
{Keyword, "TABLESTORE"},
@@ -138,17 +138,17 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(Create) {
TVector<TCandidate> expected = {
- {Keyword, "ASYNC"},
- {Keyword, "BACKUP"},
+ {Keyword, "ASYNC REPLICATION"},
+ {Keyword, "BACKUP COLLECTION"},
{Keyword, "EXTERNAL"},
{Keyword, "GROUP"},
{Keyword, "OBJECT"},
- {Keyword, "OR"},
- {Keyword, "RESOURCE"},
+ {Keyword, "OR REPLACE"},
+ {Keyword, "RESOURCE POOL"},
{Keyword, "TABLE"},
{Keyword, "TABLESTORE"},
- {Keyword, "TEMP"},
- {Keyword, "TEMPORARY"},
+ {Keyword, "TEMP TABLE"},
+ {Keyword, "TEMPORARY TABLE"},
{Keyword, "TOPIC"},
{Keyword, "TRANSFER"},
{Keyword, "USER"},
@@ -170,12 +170,12 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(Drop) {
TVector<TCandidate> expected = {
- {Keyword, "ASYNC"},
- {Keyword, "BACKUP"},
+ {Keyword, "ASYNC REPLICATION"},
+ {Keyword, "BACKUP COLLECTION"},
{Keyword, "EXTERNAL"},
{Keyword, "GROUP"},
{Keyword, "OBJECT"},
- {Keyword, "RESOURCE"},
+ {Keyword, "RESOURCE POOL"},
{Keyword, "TABLE"},
{Keyword, "TABLESTORE"},
{Keyword, "TOPIC"},
@@ -198,7 +198,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "CREATE"},
{Keyword, "DECLARE"},
{Keyword, "DEFINE"},
- {Keyword, "DELETE"},
+ {Keyword, "DELETE FROM"},
{Keyword, "DISCARD"},
{Keyword, "DO"},
{Keyword, "DROP"},
@@ -213,14 +213,14 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
{Keyword, "PARALLEL"},
{Keyword, "PRAGMA"},
{Keyword, "PROCESS"},
- {Keyword, "QUERY"},
+ {Keyword, "QUERY PLAN"},
{Keyword, "REDUCE"},
{Keyword, "REPLACE"},
{Keyword, "RESTORE"},
{Keyword, "REVOKE"},
{Keyword, "ROLLBACK"},
{Keyword, "SELECT"},
- {Keyword, "SHOW"},
+ {Keyword, "SHOW CREATE"},
{Keyword, "UPDATE"},
{Keyword, "UPSERT"},
{Keyword, "USE"},
@@ -234,21 +234,21 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(Grant) {
TVector<TCandidate> expected = {
{Keyword, "ALL"},
- {Keyword, "ALTER"},
+ {Keyword, "ALTER SCHEMA"},
{Keyword, "CONNECT"},
{Keyword, "CREATE"},
- {Keyword, "DESCRIBE"},
+ {Keyword, "DESCRIBE SCHEMA"},
{Keyword, "DROP"},
- {Keyword, "ERASE"},
+ {Keyword, "ERASE ROW"},
{Keyword, "FULL"},
{Keyword, "GRANT"},
{Keyword, "INSERT"},
{Keyword, "LIST"},
{Keyword, "MANAGE"},
{Keyword, "MODIFY"},
- {Keyword, "REMOVE"},
+ {Keyword, "REMOVE SCHEMA"},
{Keyword, "SELECT"},
- {Keyword, "UPDATE"},
+ {Keyword, "UPDATE ROW"},
{Keyword, "USE"},
};
@@ -278,36 +278,36 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(Select) {
TVector<TCandidate> expected = {
{Keyword, "ALL"},
- {Keyword, "BITCAST"},
+ {Keyword, "BITCAST("},
{Keyword, "CALLABLE"},
{Keyword, "CASE"},
- {Keyword, "CAST"},
+ {Keyword, "CAST("},
{Keyword, "CURRENT_DATE"},
{Keyword, "CURRENT_TIME"},
{Keyword, "CURRENT_TIMESTAMP"},
- {Keyword, "DICT"},
+ {Keyword, "DICT<"},
{Keyword, "DISTINCT"},
{Keyword, "EMPTY_ACTION"},
{Keyword, "ENUM"},
- {Keyword, "EXISTS"},
+ {Keyword, "EXISTS("},
{Keyword, "FALSE"},
- {Keyword, "FLOW"},
- {Keyword, "JSON_EXISTS"},
- {Keyword, "JSON_QUERY"},
- {Keyword, "JSON_VALUE"},
- {Keyword, "LIST"},
+ {Keyword, "FLOW<"},
+ {Keyword, "JSON_EXISTS("},
+ {Keyword, "JSON_QUERY("},
+ {Keyword, "JSON_VALUE("},
+ {Keyword, "LIST<"},
{Keyword, "NOT"},
{Keyword, "NULL"},
- {Keyword, "OPTIONAL"},
- {Keyword, "RESOURCE"},
- {Keyword, "SET"},
+ {Keyword, "OPTIONAL<"},
+ {Keyword, "RESOURCE<"},
+ {Keyword, "SET<"},
{Keyword, "STREAM"},
{Keyword, "STRUCT"},
- {Keyword, "TAGGED"},
+ {Keyword, "TAGGED<"},
{Keyword, "TRUE"},
{Keyword, "TUPLE"},
{Keyword, "VARIANT"},
- {FunctionName, "StartsWith"},
+ {FunctionName, "StartsWith("},
};
auto engine = MakeSqlCompletionEngineUT();
@@ -316,35 +316,35 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(SelectWhere) {
TVector<TCandidate> expected = {
- {Keyword, "BITCAST"},
+ {Keyword, "BITCAST("},
{Keyword, "CALLABLE"},
{Keyword, "CASE"},
- {Keyword, "CAST"},
+ {Keyword, "CAST("},
{Keyword, "CURRENT_DATE"},
{Keyword, "CURRENT_TIME"},
{Keyword, "CURRENT_TIMESTAMP"},
- {Keyword, "DICT"},
+ {Keyword, "DICT<"},
{Keyword, "EMPTY_ACTION"},
{Keyword, "ENUM"},
- {Keyword, "EXISTS"},
+ {Keyword, "EXISTS("},
{Keyword, "FALSE"},
- {Keyword, "FLOW"},
- {Keyword, "JSON_EXISTS"},
- {Keyword, "JSON_QUERY"},
- {Keyword, "JSON_VALUE"},
- {Keyword, "LIST"},
+ {Keyword, "FLOW<"},
+ {Keyword, "JSON_EXISTS("},
+ {Keyword, "JSON_QUERY("},
+ {Keyword, "JSON_VALUE("},
+ {Keyword, "LIST<"},
{Keyword, "NOT"},
{Keyword, "NULL"},
- {Keyword, "OPTIONAL"},
- {Keyword, "RESOURCE"},
- {Keyword, "SET"},
- {Keyword, "STREAM"},
+ {Keyword, "OPTIONAL<"},
+ {Keyword, "RESOURCE<"},
+ {Keyword, "SET<"},
+ {Keyword, "STREAM<"},
{Keyword, "STRUCT"},
- {Keyword, "TAGGED"},
+ {Keyword, "TAGGED<"},
{Keyword, "TRUE"},
{Keyword, "TUPLE"},
{Keyword, "VARIANT"},
- {FunctionName, "StartsWith"},
+ {FunctionName, "StartsWith("},
};
auto engine = MakeSqlCompletionEngineUT();
@@ -363,20 +363,20 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
Y_UNIT_TEST(TypeName) {
TVector<TCandidate> expected = {
- {Keyword, "CALLABLE"},
- {Keyword, "DECIMAL"},
- {Keyword, "DICT"},
- {Keyword, "ENUM"},
- {Keyword, "FLOW"},
- {Keyword, "LIST"},
- {Keyword, "OPTIONAL"},
- {Keyword, "RESOURCE"},
- {Keyword, "SET"},
- {Keyword, "STREAM"},
+ {Keyword, "CALLABLE<("},
+ {Keyword, "DECIMAL("},
+ {Keyword, "DICT<"},
+ {Keyword, "ENUM<"},
+ {Keyword, "FLOW<"},
+ {Keyword, "LIST<"},
+ {Keyword, "OPTIONAL<"},
+ {Keyword, "RESOURCE<"},
+ {Keyword, "SET<"},
+ {Keyword, "STREAM<"},
{Keyword, "STRUCT"},
- {Keyword, "TAGGED"},
+ {Keyword, "TAGGED<"},
{Keyword, "TUPLE"},
- {Keyword, "VARIANT"},
+ {Keyword, "VARIANT<"},
{TypeName, "Uint64"},
};
@@ -515,14 +515,14 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
}
{
TVector<TCandidate> expectedPrefix = {
- {FunctionName, "Min"},
- {FunctionName, "Max"},
- {FunctionName, "MaxOf"},
- {FunctionName, "MaxBy"},
- {FunctionName, "MinBy"},
- {FunctionName, "Math::Abs"},
- {FunctionName, "Math::Acos"},
- {FunctionName, "Math::Asin"},
+ {FunctionName, "Min("},
+ {FunctionName, "Max("},
+ {FunctionName, "MaxOf("},
+ {FunctionName, "MaxBy("},
+ {FunctionName, "MinBy("},
+ {FunctionName, "Math::Abs("},
+ {FunctionName, "Math::Acos("},
+ {FunctionName, "Math::Asin("},
};
auto actualPrefix = Complete(engine, {"SELECT m"});
diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
index c8f5a2e4a8f..b4f64630f77 100644
--- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp
@@ -6,10 +6,11 @@ namespace NSQLComplete {
class TSqlGrammar: public ISqlGrammar {
public:
- TSqlGrammar()
+ TSqlGrammar(const NSQLReflect::TLexerGrammar& grammar)
: Vocabulary(GetVocabularyP())
, AllTokens(ComputeAllTokens())
- , KeywordTokens(ComputeKeywordTokens())
+ , KeywordTokens(ComputeKeywordTokens(grammar))
+ , PunctuationTokens(ComputePunctuationTokens(grammar))
{
}
@@ -25,6 +26,10 @@ namespace NSQLComplete {
return KeywordTokens;
}
+ const std::unordered_set<TTokenId>& GetPunctuationTokens() const override {
+ return PunctuationTokens;
+ }
+
private:
static const antlr4::dfa::Vocabulary* GetVocabularyP() {
return &NALADefaultAntlr4::SQLv1Antlr4Parser(nullptr).getVocabulary();
@@ -42,26 +47,39 @@ namespace NSQLComplete {
return allTokens;
}
- std::unordered_set<TTokenId> ComputeKeywordTokens() {
+ std::unordered_set<TTokenId> ComputeKeywordTokens(
+ const NSQLReflect::TLexerGrammar& grammar) {
const auto& vocabulary = GetVocabulary();
- const auto keywords = NSQLReflect::LoadLexerGrammar().KeywordNames;
auto keywordTokens = GetAllTokens();
std::erase_if(keywordTokens, [&](TTokenId token) {
- return !keywords.contains(vocabulary.getSymbolicName(token));
+ return !grammar.KeywordNames.contains(vocabulary.getSymbolicName(token));
});
keywordTokens.erase(TOKEN_EOF);
return keywordTokens;
}
+ std::unordered_set<TTokenId> ComputePunctuationTokens(
+ const NSQLReflect::TLexerGrammar& grammar) {
+ const auto& vocabulary = GetVocabulary();
+
+ auto punctuationTokens = GetAllTokens();
+ std::erase_if(punctuationTokens, [&](TTokenId token) {
+ return !grammar.PunctuationNames.contains(vocabulary.getSymbolicName(token));
+ });
+
+ return punctuationTokens;
+ }
+
const antlr4::dfa::Vocabulary* Vocabulary;
const std::unordered_set<TTokenId> AllTokens;
const std::unordered_set<TTokenId> KeywordTokens;
+ const std::unordered_set<TTokenId> PunctuationTokens;
};
const ISqlGrammar& GetSqlGrammar() {
- const static TSqlGrammar DefaultSqlGrammar{};
+ const static TSqlGrammar DefaultSqlGrammar(NSQLReflect::LoadLexerGrammar());
return DefaultSqlGrammar;
}
diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.h b/yql/essentials/sql/v1/complete/syntax/grammar.h
index b6449698ea5..a349bd4a3de 100644
--- a/yql/essentials/sql/v1/complete/syntax/grammar.h
+++ b/yql/essentials/sql/v1/complete/syntax/grammar.h
@@ -21,6 +21,7 @@ namespace NSQLComplete {
virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0;
virtual const std::unordered_set<TTokenId>& GetAllTokens() const = 0;
virtual const std::unordered_set<TTokenId>& GetKeywordTokens() const = 0;
+ virtual const std::unordered_set<TTokenId>& GetPunctuationTokens() const = 0;
virtual ~ISqlGrammar() = default;
};
diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp
index 430718a56f1..cac43e5a320 100644
--- a/yql/essentials/sql/v1/complete/syntax/local.cpp
+++ b/yql/essentials/sql/v1/complete/syntax/local.cpp
@@ -73,6 +73,9 @@ namespace NSQLComplete {
for (auto keywordToken : Grammar->GetKeywordTokens()) {
ignoredTokens.erase(keywordToken);
}
+ for (auto punctuationToken : Grammar->GetPunctuationTokens()) {
+ ignoredTokens.erase(punctuationToken);
+ }
return ignoredTokens;
}
@@ -107,12 +110,27 @@ namespace NSQLComplete {
TVector<TString> keywords;
for (const auto& token : candidates.Tokens) {
if (keywordTokens.contains(token.Number)) {
- keywords.emplace_back(vocabulary.getDisplayName(token.Number));
+ keywords.emplace_back(Display(vocabulary, token.Number));
+ for (auto following : token.Following) {
+ if (keywordTokens.contains(following)) {
+ keywords.back() += " ";
+ }
+ keywords.back() += Display(vocabulary, following);
+ }
}
}
return keywords;
}
+ std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) {
+ auto name = vocabulary.getDisplayName(tokenType);
+ if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) {
+ name.erase(static_cast<std::string::size_type>(0), 1);
+ name.pop_back();
+ }
+ return name;
+ }
+
bool IsTypeNameMatched(const TC3Candidates& candidates) {
return AnyOf(candidates.Rules, [&](const TMatchedRule& rule) {
return IsLikelyTypeStack(rule.ParserCallStack);