diff options
author | vityaman <[email protected]> | 2025-04-02 21:25:29 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-04-02 21:36:46 +0300 |
commit | bf8b24c06de4df2df3bf8e40e82caba8f5528301 (patch) | |
tree | c7dac59cb55d4e7d9127dfe7a00ccd5ebbac9d73 /yql/essentials/sql/v1 | |
parent | 66b85e2f81db12f8a7086e50bf40b19303b4622d (diff) |
YQL-19747 Complete token sequences
Token sequences plan
- [x] [Easy] Support `GROUP BY`, `ORDER BY`.
- [x] [Easy] Support `Optional<`, `List<`, `Dict<`.
- [x] [Easy] Support `Avg(`, `Sum(`.
---
Co-authored-by: Victor Smirnov [[email protected]]
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1173
commit_hash:a443dec666c486fef7f891be04d68a786be83049
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3i.h | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3t.h | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.cpp | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete_ut.cpp | 142 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/grammar.cpp | 30 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/grammar.h | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/local.cpp | 20 |
7 files changed, 120 insertions, 82 deletions
diff --git a/yql/essentials/sql/v1/complete/antlr4/c3i.h b/yql/essentials/sql/v1/complete/antlr4/c3i.h index ca91649a547..26c71868051 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3i.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3i.h @@ -10,8 +10,10 @@ namespace NSQLComplete { + // std::vector is used to prevent copying a C3 output struct TSuggestedToken { TTokenId Number; + std::vector<TTokenId> Following; }; struct TMatchedRule { diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h index 9042937678a..750da64229c 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3t.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h @@ -13,8 +13,6 @@ #include <util/generic/string.h> #include <util/generic/vector.h> -#include <unordered_set> - namespace NSQLComplete { template <class Lexer, class Parser> @@ -67,8 +65,8 @@ namespace NSQLComplete { static TC3Candidates Converted(c3::CandidatesCollection candidates) { TC3Candidates converted; - for (const auto& [token, _] : candidates.tokens) { - converted.Tokens.emplace_back(token); + for (auto& [token, following] : candidates.tokens) { + converted.Tokens.emplace_back(token, std::move(following)); } for (auto& [rule, data] : candidates.rules) { converted.Rules.emplace_back(rule, std::move(data.ruleList)); diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp index b73aafe0a4f..74ddbc04154 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -102,6 +102,7 @@ namespace NSQLComplete { return {ECandidateKind::TypeName, std::move(name.Indentifier)}; } if constexpr (std::is_base_of_v<TFunctionName, T>) { + name.Indentifier += "("; return {ECandidateKind::FunctionName, std::move(name.Indentifier)}; } }, std::move(name))); diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index 1714ed47471..ade78e81a76 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -77,7 +77,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "CREATE"}, {Keyword, "DECLARE"}, {Keyword, "DEFINE"}, - {Keyword, "DELETE"}, + {Keyword, "DELETE FROM"}, {Keyword, "DISCARD"}, {Keyword, "DO"}, {Keyword, "DROP"}, @@ -99,7 +99,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "REVOKE"}, {Keyword, "ROLLBACK"}, {Keyword, "SELECT"}, - {Keyword, "SHOW"}, + {Keyword, "SHOW CREATE"}, {Keyword, "UPDATE"}, {Keyword, "UPSERT"}, {Keyword, "USE"}, @@ -117,13 +117,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(Alter) { TVector<TCandidate> expected = { - {Keyword, "ASYNC"}, - {Keyword, "BACKUP"}, + {Keyword, "ASYNC REPLICATION"}, + {Keyword, "BACKUP COLLECTION"}, {Keyword, "DATABASE"}, {Keyword, "EXTERNAL"}, {Keyword, "GROUP"}, {Keyword, "OBJECT"}, - {Keyword, "RESOURCE"}, + {Keyword, "RESOURCE POOL"}, {Keyword, "SEQUENCE"}, {Keyword, "TABLE"}, {Keyword, "TABLESTORE"}, @@ -138,17 +138,17 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(Create) { TVector<TCandidate> expected = { - {Keyword, "ASYNC"}, - {Keyword, "BACKUP"}, + {Keyword, "ASYNC REPLICATION"}, + {Keyword, "BACKUP COLLECTION"}, {Keyword, "EXTERNAL"}, {Keyword, "GROUP"}, {Keyword, "OBJECT"}, - {Keyword, "OR"}, - {Keyword, "RESOURCE"}, + {Keyword, "OR REPLACE"}, + {Keyword, "RESOURCE POOL"}, {Keyword, "TABLE"}, {Keyword, "TABLESTORE"}, - {Keyword, "TEMP"}, - {Keyword, "TEMPORARY"}, + {Keyword, "TEMP TABLE"}, + {Keyword, "TEMPORARY TABLE"}, {Keyword, "TOPIC"}, {Keyword, "TRANSFER"}, {Keyword, "USER"}, @@ -170,12 +170,12 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(Drop) { TVector<TCandidate> expected = { - {Keyword, "ASYNC"}, - {Keyword, "BACKUP"}, + {Keyword, "ASYNC REPLICATION"}, + {Keyword, "BACKUP COLLECTION"}, {Keyword, "EXTERNAL"}, {Keyword, "GROUP"}, {Keyword, "OBJECT"}, - {Keyword, "RESOURCE"}, + {Keyword, "RESOURCE POOL"}, {Keyword, "TABLE"}, {Keyword, "TABLESTORE"}, {Keyword, "TOPIC"}, @@ -198,7 +198,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "CREATE"}, {Keyword, "DECLARE"}, {Keyword, "DEFINE"}, - {Keyword, "DELETE"}, + {Keyword, "DELETE FROM"}, {Keyword, "DISCARD"}, {Keyword, "DO"}, {Keyword, "DROP"}, @@ -213,14 +213,14 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "PARALLEL"}, {Keyword, "PRAGMA"}, {Keyword, "PROCESS"}, - {Keyword, "QUERY"}, + {Keyword, "QUERY PLAN"}, {Keyword, "REDUCE"}, {Keyword, "REPLACE"}, {Keyword, "RESTORE"}, {Keyword, "REVOKE"}, {Keyword, "ROLLBACK"}, {Keyword, "SELECT"}, - {Keyword, "SHOW"}, + {Keyword, "SHOW CREATE"}, {Keyword, "UPDATE"}, {Keyword, "UPSERT"}, {Keyword, "USE"}, @@ -234,21 +234,21 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(Grant) { TVector<TCandidate> expected = { {Keyword, "ALL"}, - {Keyword, "ALTER"}, + {Keyword, "ALTER SCHEMA"}, {Keyword, "CONNECT"}, {Keyword, "CREATE"}, - {Keyword, "DESCRIBE"}, + {Keyword, "DESCRIBE SCHEMA"}, {Keyword, "DROP"}, - {Keyword, "ERASE"}, + {Keyword, "ERASE ROW"}, {Keyword, "FULL"}, {Keyword, "GRANT"}, {Keyword, "INSERT"}, {Keyword, "LIST"}, {Keyword, "MANAGE"}, {Keyword, "MODIFY"}, - {Keyword, "REMOVE"}, + {Keyword, "REMOVE SCHEMA"}, {Keyword, "SELECT"}, - {Keyword, "UPDATE"}, + {Keyword, "UPDATE ROW"}, {Keyword, "USE"}, }; @@ -278,36 +278,36 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(Select) { TVector<TCandidate> expected = { {Keyword, "ALL"}, - {Keyword, "BITCAST"}, + {Keyword, "BITCAST("}, {Keyword, "CALLABLE"}, {Keyword, "CASE"}, - {Keyword, "CAST"}, + {Keyword, "CAST("}, {Keyword, "CURRENT_DATE"}, {Keyword, "CURRENT_TIME"}, {Keyword, "CURRENT_TIMESTAMP"}, - {Keyword, "DICT"}, + {Keyword, "DICT<"}, {Keyword, "DISTINCT"}, {Keyword, "EMPTY_ACTION"}, {Keyword, "ENUM"}, - {Keyword, "EXISTS"}, + {Keyword, "EXISTS("}, {Keyword, "FALSE"}, - {Keyword, "FLOW"}, - {Keyword, "JSON_EXISTS"}, - {Keyword, "JSON_QUERY"}, - {Keyword, "JSON_VALUE"}, - {Keyword, "LIST"}, + {Keyword, "FLOW<"}, + {Keyword, "JSON_EXISTS("}, + {Keyword, "JSON_QUERY("}, + {Keyword, "JSON_VALUE("}, + {Keyword, "LIST<"}, {Keyword, "NOT"}, {Keyword, "NULL"}, - {Keyword, "OPTIONAL"}, - {Keyword, "RESOURCE"}, - {Keyword, "SET"}, + {Keyword, "OPTIONAL<"}, + {Keyword, "RESOURCE<"}, + {Keyword, "SET<"}, {Keyword, "STREAM"}, {Keyword, "STRUCT"}, - {Keyword, "TAGGED"}, + {Keyword, "TAGGED<"}, {Keyword, "TRUE"}, {Keyword, "TUPLE"}, {Keyword, "VARIANT"}, - {FunctionName, "StartsWith"}, + {FunctionName, "StartsWith("}, }; auto engine = MakeSqlCompletionEngineUT(); @@ -316,35 +316,35 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(SelectWhere) { TVector<TCandidate> expected = { - {Keyword, "BITCAST"}, + {Keyword, "BITCAST("}, {Keyword, "CALLABLE"}, {Keyword, "CASE"}, - {Keyword, "CAST"}, + {Keyword, "CAST("}, {Keyword, "CURRENT_DATE"}, {Keyword, "CURRENT_TIME"}, {Keyword, "CURRENT_TIMESTAMP"}, - {Keyword, "DICT"}, + {Keyword, "DICT<"}, {Keyword, "EMPTY_ACTION"}, {Keyword, "ENUM"}, - {Keyword, "EXISTS"}, + {Keyword, "EXISTS("}, {Keyword, "FALSE"}, - {Keyword, "FLOW"}, - {Keyword, "JSON_EXISTS"}, - {Keyword, "JSON_QUERY"}, - {Keyword, "JSON_VALUE"}, - {Keyword, "LIST"}, + {Keyword, "FLOW<"}, + {Keyword, "JSON_EXISTS("}, + {Keyword, "JSON_QUERY("}, + {Keyword, "JSON_VALUE("}, + {Keyword, "LIST<"}, {Keyword, "NOT"}, {Keyword, "NULL"}, - {Keyword, "OPTIONAL"}, - {Keyword, "RESOURCE"}, - {Keyword, "SET"}, - {Keyword, "STREAM"}, + {Keyword, "OPTIONAL<"}, + {Keyword, "RESOURCE<"}, + {Keyword, "SET<"}, + {Keyword, "STREAM<"}, {Keyword, "STRUCT"}, - {Keyword, "TAGGED"}, + {Keyword, "TAGGED<"}, {Keyword, "TRUE"}, {Keyword, "TUPLE"}, {Keyword, "VARIANT"}, - {FunctionName, "StartsWith"}, + {FunctionName, "StartsWith("}, }; auto engine = MakeSqlCompletionEngineUT(); @@ -363,20 +363,20 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { Y_UNIT_TEST(TypeName) { TVector<TCandidate> expected = { - {Keyword, "CALLABLE"}, - {Keyword, "DECIMAL"}, - {Keyword, "DICT"}, - {Keyword, "ENUM"}, - {Keyword, "FLOW"}, - {Keyword, "LIST"}, - {Keyword, "OPTIONAL"}, - {Keyword, "RESOURCE"}, - {Keyword, "SET"}, - {Keyword, "STREAM"}, + {Keyword, "CALLABLE<("}, + {Keyword, "DECIMAL("}, + {Keyword, "DICT<"}, + {Keyword, "ENUM<"}, + {Keyword, "FLOW<"}, + {Keyword, "LIST<"}, + {Keyword, "OPTIONAL<"}, + {Keyword, "RESOURCE<"}, + {Keyword, "SET<"}, + {Keyword, "STREAM<"}, {Keyword, "STRUCT"}, - {Keyword, "TAGGED"}, + {Keyword, "TAGGED<"}, {Keyword, "TUPLE"}, - {Keyword, "VARIANT"}, + {Keyword, "VARIANT<"}, {TypeName, "Uint64"}, }; @@ -515,14 +515,14 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { } { TVector<TCandidate> expectedPrefix = { - {FunctionName, "Min"}, - {FunctionName, "Max"}, - {FunctionName, "MaxOf"}, - {FunctionName, "MaxBy"}, - {FunctionName, "MinBy"}, - {FunctionName, "Math::Abs"}, - {FunctionName, "Math::Acos"}, - {FunctionName, "Math::Asin"}, + {FunctionName, "Min("}, + {FunctionName, "Max("}, + {FunctionName, "MaxOf("}, + {FunctionName, "MaxBy("}, + {FunctionName, "MinBy("}, + {FunctionName, "Math::Abs("}, + {FunctionName, "Math::Acos("}, + {FunctionName, "Math::Asin("}, }; auto actualPrefix = Complete(engine, {"SELECT m"}); diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.cpp b/yql/essentials/sql/v1/complete/syntax/grammar.cpp index c8f5a2e4a8f..b4f64630f77 100644 --- a/yql/essentials/sql/v1/complete/syntax/grammar.cpp +++ b/yql/essentials/sql/v1/complete/syntax/grammar.cpp @@ -6,10 +6,11 @@ namespace NSQLComplete { class TSqlGrammar: public ISqlGrammar { public: - TSqlGrammar() + TSqlGrammar(const NSQLReflect::TLexerGrammar& grammar) : Vocabulary(GetVocabularyP()) , AllTokens(ComputeAllTokens()) - , KeywordTokens(ComputeKeywordTokens()) + , KeywordTokens(ComputeKeywordTokens(grammar)) + , PunctuationTokens(ComputePunctuationTokens(grammar)) { } @@ -25,6 +26,10 @@ namespace NSQLComplete { return KeywordTokens; } + const std::unordered_set<TTokenId>& GetPunctuationTokens() const override { + return PunctuationTokens; + } + private: static const antlr4::dfa::Vocabulary* GetVocabularyP() { return &NALADefaultAntlr4::SQLv1Antlr4Parser(nullptr).getVocabulary(); @@ -42,26 +47,39 @@ namespace NSQLComplete { return allTokens; } - std::unordered_set<TTokenId> ComputeKeywordTokens() { + std::unordered_set<TTokenId> ComputeKeywordTokens( + const NSQLReflect::TLexerGrammar& grammar) { const auto& vocabulary = GetVocabulary(); - const auto keywords = NSQLReflect::LoadLexerGrammar().KeywordNames; auto keywordTokens = GetAllTokens(); std::erase_if(keywordTokens, [&](TTokenId token) { - return !keywords.contains(vocabulary.getSymbolicName(token)); + return !grammar.KeywordNames.contains(vocabulary.getSymbolicName(token)); }); keywordTokens.erase(TOKEN_EOF); return keywordTokens; } + std::unordered_set<TTokenId> ComputePunctuationTokens( + const NSQLReflect::TLexerGrammar& grammar) { + const auto& vocabulary = GetVocabulary(); + + auto punctuationTokens = GetAllTokens(); + std::erase_if(punctuationTokens, [&](TTokenId token) { + return !grammar.PunctuationNames.contains(vocabulary.getSymbolicName(token)); + }); + + return punctuationTokens; + } + const antlr4::dfa::Vocabulary* Vocabulary; const std::unordered_set<TTokenId> AllTokens; const std::unordered_set<TTokenId> KeywordTokens; + const std::unordered_set<TTokenId> PunctuationTokens; }; const ISqlGrammar& GetSqlGrammar() { - const static TSqlGrammar DefaultSqlGrammar{}; + const static TSqlGrammar DefaultSqlGrammar(NSQLReflect::LoadLexerGrammar()); return DefaultSqlGrammar; } diff --git a/yql/essentials/sql/v1/complete/syntax/grammar.h b/yql/essentials/sql/v1/complete/syntax/grammar.h index b6449698ea5..a349bd4a3de 100644 --- a/yql/essentials/sql/v1/complete/syntax/grammar.h +++ b/yql/essentials/sql/v1/complete/syntax/grammar.h @@ -21,6 +21,7 @@ namespace NSQLComplete { virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0; virtual const std::unordered_set<TTokenId>& GetAllTokens() const = 0; virtual const std::unordered_set<TTokenId>& GetKeywordTokens() const = 0; + virtual const std::unordered_set<TTokenId>& GetPunctuationTokens() const = 0; virtual ~ISqlGrammar() = default; }; diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp index 430718a56f1..cac43e5a320 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.cpp +++ b/yql/essentials/sql/v1/complete/syntax/local.cpp @@ -73,6 +73,9 @@ namespace NSQLComplete { for (auto keywordToken : Grammar->GetKeywordTokens()) { ignoredTokens.erase(keywordToken); } + for (auto punctuationToken : Grammar->GetPunctuationTokens()) { + ignoredTokens.erase(punctuationToken); + } return ignoredTokens; } @@ -107,12 +110,27 @@ namespace NSQLComplete { TVector<TString> keywords; for (const auto& token : candidates.Tokens) { if (keywordTokens.contains(token.Number)) { - keywords.emplace_back(vocabulary.getDisplayName(token.Number)); + keywords.emplace_back(Display(vocabulary, token.Number)); + for (auto following : token.Following) { + if (keywordTokens.contains(following)) { + keywords.back() += " "; + } + keywords.back() += Display(vocabulary, following); + } } } return keywords; } + std::string Display(const antlr4::dfa::Vocabulary& vocabulary, TTokenId tokenType) { + auto name = vocabulary.getDisplayName(tokenType); + if (2 <= name.length() && name.starts_with('\'') && name.ends_with('\'')) { + name.erase(static_cast<std::string::size_type>(0), 1); + name.pop_back(); + } + return name; + } + bool IsTypeNameMatched(const TC3Candidates& candidates) { return AnyOf(candidates.Rules, [&](const TMatchedRule& rule) { return IsLikelyTypeStack(rule.ParserCallStack); |