diff options
author | tesseract <[email protected]> | 2025-01-21 12:50:29 +0300 |
---|---|---|
committer | tesseract <[email protected]> | 2025-01-21 14:32:19 +0300 |
commit | e677409ecb6106695a976307290b2f6bad3d72c0 (patch) | |
tree | 7c4fe8c7334a8f814506c857a08322ea800a8b79 /yql/essentials/sql/v1/lexer/lexer.cpp | |
parent | e2324a4c7934ecbc80eb47f70d2586c4995499b5 (diff) |
YQL for create, alter and drop transfer from topic to table
commit_hash:09502f46a7ee665609d2c4ba8d9e0aa421720cdb
Diffstat (limited to 'yql/essentials/sql/v1/lexer/lexer.cpp')
-rw-r--r-- | yql/essentials/sql/v1/lexer/lexer.cpp | 203 |
1 file changed, 203 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp index 1d38ec3d8b0..2609e0f7f6f 100644 --- a/yql/essentials/sql/v1/lexer/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -8,8 +8,11 @@ #include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h> #include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> #include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/sql/v1/sql.h> #include <util/string/ascii.h> +#include <util/string/builder.h> +#include <util/string/strip.h> #if defined(_tsan_enabled_) #include <util/system/mutex.h> @@ -80,4 +83,204 @@ bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) { return AsciiEqualsIgnoreCase(token.Name, token.Content); } +using NSQLTranslation::TParsedTokenList; +using TTokenIterator = TParsedTokenList::const_iterator; + +namespace { + +enum EParenType { + Open, + Close, + None +}; + +using TAdvanceCallback = std::function<EParenType(TTokenIterator& curr, TTokenIterator end)>; + +TTokenIterator SkipWS(TTokenIterator curr, TTokenIterator end) { + while (curr != end && curr->Name == "WS") { + ++curr; + } + return curr; +} + +TTokenIterator SkipWSOrComment(TTokenIterator curr, TTokenIterator end) { + while (curr != end && (curr->Name == "WS" || curr->Name == "COMMENT")) { + ++curr; + } + return curr; +} + +TTokenIterator SkipToNextBalanced(TTokenIterator begin, TTokenIterator end, const TAdvanceCallback& advance) { + i64 level = 0; + TTokenIterator curr = begin; + while (curr != end) { + switch (advance(curr, end)) { + case EParenType::Open: { + ++level; + break; + } + case EParenType::Close: { + --level; + if (level < 0) { + return end; + } else if (level == 0) { + return curr; + } + break; + } + case EParenType::None: + break; + } + } + return curr; +} + +TTokenIterator GetNextStatementBegin(TTokenIterator begin, TTokenIterator end) { + TAdvanceCallback advanceLambdaBody = [](TTokenIterator& 
curr, TTokenIterator end) -> EParenType { + Y_UNUSED(end); + if (curr->Name == "LBRACE_CURLY") { + ++curr; + return EParenType::Open; + } else if (curr->Name == "RBRACE_CURLY") { + ++curr; + return EParenType::Close; + } else { + ++curr; + return EParenType::None; + } + }; + + TAdvanceCallback advanceAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType { + auto tmp = curr; + if (curr->Name == "DEFINE") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && (curr->Name == "ACTION" || curr->Name == "SUBQUERY")) { + ++curr; + return EParenType::Open; + } + } else if (curr->Name == "END") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && curr->Name == "DEFINE") { + ++curr; + return EParenType::Close; + } + } + + curr = tmp; + ++curr; + return EParenType::None; + }; + + TAdvanceCallback advanceInlineAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType { + auto tmp = curr; + if (curr->Name == "DO") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && curr->Name == "BEGIN") { + ++curr; + return EParenType::Open; + } + } else if (curr->Name == "END") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && curr->Name == "DO") { + ++curr; + return EParenType::Close; + } + } + + curr = tmp; + ++curr; + return EParenType::None; + }; + + TTokenIterator curr = begin; + while (curr != end) { + bool matched = false; + for (auto cb : {advanceLambdaBody, advanceAction, advanceInlineAction}) { + TTokenIterator tmp = curr; + if (cb(tmp, end) == EParenType::Open) { + curr = SkipToNextBalanced(curr, end, cb); + matched = true; + if (curr == end) { + return curr; + } + } + } + if (matched) { + continue; + } + if (curr->Name == "SEMICOLON") { + auto next = SkipWS(curr + 1, end); + while (next != end && next->Name == "COMMENT" && curr->Line == next->Line) { + curr = next; + next = SkipWS(next + 1, end); + } + ++curr; + break; + } + ++curr; + } + + return curr; +} + +void 
SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenIterator>& output) { + output.clear(); + if (begin == end) { + return; + } + output.push_back(begin); + auto curr = begin; + while (curr != end) { + curr = GetNextStatementBegin(curr, end); + output.push_back(curr); + } +} + +} + +bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, TVector<TString>& statements, NYql::TIssues& issues) { + TParsedTokenList allTokens; + auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { + if (token.Name != "EOF") { + allTokens.push_back(token); + } + }; + + if (!lexer->Tokenize(query, "Query", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { + return false; + } + + TVector<TTokenIterator> statementsTokens; + SplitByStatements(allTokens.begin(), allTokens.end(), statementsTokens); + + for (size_t i = 1; i < statementsTokens.size(); ++i) { + TStringBuilder currentQueryBuilder; + for (auto it = statementsTokens[i - 1]; it != statementsTokens[i]; ++it) { + currentQueryBuilder << it->Content; + } + TString statement = currentQueryBuilder; + statement = StripStringLeft(statement); + + bool isBlank = true; + for (auto c : statement) { + if (c != ';') { + isBlank = false; + break; + } + }; + + if (isBlank) { + continue; + } + + statements.push_back(statement); + } + + return true; +} + } // namespace NSQLTranslationV1 |