author     tesseract <[email protected]>  2025-01-21 12:50:29 +0300
committer  tesseract <[email protected]>  2025-01-21 14:32:19 +0300
commit     e677409ecb6106695a976307290b2f6bad3d72c0 (patch)
tree       7c4fe8c7334a8f814506c857a08322ea800a8b79 /yql/essentials/sql/v1/lexer/lexer.cpp
parent     e2324a4c7934ecbc80eb47f70d2586c4995499b5 (diff)
YQL for CREATE, ALTER and DROP TRANSFER from topic to table
commit_hash:09502f46a7ee665609d2c4ba8d9e0aa421720cdb
Diffstat (limited to 'yql/essentials/sql/v1/lexer/lexer.cpp')
-rw-r--r--  yql/essentials/sql/v1/lexer/lexer.cpp  203
1 file changed, 203 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
index 1d38ec3d8b0..2609e0f7f6f 100644
--- a/yql/essentials/sql/v1/lexer/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -8,8 +8,11 @@
#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h>
#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/sql/v1/sql.h>

#include <util/string/ascii.h>
+#include <util/string/builder.h>
+#include <util/string/strip.h>

#if defined(_tsan_enabled_)
#include <util/system/mutex.h>
@@ -80,4 +83,204 @@ bool IsProbablyKeyword(const NSQLTranslation::TParsedToken& token) {
return AsciiEqualsIgnoreCase(token.Name, token.Content);
}

+using NSQLTranslation::TParsedTokenList;
+using TTokenIterator = TParsedTokenList::const_iterator;
+
+namespace {
+
+enum EParenType {
+ Open,
+ Close,
+ None
+};
+
+using TAdvanceCallback = std::function<EParenType(TTokenIterator& curr, TTokenIterator end)>;
+
+TTokenIterator SkipWS(TTokenIterator curr, TTokenIterator end) {
+ while (curr != end && curr->Name == "WS") {
+ ++curr;
+ }
+ return curr;
+}
+
+TTokenIterator SkipWSOrComment(TTokenIterator curr, TTokenIterator end) {
+ while (curr != end && (curr->Name == "WS" || curr->Name == "COMMENT")) {
+ ++curr;
+ }
+ return curr;
+}
+
+TTokenIterator SkipToNextBalanced(TTokenIterator begin, TTokenIterator end, const TAdvanceCallback& advance) {
+ i64 level = 0;
+ TTokenIterator curr = begin;
+ while (curr != end) {
+ switch (advance(curr, end)) {
+ case EParenType::Open: {
+ ++level;
+ break;
+ }
+ case EParenType::Close: {
+ --level;
+ if (level < 0) {
+ return end;
+ } else if (level == 0) {
+ return curr;
+ }
+ break;
+ }
+ case EParenType::None:
+ break;
+ }
+ }
+ return curr;
+}
+
+TTokenIterator GetNextStatementBegin(TTokenIterator begin, TTokenIterator end) {
+ TAdvanceCallback advanceLambdaBody = [](TTokenIterator& curr, TTokenIterator end) -> EParenType {
+ Y_UNUSED(end);
+ if (curr->Name == "LBRACE_CURLY") {
+ ++curr;
+ return EParenType::Open;
+ } else if (curr->Name == "RBRACE_CURLY") {
+ ++curr;
+ return EParenType::Close;
+ } else {
+ ++curr;
+ return EParenType::None;
+ }
+ };
+
+ TAdvanceCallback advanceAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType {
+ auto tmp = curr;
+ if (curr->Name == "DEFINE") {
+ ++curr;
+ curr = SkipWSOrComment(curr, end);
+ if (curr != end && (curr->Name == "ACTION" || curr->Name == "SUBQUERY")) {
+ ++curr;
+ return EParenType::Open;
+ }
+ } else if (curr->Name == "END") {
+ ++curr;
+ curr = SkipWSOrComment(curr, end);
+ if (curr != end && curr->Name == "DEFINE") {
+ ++curr;
+ return EParenType::Close;
+ }
+ }
+
+ curr = tmp;
+ ++curr;
+ return EParenType::None;
+ };
+
+ TAdvanceCallback advanceInlineAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType {
+ auto tmp = curr;
+ if (curr->Name == "DO") {
+ ++curr;
+ curr = SkipWSOrComment(curr, end);
+ if (curr != end && curr->Name == "BEGIN") {
+ ++curr;
+ return EParenType::Open;
+ }
+ } else if (curr->Name == "END") {
+ ++curr;
+ curr = SkipWSOrComment(curr, end);
+ if (curr != end && curr->Name == "DO") {
+ ++curr;
+ return EParenType::Close;
+ }
+ }
+
+ curr = tmp;
+ ++curr;
+ return EParenType::None;
+ };
+
+ TTokenIterator curr = begin;
+ while (curr != end) {
+ bool matched = false;
+ for (auto cb : {advanceLambdaBody, advanceAction, advanceInlineAction}) {
+ TTokenIterator tmp = curr;
+ if (cb(tmp, end) == EParenType::Open) {
+ curr = SkipToNextBalanced(curr, end, cb);
+ matched = true;
+ if (curr == end) {
+ return curr;
+ }
+ }
+ }
+ if (matched) {
+ continue;
+ }
+ if (curr->Name == "SEMICOLON") {
+ auto next = SkipWS(curr + 1, end);
+ while (next != end && next->Name == "COMMENT" && curr->Line == next->Line) {
+ curr = next;
+ next = SkipWS(next + 1, end);
+ }
+ ++curr;
+ break;
+ }
+ ++curr;
+ }
+
+ return curr;
+}
+
+void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenIterator>& output) {
+ output.clear();
+ if (begin == end) {
+ return;
+ }
+ output.push_back(begin);
+ auto curr = begin;
+ while (curr != end) {
+ curr = GetNextStatementBegin(curr, end);
+ output.push_back(curr);
+ }
+}
+
+}
+
+bool SplitQueryToStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer, TVector<TString>& statements, NYql::TIssues& issues) {
+ TParsedTokenList allTokens;
+ auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
+ if (token.Name != "EOF") {
+ allTokens.push_back(token);
+ }
+ };
+
+ if (!lexer->Tokenize(query, "Query", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
+ return false;
+ }
+
+ TVector<TTokenIterator> statementsTokens;
+ SplitByStatements(allTokens.begin(), allTokens.end(), statementsTokens);
+
+ for (size_t i = 1; i < statementsTokens.size(); ++i) {
+ TStringBuilder currentQueryBuilder;
+ for (auto it = statementsTokens[i - 1]; it != statementsTokens[i]; ++it) {
+ currentQueryBuilder << it->Content;
+ }
+ TString statement = currentQueryBuilder;
+ statement = StripStringLeft(statement);
+
+ bool isBlank = true;
+ for (auto c : statement) {
+ if (c != ';') {
+ isBlank = false;
+ break;
+ }
+ };
+
+ if (isBlank) {
+ continue;
+ }
+
+ statements.push_back(statement);
+ }
+
+ return true;
+}
+
} // namespace NSQLTranslationV1
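
Below is a minimal usage sketch of the SplitQueryToStatements helper added in this diff. It is not part of the commit: it assumes the function's declaration (and the types it uses) are exposed through yql/essentials/sql/v1/lexer/lexer.h, and the lexer itself is taken as a parameter because the factory call that constructs it lies outside this diff.

    // Illustrative sketch only; the lexer is assumed to be constructed elsewhere
    // via the SQL v1 lexer factory, which is not shown here.
    #include <yql/essentials/sql/v1/lexer/lexer.h>
    #include <util/generic/vector.h>
    #include <util/stream/output.h>

    void DumpStatements(const TString& query, NSQLTranslation::ILexer::TPtr& lexer) {
        TVector<TString> statements;
        NYql::TIssues issues;
        if (!NSQLTranslationV1::SplitQueryToStatements(query, lexer, statements, issues)) {
            // Tokenization failed; `issues` carries the diagnostics.
            return;
        }
        for (const auto& statement : statements) {
            // Each entry is one statement with leading whitespace stripped;
            // blank (semicolon-only) statements have already been skipped.
            Cout << statement << Endl;
        }
    }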