about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: robot-piglet <robot-piglet@yandex-team.com> 2025-02-28 19:58:58 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2025-02-28 20:15:53 +0300
commit 77397379b6394220a2dfd2802f417cdd8c214905 (patch)
tree 0921befe1120e354ab5a47cb126df6bc54b7483a
parent eae2230242d713b9044f14d0920dd0845d03145c (diff)
download ydb-77397379b6394220a2dfd2802f417cdd8c214905.tar.gz
Intermediate changes
commit_hash:5fc851d2c72810067fe0d407b66535b17de63129
-rw-r--r--yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake8
-rw-r--r--yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make43
-rw-r--r--yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake8
-rw-r--r--yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make43
-rw-r--r--yql/essentials/parser/antlr_ast/gen/ya.make4
-rw-r--r--yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg1176
-rw-r--r--yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg344
-rw-r--r--yql/essentials/parser/antlr_ast/ya.make4
-rw-r--r--yql/essentials/parser/ya.make1
-rw-r--r--yql/essentials/sql/v1/complete/c3_engine.h116
-rw-r--r--yql/essentials/sql/v1/complete/sql_antlr4.cpp116
-rw-r--r--yql/essentials/sql/v1/complete/sql_antlr4.h28
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete.cpp89
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete.h44
-rw-r--r--yql/essentials/sql/v1/complete/sql_complete_ut.cpp323
-rw-r--r--yql/essentials/sql/v1/complete/sql_context.cpp123
-rw-r--r--yql/essentials/sql/v1/complete/sql_context.h23
-rw-r--r--yql/essentials/sql/v1/complete/sql_syntax.cpp19
-rw-r--r--yql/essentials/sql/v1/complete/sql_syntax.h10
-rw-r--r--yql/essentials/sql/v1/complete/string_util.cpp29
-rw-r--r--yql/essentials/sql/v1/complete/string_util.h17
-rw-r--r--yql/essentials/sql/v1/complete/string_util_ut.cpp21
-rw-r--r--yql/essentials/sql/v1/complete/ut/ya.make8
-rw-r--r--yql/essentials/sql/v1/complete/ya.make24
-rw-r--r--yql/essentials/sql/v1/ya.make1
25 files changed, 2622 insertions, 0 deletions
diff --git a/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake
new file mode 100644
index 0000000000..e039c0caf6
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake
@@ -0,0 +1,8 @@
+set(GRAMMAR_STRING_CORE_SINGLE "~([']) | (QUOTE_SINGLE QUOTE_SINGLE)")
+set(GRAMMAR_STRING_CORE_DOUBLE "~([\"]) | (QUOTE_DOUBLE QUOTE_DOUBLE)")
+set(GRAMMAR_MULTILINE_COMMENT_CORE "MULTILINE_COMMENT | .")
+
+configure_file(
+ ${CMAKE_SOURCE_DIR}/yql/essentials/sql/v1/SQLv1Antlr4.g.in
+ ${CMAKE_BINARY_DIR}/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4.g
+)
diff --git a/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make
new file mode 100644
index 0000000000..d0b36ae35a
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make
@@ -0,0 +1,43 @@
+LIBRARY()
+
+SET(SQL_GRAMMAR ${ARCADIA_BUILD_ROOT}/${MODDIR}/SQLv1Antlr4.g)
+
+IF(EXPORT_CMAKE)
+ MANUAL_GENERATION(${SQL_GRAMMAR})
+ELSE()
+ SET(GRAMMAR_STRING_CORE_SINGLE "\"~([']) | (QUOTE_SINGLE QUOTE_SINGLE)\"")
+ SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE#]) | (QUOTE_DOUBLE QUOTE_DOUBLE)\"")
+ SET(GRAMMAR_MULTILINE_COMMENT_CORE "\"MULTILINE_COMMENT | .\"")
+
+ CONFIGURE_FILE(${ARCADIA_ROOT}/yql/essentials/sql/v1/SQLv1Antlr4.g.in ${SQL_GRAMMAR})
+ENDIF()
+
+COPY_FILE(
+ ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+)
+
+COPY_FILE(
+ ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
+)
+
+RUN_ANTLR4(
+ ${SQL_GRAMMAR}
+ -no-listener
+ -package NALAAnsiAntlr4
+ -lib .
+ IN
+ ${SQL_GRAMMAR}
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
+ OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h
+ OUTPUT_INCLUDES contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h
+ CWD ${ARCADIA_BUILD_ROOT}/${MODDIR}
+)
+
+PEERDIR(
+ contrib/libs/antlr4_cpp_runtime
+)
+
+END()
diff --git a/yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake
new file mode 100644
index 0000000000..1fb34e7415
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake
@@ -0,0 +1,8 @@
+set(GRAMMAR_STRING_CORE_SINGLE "~(['\\\\]) | (BACKSLASH .)")
+set(GRAMMAR_STRING_CORE_DOUBLE "~([\"\\\\]) | (BACKSLASH .)")
+set(GRAMMAR_MULTILINE_COMMENT_CORE ".")
+
+configure_file(
+ ${CMAKE_SOURCE_DIR}/yql/essentials/sql/v1/SQLv1Antlr4.g.in
+ ${CMAKE_BINARY_DIR}/yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4.g
+)
diff --git a/yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make
new file mode 100644
index 0000000000..2ef05bd592
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make
@@ -0,0 +1,43 @@
+LIBRARY()
+
+SET(SQL_GRAMMAR ${ARCADIA_BUILD_ROOT}/${MODDIR}/SQLv1Antlr4.g)
+
+IF(EXPORT_CMAKE)
+ MANUAL_GENERATION(${SQL_GRAMMAR})
+ELSE()
+ SET(GRAMMAR_STRING_CORE_SINGLE "\"~(['#BACKSLASH#]) | (BACKSLASH .)\"")
+ SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE##BACKSLASH#]) | (BACKSLASH .)\"")
+ SET(GRAMMAR_MULTILINE_COMMENT_CORE "\".\"")
+
+ CONFIGURE_FILE(${ARCADIA_ROOT}/yql/essentials/sql/v1/SQLv1Antlr4.g.in ${SQL_GRAMMAR})
+ENDIF()
+
+COPY_FILE(
+ ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+)
+
+COPY_FILE(
+ ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
+)
+
+RUN_ANTLR4(
+ ${SQL_GRAMMAR}
+ -no-listener
+ -package NALADefaultAntlr4
+ -lib .
+ IN
+ ${SQL_GRAMMAR}
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
+ ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
+ OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h
+ OUTPUT_INCLUDES contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h
+ CWD ${ARCADIA_BUILD_ROOT}/${MODDIR}
+)
+
+PEERDIR(
+ contrib/libs/antlr4_cpp_runtime
+)
+
+END()
diff --git a/yql/essentials/parser/antlr_ast/gen/ya.make b/yql/essentials/parser/antlr_ast/gen/ya.make
new file mode 100644
index 0000000000..2f94911b40
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/gen/ya.make
@@ -0,0 +1,4 @@
+RECURSE(
+ v1_antlr4
+ v1_ansi_antlr4
+)
diff --git a/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
new file mode 100644
index 0000000000..c6592680f7
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
@@ -0,0 +1,1176 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2015 Dan McLaughlin, Mike Lischke
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import "Files.stg" // All file specific stuff.
+
+cppTypeInitMap ::= [
+ "int":"0",
+ "long":"0",
+ "float":"0.0f",
+ "double":"0.0",
+ "bool":"false",
+ "short":"0",
+ "char":"0",
+ default: "nullptr" // anything other than a primitive type is an object
+]
+
+LexerHeader(lexer, atn, actionFuncs, sempredFuncs, superClass = {antlr4::Lexer}) ::= <<
+<namedActions.context>
+
+class <file.exportMacro> <lexer.name> : public <superClass> {
+public:
+<if (lexer.tokens)>
+ enum {
+ <lexer.tokens: {k | TOKEN_<k> = <lexer.tokens.(k)>}; separator=", ", wrap, anchor>
+ };
+<endif>
+
+<if (lexer.escapedChannels)>
+ enum {
+ <lexer.escapedChannels: {k | <k> = <lexer.escapedChannels.(k)>}; separator=", ", wrap, anchor>
+ };
+<endif>
+
+<if (rest(lexer.escapedModeNames))>
+ enum {
+ <rest(lexer.escapedModeNames): {m | TOKEN_<m> = <i>}; separator=", ", wrap, anchor>
+ };
+<endif>
+
+ explicit <lexer.name>(antlr4::CharStream *input);
+
+ ~<lexer.name>() override;
+
+ <namedActions.members>
+
+ std::string getGrammarFileName() const override;
+
+ const std::vector\<std::string>& getRuleNames() const override;
+
+ const std::vector\<std::string>& getChannelNames() const override;
+
+ const std::vector\<std::string>& getModeNames() const override;
+
+ const antlr4::dfa::Vocabulary& getVocabulary() const override;
+
+ antlr4::atn::SerializedATNView getSerializedATN() const override;
+
+ const antlr4::atn::ATN& getATN() const override;
+
+ <if (actionFuncs)>
+ void action(antlr4::RuleContext *context, size_t ruleIndex, size_t actionIndex) override;
+ <endif>
+
+ <if (sempredFuncs)>
+ bool sempred(antlr4::RuleContext *_localctx, size_t ruleIndex, size_t predicateIndex) override;
+ <endif>
+
+ // By default the static state used to implement the lexer is lazily initialized during the first
+ // call to the constructor. You can call this function if you wish to initialize the static state
+ // ahead of time.
+ static void initialize();
+
+private:
+ <namedActions.declarations>
+
+ // Individual action functions triggered by action() above.
+ <actionFuncs.values; separator="\n">
+
+ // Individual semantic predicate functions triggered by sempred() above.
+ <sempredFuncs.values; separator="\n">
+
+ <atn>
+};
+>>
+
+Lexer(lexer, atn, actionFuncs, sempredFuncs, superClass = {Lexer}) ::= <<
+
+using namespace antlr4;
+
+namespace {
+
+struct <lexer.name; format = "cap">StaticData final {
+ <lexer.name; format = "cap">StaticData(std::vector\<std::string> ruleNames,
+ std::vector\<std::string> channelNames,
+ std::vector\<std::string> modeNames,
+ std::vector\<std::string> literalNames,
+ std::vector\<std::string> symbolicNames)
+ : ruleNames(std::move(ruleNames)), channelNames(std::move(channelNames)),
+ modeNames(std::move(modeNames)), literalNames(std::move(literalNames)),
+ symbolicNames(std::move(symbolicNames)),
+ vocabulary(this->literalNames, this->symbolicNames) {}
+
+ <lexer.name; format = "cap">StaticData(const <lexer.name; format = "cap">StaticData&) = delete;
+ <lexer.name; format = "cap">StaticData(<lexer.name; format = "cap">StaticData&&) = delete;
+ <lexer.name; format = "cap">StaticData& operator=(const <lexer.name; format = "cap">StaticData&) = delete;
+ <lexer.name; format = "cap">StaticData& operator=(<lexer.name; format = "cap">StaticData&&) = delete;
+
+ std::vector\<antlr4::dfa::DFA> decisionToDFA;
+ antlr4::atn::PredictionContextCache sharedContextCache;
+ const std::vector\<std::string> ruleNames;
+ const std::vector\<std::string> channelNames;
+ const std::vector\<std::string> modeNames;
+ const std::vector\<std::string> literalNames;
+ const std::vector\<std::string> symbolicNames;
+ const antlr4::dfa::Vocabulary vocabulary;
+ antlr4::atn::SerializedATNView serializedATN;
+ std::unique_ptr\<antlr4::atn::ATN> atn;
+};
+
+::antlr4::internal::OnceFlag <lexer.grammarName; format = "lower">LexerOnceFlag;
+#if ANTLR4_USE_THREAD_LOCAL_CACHE
+static thread_local
+#endif
+std::unique_ptr\<<lexer.name; format = "cap">StaticData> <lexer.grammarName; format = "lower">LexerStaticData = nullptr;
+
+void <lexer.grammarName; format = "lower">LexerInitialize() {
+#if ANTLR4_USE_THREAD_LOCAL_CACHE
+ if (<lexer.grammarName; format = "lower">LexerStaticData != nullptr) {
+ return;
+ }
+#else
+ assert(<lexer.grammarName; format = "lower">LexerStaticData == nullptr);
+#endif
+ auto staticData = std::make_unique\<<lexer.name; format = "cap">StaticData>(
+ std::vector\<std::string>{
+ <lexer.ruleNames: {r | "<r>"}; separator = ", ", wrap, anchor>
+ },
+ std::vector\<std::string>{
+ "DEFAULT_TOKEN_CHANNEL", "HIDDEN"<if (lexer.channels)>, <lexer.channels: {c | "<c>"}; separator = ", ", wrap, anchor><endif>
+ },
+ std::vector\<std::string>{
+ <lexer.modes: {m | "<m>"}; separator = ", ", wrap, anchor>
+ },
+ std::vector\<std::string>{
+ <lexer.literalNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor>
+ },
+ std::vector\<std::string>{
+ <lexer.symbolicNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor>
+ }
+ );
+ <atn>
+ <lexer.grammarName; format = "lower">LexerStaticData = std::move(staticData);
+}
+
+}
+
+<lexer.name>::<lexer.name>(CharStream *input) : <superClass>(input) {
+ <lexer.name>::initialize();
+ _interpreter = new atn::LexerATNSimulator(this, *<lexer.grammarName; format = "lower">LexerStaticData->atn, <lexer.grammarName; format = "lower">LexerStaticData->decisionToDFA, <lexer.grammarName; format = "lower">LexerStaticData->sharedContextCache);
+}
+
+<lexer.name>::~<lexer.name>() {
+ delete _interpreter;
+}
+
+std::string <lexer.name>::getGrammarFileName() const {
+ return "<lexer.grammarFileName>";
+}
+
+const std::vector\<std::string>& <lexer.name>::getRuleNames() const {
+ return <lexer.grammarName; format = "lower">LexerStaticData->ruleNames;
+}
+
+const std::vector\<std::string>& <lexer.name>::getChannelNames() const {
+ return <lexer.grammarName; format = "lower">LexerStaticData->channelNames;
+}
+
+const std::vector\<std::string>& <lexer.name>::getModeNames() const {
+ return <lexer.grammarName; format = "lower">LexerStaticData->modeNames;
+}
+
+const dfa::Vocabulary& <lexer.name>::getVocabulary() const {
+ return <lexer.grammarName; format = "lower">LexerStaticData->vocabulary;
+}
+
+antlr4::atn::SerializedATNView <lexer.name>::getSerializedATN() const {
+ return <lexer.grammarName; format = "lower">LexerStaticData->serializedATN;
+}
+
+const atn::ATN& <lexer.name>::getATN() const {
+ return *<lexer.grammarName; format = "lower">LexerStaticData->atn;
+}
+
+<namedActions.definitions>
+
+<if (actionFuncs)>
+void <lexer.name>::action(RuleContext *context, size_t ruleIndex, size_t actionIndex) {
+ switch (ruleIndex) {
+ <lexer.actionFuncs.values: {f | case <f.ruleIndex>: <f.name>Action(antlrcpp::downCast\<<f.ctxType> *>(context), actionIndex); break;}; separator="\n">
+
+ default:
+ break;
+ }
+}
+<endif>
+
+<if (sempredFuncs)>
+bool <lexer.name>::sempred(RuleContext *context, size_t ruleIndex, size_t predicateIndex) {
+ switch (ruleIndex) {
+ <lexer.sempredFuncs.values: {f | case <f.ruleIndex>: return <f.name>Sempred(antlrcpp::downCast\<<f.ctxType> *>(context), predicateIndex);}; separator="\n">
+
+ default:
+ break;
+ }
+ return true;
+}
+<endif>
+
+<actionFuncs.values; separator="\n">
+
+<sempredFuncs.values; separator="\n">
+
+void <lexer.name>::initialize() {
+#if ANTLR4_USE_THREAD_LOCAL_CACHE
+ <lexer.grammarName; format = "lower">LexerInitialize();
+#else
+ ::antlr4::internal::call_once(<lexer.grammarName; format = "lower">LexerOnceFlag, <lexer.grammarName; format = "lower">LexerInitialize);
+#endif
+}
+>>
+
+RuleActionFunctionHeader(r, actions) ::= <<
+void <r.name>Action(<r.ctxType> *context, size_t actionIndex);
+>>
+
+RuleActionFunction(r, actions) ::= <<
+void <r.factory.grammar.name>::<r.name>Action(<r.ctxType> *context, size_t actionIndex) {
+ switch (actionIndex) {
+ <actions: {index | case <index>: <actions.(index)> break;}; separator="\n">
+
+ default:
+ break;
+ }
+}
+
+>>
+
+RuleSempredFunctionHeader(r, actions) ::= <<
+bool <r.name>Sempred(<r.ctxType> *_localctx, size_t predicateIndex);
+>>
+
+RuleSempredFunction(r, actions) ::= <<
+<! Called for both lexer and parser. But only one of them is actually available. Testing for the parser directly
+ generates a warning, however. So do the check via the factory instead. !>
+bool <if (r.factory.g.lexer)><lexer.name><else><parser.name><endif>::<r.name>Sempred(<r.ctxType> *_localctx, size_t predicateIndex) {
+ switch (predicateIndex) {
+ <actions: {index | case <index>: return <actions.(index)>}; separator=";\n">;
+
+ default:
+ break;
+ }
+ return true;
+}
+
+>>
+
+//--------------------------------------------------------------------------------------------------
+
+ParserHeader(parser, funcs, atn, sempredFuncs, superClass = {antlr4::Parser}) ::= <<
+<namedActions.context>
+
+class <file.exportMacro> <parser.name> : public <superClass> {
+public:
+<if (parser.tokens)>
+ enum {
+ <parser.tokens: {k | TOKEN_<k> = <parser.tokens.(k)>}; separator=", ", wrap, anchor>
+ };
+<endif>
+
+<if (parser.rules)>
+ enum {
+ <parser.rules: {r | Rule<r.name; format="cap"> = <r.index>}; separator=", ", wrap, anchor>
+ };
+<endif>
+
+ explicit <parser.name>(antlr4::TokenStream *input);
+
+ <parser.name>(antlr4::TokenStream *input, const antlr4::atn::ParserATNSimulatorOptions &options);
+
+ ~<parser.name>() override;
+
+ std::string getGrammarFileName() const override;
+
+ const antlr4::atn::ATN& getATN() const override;
+
+ const std::vector\<std::string>& getRuleNames() const override;
+
+ const antlr4::dfa::Vocabulary& getVocabulary() const override;
+
+ antlr4::atn::SerializedATNView getSerializedATN() const override;
+
+ <namedActions.members>
+
+ <parser.funcs: {f | class <f.name; format = "cap">Context;}; separator = "\n"> <! Forward declare context classes. !>
+
+ <funcs; separator = "\n">
+
+ <if (sempredFuncs)>
+ bool sempred(antlr4::RuleContext *_localctx, size_t ruleIndex, size_t predicateIndex) override;
+
+ <sempredFuncs.values; separator = "\n">
+ <endif>
+
+ // By default the static state used to implement the parser is lazily initialized during the first
+ // call to the constructor. You can call this function if you wish to initialize the static state
+ // ahead of time.
+ static void initialize();
+
+ static const size_t TOKEN_EOF = antlr4::Token::EOF;
+
+private:
+ <namedActions.declarations>
+};
+>>
+
+Parser(parser, funcs, atn, sempredFuncs, superClass = {Parser}) ::= <<
+
+using namespace antlr4;
+
+namespace {
+
+struct <parser.name; format = "cap">StaticData final {
+ <parser.name; format = "cap">StaticData(std::vector\<std::string> ruleNames,
+ std::vector\<std::string> literalNames,
+ std::vector\<std::string> symbolicNames)
+ : ruleNames(std::move(ruleNames)), literalNames(std::move(literalNames)),
+ symbolicNames(std::move(symbolicNames)),
+ vocabulary(this->literalNames, this->symbolicNames) {}
+
+ <parser.name; format = "cap">StaticData(const <parser.name; format = "cap">StaticData&) = delete;
+ <parser.name; format = "cap">StaticData(<parser.name; format = "cap">StaticData&&) = delete;
+ <parser.name; format = "cap">StaticData& operator=(const <parser.name; format = "cap">StaticData&) = delete;
+ <parser.name; format = "cap">StaticData& operator=(<parser.name; format = "cap">StaticData&&) = delete;
+
+ std::vector\<antlr4::dfa::DFA> decisionToDFA;
+ antlr4::atn::PredictionContextCache sharedContextCache;
+ const std::vector\<std::string> ruleNames;
+ const std::vector\<std::string> literalNames;
+ const std::vector\<std::string> symbolicNames;
+ const antlr4::dfa::Vocabulary vocabulary;
+ antlr4::atn::SerializedATNView serializedATN;
+ std::unique_ptr\<antlr4::atn::ATN> atn;
+};
+
+::antlr4::internal::OnceFlag <parser.grammarName; format = "lower">ParserOnceFlag;
+#if ANTLR4_USE_THREAD_LOCAL_CACHE
+static thread_local
+#endif
+std::unique_ptr\<<parser.name; format = "cap">StaticData> <parser.grammarName; format = "lower">ParserStaticData = nullptr;
+
+void <parser.grammarName; format = "lower">ParserInitialize() {
+#if ANTLR4_USE_THREAD_LOCAL_CACHE
+ if (<parser.grammarName; format = "lower">ParserStaticData != nullptr) {
+ return;
+ }
+#else
+ assert(<parser.grammarName; format = "lower">ParserStaticData == nullptr);
+#endif
+ auto staticData = std::make_unique\<<parser.name; format = "cap">StaticData>(
+ std::vector\<std::string>{
+ <parser.ruleNames: {r | "<r>"}; separator = ", ", wrap, anchor>
+ },
+ std::vector\<std::string>{
+ <parser.literalNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor>
+ },
+ std::vector\<std::string>{
+ <parser.symbolicNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor>
+ }
+ );
+ <atn>
+ <parser.grammarName; format = "lower">ParserStaticData = std::move(staticData);
+}
+
+}
+
+<parser.name>::<parser.name>(TokenStream *input) : <parser.name>(input, antlr4::atn::ParserATNSimulatorOptions()) {}
+
+<parser.name>::<parser.name>(TokenStream *input, const antlr4::atn::ParserATNSimulatorOptions &options) : <superClass>(input) {
+ <parser.name>::initialize();
+ _interpreter = new atn::ParserATNSimulator(this, *<parser.grammarName; format = "lower">ParserStaticData->atn, <parser.grammarName; format = "lower">ParserStaticData->decisionToDFA, <parser.grammarName; format = "lower">ParserStaticData->sharedContextCache, options);
+}
+
+<parser.name>::~<parser.name>() {
+ delete _interpreter;
+}
+
+const atn::ATN& <parser.name>::getATN() const {
+ return *<parser.grammarName; format = "lower">ParserStaticData->atn;
+}
+
+std::string <parser.name>::getGrammarFileName() const {
+ return "<parser.grammarFileName>";
+}
+
+const std::vector\<std::string>& <parser.name>::getRuleNames() const {
+ return <parser.grammarName; format = "lower">ParserStaticData->ruleNames;
+}
+
+const dfa::Vocabulary& <parser.name>::getVocabulary() const {
+ return <parser.grammarName; format = "lower">ParserStaticData->vocabulary;
+}
+
+antlr4::atn::SerializedATNView <parser.name>::getSerializedATN() const {
+ return <parser.grammarName; format = "lower">ParserStaticData->serializedATN;
+}
+
+<namedActions.definitions>
+
+<funcs; separator = "\n\n">
+
+<if (sempredFuncs)>
+bool <parser.name>::sempred(RuleContext *context, size_t ruleIndex, size_t predicateIndex) {
+ switch (ruleIndex) {
+ <parser.sempredFuncs.values: {f |
+ case <f.ruleIndex>: return <f.name>Sempred(antlrcpp::downCast\<<f.ctxType> *>(context), predicateIndex);}; separator="\n">
+
+ default:
+ break;
+ }
+ return true;
+}
+
+<sempredFuncs.values; separator="\n"><endif>
+
+void <parser.name>::initialize() {
+#if ANTLR4_USE_THREAD_LOCAL_CACHE
+ <parser.grammarName; format = "lower">ParserInitialize();
+#else
+ ::antlr4::internal::call_once(<parser.grammarName; format = "lower">ParserOnceFlag, <parser.grammarName; format = "lower">ParserInitialize);
+#endif
+}
+>>
+
+SerializedATNHeader(model) ::= <<
+>>
+
+SerializedATN(model) ::= <<
+static const int32_t serializedATNSegment[] = {
+ <model.serialized: {s | <s>}; separator=",", wrap>
+};
+staticData->serializedATN = antlr4::atn::SerializedATNView(serializedATNSegment, sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0]));
+
+antlr4::atn::ATNDeserializer deserializer;
+staticData->atn = deserializer.deserialize(staticData->serializedATN);
+
+const size_t count = staticData->atn->getNumberOfDecisions();
+staticData->decisionToDFA.reserve(count);
+for (size_t i = 0; i \< count; i++) { <! Rework class ATN to allow standard iterations. !>
+ staticData->decisionToDFA.emplace_back(staticData->atn->getDecisionState(i), i);
+}
+>>
+
+RuleFunctionHeader(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble, exceptions) ::= <<
+<ruleCtx>
+<! TODO: untested !><if (altLabelCtxs)><altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator="\n"><endif>
+<currentRule.ctxType>* <currentRule.escapedName>(<args; separator=",">);
+
+>>
+
+RuleFunction(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble, exceptions) ::= <<
+<ruleCtx>
+<! TODO: untested !><altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator = "\n">
+<parser.name>::<currentRule.ctxType>* <parser.name>::<currentRule.escapedName>(<args; separator=",">) {
+ <currentRule.ctxType> *_localctx = _tracker.createInstance\<<currentRule.ctxType>\>(_ctx, getState()<currentRule.args:{a | , <a.escapedName>}>);
+ enterRule(_localctx, <currentRule.startState>, <parser.name>::Rule<currentRule.name; format = "cap">);
+ <namedActions.init>
+ <locals; separator = "\n">
+
+#if __cplusplus > 201703L
+ auto onExit = finally([=, this] {
+#else
+ auto onExit = finally([=] {
+#endif
+ <finallyAction>
+ exitRule();
+ });
+ try {
+<! TODO: untested !><if (currentRule.hasLookaheadBlock)>
+ size_t alt;
+ <endif>
+ <code>
+<! TODO: untested !> <postamble; separator = "\n">
+ <namedActions.after>
+ }
+ <if (exceptions)>
+ <exceptions; separator="\n">
+ <else>
+ catch (RecognitionException &e) {
+ _errHandler->reportError(this, e);
+ _localctx->exception = std::current_exception();
+ _errHandler->recover(this, _localctx->exception);
+ }
+ <endif>
+
+ return _localctx;
+}
+>>
+
+LeftRecursiveRuleFunctionHeader(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble) ::= <<
+<ruleCtx>
+<! TODO: untested !><altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator="\n">
+<currentRule.ctxType>* <currentRule.escapedName>(<currentRule.args; separator = ", ">);
+<currentRule.ctxType>* <currentRule.escapedName>(int precedence<currentRule.args: {a | , <a>}>);
+>>
+
+LeftRecursiveRuleFunction(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble) ::= <<
+<ruleCtx>
+<altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator="\n">
+
+<parser.name>::<currentRule.ctxType>* <parser.name>::<currentRule.escapedName>(<currentRule.args; separator=", ">) {
+<! TODO: currentRule.args untested !> return <currentRule.escapedName>(0<currentRule.args: {a | , <a.escapedName>}>);
+}
+
+<parser.name>::<currentRule.ctxType>* <parser.name>::<currentRule.escapedName>(int precedence<currentRule.args:{a | , <a>}>) {
+ ParserRuleContext *parentContext = _ctx;
+ size_t parentState = getState();
+ <parser.name>::<currentRule.ctxType> *_localctx = _tracker.createInstance\<<currentRule.ctxType>\>(_ctx, parentState<currentRule.args: {a | , <a.escapedName>}>);
+ <parser.name>::<currentRule.ctxType> *previousContext = _localctx;
+ (void)previousContext; // Silence compiler, in case the context is not used by generated code.
+ size_t startState = <currentRule.startState>;
+ enterRecursionRule(_localctx, <currentRule.startState>, <parser.name>::Rule<currentRule.name; format = "cap">, precedence);
+
+ <namedActions.init>
+<! TODO: untested !> <locals; separator = "\n">
+
+#if __cplusplus > 201703L
+ auto onExit = finally([=, this] {
+#else
+ auto onExit = finally([=] {
+#endif
+ <if (finallyAction)><finallyAction><endif>
+ unrollRecursionContexts(parentContext);
+ });
+ try {
+ <if (currentRule.hasLookaheadBlock)>size_t alt;<endif>
+ <code>
+<! TODO: untested !><postamble; separator = "\n">
+ <namedActions.after>
+ }
+ catch (RecognitionException &e) {
+ _errHandler->reportError(this, e);
+ _localctx->exception = std::current_exception();
+ _errHandler->recover(this, _localctx->exception);
+ }
+ return _localctx;
+}
+>>
+
+StructDeclHeader(struct, ctorAttrs, attrs, getters, dispatchMethods, interfaces, extensionMembers) ::= <<
+class <file.exportMacro> <struct.escapedName> : public <if (contextSuperClass)><contextSuperClass><else>antlr4::ParserRuleContext<endif><if(interfaces)>, <interfaces; separator=", "><endif> {
+public:
+ <attrs: {a | <a>;}; separator = "\n">
+ <if (ctorAttrs)><struct.escapedName>(antlr4::ParserRuleContext *parent, size_t invokingState);<endif>
+ <struct.escapedName>(antlr4::ParserRuleContext *parent, size_t invokingState<ctorAttrs: {a | , <a>}>);
+<if (struct.provideCopyFrom)> <! don't need copy unless we have subclasses !>
+ <struct.escapedName>() = default;
+ void copyFrom(<struct.escapedName> *context);
+ using antlr4::ParserRuleContext::copyFrom;
+<endif>
+
+ virtual size_t getRuleIndex() const override;
+ <getters: {g | <g>}; separator = "\n">
+
+ <dispatchMethods; separator = "\n">
+<! TODO: untested !> <extensionMembers; separator = "\n">
+};
+
+>>
+
+StructDecl(struct, ctorAttrs, attrs, getters, dispatchMethods, interfaces, extensionMembers, signatures) ::= <<
+//----------------- <struct.escapedName> ------------------------------------------------------------------
+
+<if (ctorAttrs)>
+<parser.name>::<struct.escapedName>::<struct.escapedName>(ParserRuleContext *parent, size_t invokingState)
+ : <if (contextSuperClass)><contextSuperClass><else>ParserRuleContext<endif>(parent, invokingState) {
+}
+<endif>
+
+<parser.name>::<struct.escapedName>::<struct.escapedName>(ParserRuleContext *parent, size_t invokingState<ctorAttrs: {a | , <a>}>)
+ : <if (contextSuperClass)><contextSuperClass><else>ParserRuleContext<endif>(parent, invokingState) {
+ <struct.ctorAttrs: {a | this-><a.escapedName> = <a.escapedName>;}; separator="\n">
+}
+
+<getters: {g | <g>}; separator = "\n">
+
+size_t <parser.name>::<struct.escapedName>::getRuleIndex() const {
+ return <parser.name>::Rule<struct.derivedFromName; format = "cap">;
+}
+
+<if (struct.provideCopyFrom)>
+void <parser.name>::<struct.escapedName>::copyFrom(<struct.escapedName> *ctx) {
+ <if (contextSuperClass)><contextSuperClass><else>ParserRuleContext<endif>::copyFrom(ctx);
+ <struct.attrs: {a | this-><a.escapedName> = ctx-><a.escapedName>;}; separator = "\n">
+}
+<endif>
+<dispatchMethods; separator = "\n\n">
+<! TODO: untested !><extensionMembers; separator = "\n\n">
+
+>>
+
+AltLabelStructDeclHeader(struct, attrs, getters, dispatchMethods) ::= <<
+class <file.exportMacro> <struct.escapedName> : public <currentRule.name; format = "cap">Context {
+public:
+ <struct.escapedName>(<currentRule.name; format = "cap">Context *ctx);
+
+ <if (attrs)><attrs: {a | <a>;}; separator = "\n"><endif>
+ <getters: {g | <g>}; separator = "\n">
+ <dispatchMethods; separator = "\n">
+};
+
+>>
+
+AltLabelStructDecl(struct, attrs, getters, dispatchMethods) ::= <<
+//----------------- <struct.escapedName> ------------------------------------------------------------------
+
+<! TODO: untested !><if (attrs)><attrs: {a | <a>}; separator = "\n"><endif>
+<getters: {g | <g>}; separator = "\n">
+<parser.name>::<struct.escapedName>::<struct.escapedName>(<currentRule.name; format = "cap">Context *ctx) { copyFrom(ctx); }
+
+<dispatchMethods; separator="\n">
+>>
+
+
+CodeBlockForOuterMostAltHeader(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= "<! Required to exist, but unused. !>"
+CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= <<
+<if (currentOuterMostAltCodeBlock.altLabel)>
+_localctx = _tracker.createInstance\<<parser.name>::<currentOuterMostAltCodeBlock.altLabel; format = "cap">Context>(_localctx);
+<endif>
+enterOuterAlt(_localctx, <currentOuterMostAltCodeBlock.alt.altNum>);
+<CodeBlockForAlt(currentAltCodeBlock = currentOuterMostAltCodeBlock, ...)>
+>>
+
+CodeBlockForAltHeader(currentAltCodeBlock, locals, preamble, ops) ::= "<! Required to exist, but unused. !>"
+CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= <<
+<! TODO: untested !><locals; separator = "\n">
+<! TODO: untested !><preamble; separator = "\n">
+<ops; separator = "\n">
+>>
+
+LL1AltBlockHeader(choice, preamble, alts, error) ::= "<! Required to exist, but unused. !>"
+LL1AltBlock(choice, preamble, alts, error) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+<! TODO: untested !><if (choice.label)>LL1AltBlock(choice, preamble, alts, error) <labelref(choice.label)> = _input->LT(1);<endif>
+<preamble; separator="\n">
+switch (_input->LA(1)) {
+ <choice.altLook, alts: {look, alt | <cases(tokens = look)> {
+ <alt>
+ break;
+\}
+}; separator = "\n">
+default:
+ <error>
+}
+>>
+
+LL1OptionalBlockHeader(choice, alts, error) ::= "<! Required but unused. !>"
+LL1OptionalBlock(choice, alts, error) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+switch (_input->LA(1)) {
+ <choice.altLook, alts: {look, alt | <cases(tokens = look)> {
+ <alt>
+ break;
+\}
+}; separator="\n">
+default:
+ break;
+}
+>>
+
+LL1OptionalBlockSingleAltHeader(choice, expr, alts, preamble, error, followExpr) ::= "<! Required but unused. !>"
+LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+
+<preamble; separator = "\n">
+if (<expr>) {
+ <alts; separator = "\n">
+}
+>>
+
+LL1StarBlockSingleAltHeader(choice, loopExpr, alts, preamble, iteration) ::= "<! Required but unused. !>"
+LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+<preamble; separator="\n">
+while (<loopExpr>) {
+ <alts; separator="\n">
+ setState(<choice.loopBackStateNumber>);
+ _errHandler->sync(this);
+ <iteration>
+}
+>>
+
+LL1PlusBlockSingleAltHeader(choice, loopExpr, alts, preamble, iteration) ::= "<! Required but unused. !>"
+LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= <<
+setState(<choice.blockStartStateNumber>); <! alt block decision !>
+_errHandler->sync(this);
+<preamble; separator="\n">
+do {
+ <alts; separator="\n">
+ setState(<choice.stateNumber>); <! loopback/exit decision !>
+ _errHandler->sync(this);
+ <iteration>
+} while (<loopExpr>);
+>>
+
+// LL(*) stuff
+
+AltBlockHeader(choice, preamble, alts, error) ::= "<! Unused but must be present. !>"
+AltBlock(choice, preamble, alts, error) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+<! TODO: untested !><if (choice.label)><labelref(choice.label)> = _input->LT(1);<endif>
+<! TODO: untested !><preamble; separator = "\n">
+switch (getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx)) {
+<alts: {alt | case <i>: {
+ <alt>
+ break;
+\}
+}; separator="\n">
+default:
+ break;
+}
+>>
+
+OptionalBlockHeader(choice, alts, error) ::= "<! Unused but must be present. !>"
+OptionalBlock(choice, alts, error) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+
+switch (getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx)) {
+<alts: {alt | case <i><if (!choice.ast.greedy)> + 1<endif>: {
+ <alt>
+ break;
+\}
+}; separator = "\n">
+default:
+ break;
+}
+>>
+
+StarBlockHeader(choice, alts, sync, iteration) ::= "<! Unused but must be present. !>"
+StarBlock(choice, alts, sync, iteration) ::= <<
+setState(<choice.stateNumber>);
+_errHandler->sync(this);
+alt = getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx);
+while (alt != <choice.exitAlt> && alt != atn::ATN::INVALID_ALT_NUMBER) {
+ if (alt == 1<if(!choice.ast.greedy)> + 1<endif>) {
+ <iteration>
+ <alts> <! should only be one !>
+ }
+ setState(<choice.loopBackStateNumber>);
+ _errHandler->sync(this);
+ alt = getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx);
+}
+>>
+
+PlusBlockHeader(choice, alts, error) ::= "<! Required to exist, but unused. !>"
+PlusBlock(choice, alts, error) ::= <<
+setState(<choice.blockStartStateNumber>); <! alt block decision !>
+_errHandler->sync(this);
+alt = 1<if(!choice.ast.greedy)> + 1<endif>;
+do {
+ switch (alt) {
+ <alts: {alt | case <i><if (!choice.ast.greedy)> + 1<endif>: {
+ <alt>
+ break;
+ \}
+}; separator="\n">
+ default:
+ <error>
+ }
+ setState(<choice.loopBackStateNumber>); <! loopback/exit decision !>
+ _errHandler->sync(this);
+ alt = getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx);
+} while (alt != <choice.exitAlt> && alt != atn::ATN::INVALID_ALT_NUMBER);
+>>
+
+Sync(s) ::= "Sync(s) sync(<s.expecting.name>);"
+
+ThrowNoViableAltHeader(t) ::= "<! Unused but must be present. !>"
+ThrowNoViableAlt(t) ::= "throw NoViableAltException(this);"
+
+TestSetInlineHeader(s) ::= "<! Required but unused. !>"
+TestSetInline(s) ::= <<
+<s.bitsets: {bits | <if (rest(rest(bits.tokens)))><bitsetBitfieldComparison(s, bits)><else><bitsetInlineComparison(s, bits)><endif>}; separator=" || ">
+>>
+
+// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... need range test
+testShiftInRange(shiftAmount) ::= <<
+((<shiftAmount> & ~ 0x3fULL) == 0)
+>>
+
+bitsetBitfieldComparison(s, bits) ::= <<
+(<testShiftInRange({<offsetShift(s.varName, bits.shift)>})> &&
+ ((1ULL \<\< <offsetShift(s.varName, bits.shift)>) & <bits.calculated>) != 0)
+>>
+
+isZero ::= [
+ "0": true,
+ default: false
+]
+
+offsetShift(shiftAmount, offset, prefix = false) ::= <%
+<if (!isZero.(offset))>(<if (prefix)><parser.name>::TOKEN_<endif><shiftAmount> - <offset>)<else><if (prefix)><parser.name>::TOKEN_<endif><shiftAmount><endif>
+%>
+
+bitsetInlineComparison(s, bits) ::= <%
+<bits.tokens: {t | <s.varName> == <parser.name>::TOKEN_<t.name>}; separator = "\n\n|| ">
+%>
+
+cases(tokens) ::= <<
+<tokens: {t | case <parser.name>::TOKEN_<t.name>:}; separator="\n">
+>>
+
+InvokeRuleHeader(r, argExprsChunks) ::= "InvokeRuleHeader"
+InvokeRule(r, argExprsChunks) ::= <<
+setState(<r.stateNumber>);
+<if(r.labels)><r.labels: {l | <labelref(l)> = }><endif><r.escapedName>(<if(r.ast.options.p)><r.ast.options.p><if(argExprsChunks)>,<endif><endif><argExprsChunks>);
+>>
+
+MatchTokenHeader(m) ::= "<! Required but unused. !>"
+MatchToken(m) ::= <<
+setState(<m.stateNumber>);
+<if (m.labels)><m.labels: {l | <labelref(l)> = }><endif>match(<parser.name>::TOKEN_<m.escapedName>);
+>>
+
+MatchSetHeader(m, expr, capture) ::= "<! Required but unused. !>"
+MatchSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, false)>"
+
+MatchNotSetHeader(m, expr, capture) ::= "<! Required but unused. !>"
+MatchNotSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, true)>"
+
+CommonSetStuff(m, expr, capture, invert) ::= <<
+setState(<m.stateNumber>);
+<if (m.labels)><m.labels: {l | <labelref(l)> = }>_input->LT(1);<endif>
+<capture>
+if (<if (invert)><m.varName> == 0 || <m.varName> == Token::EOF || <else>!<endif>(<expr>)) {
+ <if (m.labels)><m.labels: {l | <labelref(l)> = }><endif>_errHandler->recoverInline(this);
+}
+else {
+ _errHandler->reportMatch(this);
+ consume();
+}
+>>
+
+WildcardHeader(w) ::= "<! Required but unused. !>"
+Wildcard(w) ::= <<
+setState(<w.stateNumber>);
+<if (w.labels)><w.labels: {l | <labelref(l)> = }><endif>matchWildcard();
+>>
+
+// ACTION STUFF
+
+ActionHeader(a, foo, chunks) ::= "<chunks>"
+Action(a, foo, chunks) ::= "<chunks>"
+
+ArgAction(a, chunks) ::= "ArgAction(a, chunks) <chunks>"
+
+SemPredHeader(p, chunks, failChunks) ::= "<! Required but unused. !>"
+SemPred(p, chunks, failChunks) ::= <<
+setState(<p.stateNumber>);
+
+if (!(<chunks>)) throw FailedPredicateException(this, <p.predicate><if (failChunks)>, <failChunks><elseif (p.msg)>, <p.msg><endif>);
+>>
+
+ExceptionClauseHeader(e, catchArg, catchAction) ::= "<! Required but unused. !>"
+ExceptionClause(e, catchArg, catchAction) ::= <<
+catch (<catchArg>) {
+ <catchAction>
+}
+>>
+
+// Lexer actions are not associated with model objects.
+
+LexerSkipCommand() ::= "skip();"
+LexerMoreCommand() ::= "more();"
+LexerPopModeCommand() ::= "popMode();"
+
+LexerTypeCommand(arg, grammar) ::= "type = <grammar.name>::<arg>;"
+LexerChannelCommand(arg, grammar) ::= "channel = <arg>;"
+LexerModeCommand(arg, grammar) ::= "mode = <grammar.name>Mode::<arg>;"
+LexerPushModeCommand(arg, grammar) ::= "pushMode(<grammar.name>Mode::<arg>);"
+
+ActionTextHeader(t) ::= "<t.text>"
+ActionText(t) ::= "<t.text>"
+
+ActionTemplateHeader(t) ::= "<! Required but unused. !>"
+ActionTemplate(t) ::= "<t.st>"
+
+ArgRefHeader(t) ::= "<! Required but unused. !>"
+ArgRef(a) ::= "_localctx-><a.escapedName>"
+
+LocalRefHeader(t) ::= "<! Required but unused. !>"
+LocalRef(a) ::= "_localctx-><a.escapedName>"
+
+RetValueRefHeader(t) ::= "<! Required but unused. !>"
+RetValueRef(a) ::= "_localctx-><a.escapedName>"
+
+QRetValueRefHeader(t) ::= "<! Required but unused. !>"
+QRetValueRef(a) ::= "<ctx(a)>-><a.dict>-><a.escapedName>"
+/** How to translate $tokenLabel */
+
+TokenRefHeader(t) ::= "<! Required but unused. !>"
+TokenRef(t) ::= "<ctx(t)>-><t.escapedName>"
+
+LabelRefHeader(t) ::= "<! Required but unused. !>"
+LabelRef(t) ::= "<ctx(t)>-><t.escapedName>"
+
+ListLabelRefHeader(t) ::= "<! Required but unused. !>"
+ListLabelRef(t) ::= "<ctx(t)>-><ListLabelName(t.escapedName)>"
+
+SetAttrHeader(t) ::= "<! Required but unused. !>"
+SetAttr(s,rhsChunks) ::= "<ctx(s)>-><s.escapedName> = <rhsChunks>;"
+
+InputSymbolType() ::= "<file.InputSymbolType; null = {Token}> *"
+
+TokenPropertyRef_textHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_text(t) ::= <<(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getText() : "")>>
+
+TokenPropertyRef_typeHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_type(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getType() : 0)"
+
+TokenPropertyRef_lineHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_line(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getLine() : 0)"
+
+TokenPropertyRef_posHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_pos(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getCharPositionInLine() : 0)"
+
+TokenPropertyRef_channelHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_channel(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getChannel() : 0)"
+
+TokenPropertyRef_indexHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_index(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getTokenIndex() : 0)"
+
+TokenPropertyRef_intHeader(t) ::= "<! Required but unused. !>"
+TokenPropertyRef_int(t) ::= "(<ctx(t)>-><t.label> != nullptr ? std::stoi(<ctx(t)>-><t.label>->getText()) : 0)"
+
+RulePropertyRef_startHeader(r) ::= "<! Required but unused. !>"
+RulePropertyRef_start(r) ::= "(<ctx(r)>-><r.label> != nullptr ? (<ctx(r)>-><r.label>->start) : nullptr)"
+
+RulePropertyRef_stopHeader(r) ::= "<! Required but unused. !>"
+RulePropertyRef_stop(r) ::= "(<ctx(r)>-><r.label> != nullptr ? (<ctx(r)>-><r.label>->stop) : nullptr)"
+
+RulePropertyRef_textHeader(r) ::= "<! Required but unused. !>"
+RulePropertyRef_text(r) ::= "(<ctx(r)>-><r.label> != nullptr ? _input->getText(<ctx(r)>-><r.label>->start, <ctx(r)>-><r.label>->stop) : nullptr)"
+
+RulePropertyRef_ctxHeader(r) ::= "<! Required but unused. !>"
+RulePropertyRef_ctx(r) ::= "<ctx(r)>-><r.label>"
+
+ThisRulePropertyRef_start(r) ::= "ThisRulePropertyRef_start(r) _localctx->start"
+ThisRulePropertyRef_stop(r) ::= "ThisRulePropertyRef_stop(r) _localctx->stop"
+
+ThisRulePropertyRef_textHeader(r) ::= "<! Required but unused. !>"
+ThisRulePropertyRef_text(r) ::= "_input->getText(_localctx->start, _input->LT(-1))"
+
+ThisRulePropertyRef_ctxHeader(r) ::= "<! Required but unused. !>"
+ThisRulePropertyRef_ctx(r) ::= "_localctx"
+
+ThisRulePropertyRef_parserHeader(r) ::= "<! Required but unused. !>"
+ThisRulePropertyRef_parser(r) ::= "this"
+
+NonLocalAttrRef(s) ::= "NonLocalAttrRef(s) ((<s.ruleName; format=\"cap\">Context)getInvokingContext(<s.ruleIndex>)).<s.escapedName>"
+SetNonLocalAttr(s, rhsChunks) ::=
+ "SetNonLocalAttr(s, rhsChunks) ((<s.ruleName; format=\"cap\">Context)getInvokingContext(<s.ruleIndex>)).<s.escapedName> = <rhsChunks>;"
+
+AddToLabelListHeader(a) ::= "<! Required but unused. !>"
+AddToLabelList(a) ::= <<
+<ctx(a.label)>-><a.listName>.push_back(<labelref(a.label)>);
+>>
+
+TokenLabelType() ::= "<file.TokenLabelType; null = {Token}> *"
+
+TokenDeclHeader(t) ::= "antlr4::<TokenLabelType()><t.escapedName> = nullptr"
+TokenDecl(t) ::= "<! Variable Declaration !>"
+
+TokenTypeDeclHeader(t) ::= "<! Local Variable !>"
+TokenTypeDecl(t) ::= "size_t <t.escapedName> = 0;"
+
+TokenListDeclHeader(t) ::= "std::vector\<antlr4::Token *> <t.escapedName>"
+TokenListDecl(t) ::= "<! Variable Declaration !>"
+
+RuleContextDeclHeader(r) ::= "<parser.name>::<r.ctxName> *<r.escapedName> = nullptr"
+RuleContextDecl(r) ::= "<! Variable Declaration !>"
+
+RuleContextListDeclHeader(rdecl) ::= "std::vector\<<rdecl.ctxName> *> <rdecl.escapedName>"
+RuleContextListDecl(rdecl) ::= "<! Variable Declaration !>"
+
+ContextTokenGetterDeclHeader(t) ::= "antlr4::tree::TerminalNode *TOKEN_<t.escapedName>();"
+ContextTokenGetterDecl(t) ::= <<
+tree::TerminalNode* <parser.name>::<t.ctx.name>::TOKEN_<t.escapedName>() {
+ return getToken(<parser.name>::TOKEN_<t.escapedName>, 0);
+}
+
+>>
+
+ContextTokenListGetterDeclHeader(t) ::= "std::vector\<antlr4::tree::TerminalNode *> TOKEN_<t.escapedName>();"
+ContextTokenListGetterDecl(t) ::= <<
+std::vector\<tree::TerminalNode *> <parser.name>::<t.ctx.name>::TOKEN_<t.escapedName>() {
+ return getTokens(<parser.name>::TOKEN_<t.escapedName>);
+}
+
+>>
+
+ContextTokenListIndexedGetterDeclHeader(t) ::= "antlr4::tree::TerminalNode* TOKEN_<t.escapedName>(size_t i);"
+ContextTokenListIndexedGetterDecl(t) ::= <<
+tree::TerminalNode* <parser.name>::<t.ctx.name>::TOKEN_<t.escapedName>(size_t i) {
+ return getToken(<parser.name>::TOKEN_<t.escapedName>, i);
+}
+
+>>
+
+ContextRuleGetterDeclHeader(r) ::= "<r.ctxName> *<r.escapedName>();"
+ContextRuleGetterDecl(r) ::= <<
+<! Note: ctxName is the name of the context to return, while ctx is the owning context. !>
+<parser.name>::<r.ctxName>* <parser.name>::<r.ctx.name>::<r.escapedName>() {
+ return getRuleContext\<<parser.name>::<r.ctxName>\>(0);
+}
+
+>>
+
+ContextRuleListGetterDeclHeader(r) ::= "std::vector\<<r.ctxName> *> <r.escapedName>();"
+ContextRuleListGetterDecl(r) ::= <<
+std::vector\<<parser.name>::<r.ctxName> *> <parser.name>::<r.ctx.name>::<r.escapedName>() {
+ return getRuleContexts\<<parser.name>::<r.ctxName>\>();
+}
+
+>>
+
+ContextRuleListIndexedGetterDeclHeader(r) ::= "<r.ctxName>* <r.escapedName>(size_t i);"
+ContextRuleListIndexedGetterDecl(r) ::= <<
+<parser.name>::<r.ctxName>* <parser.name>::<r.ctx.name>::<r.escapedName>(size_t i) {
+ return getRuleContext\<<parser.name>::<r.ctxName>\>(i);
+}
+
+>>
+
+LexerRuleContext() ::= "antlr4::RuleContext"
+
+// The rule context name is the rule followed by a suffix; e.g. r becomes rContext.
+RuleContextNameSuffix() ::= "Context"
+
+ImplicitTokenLabel(tokenName) ::= <<
+<tokenName; format = "lower">Token
+>>
+
+ImplicitRuleLabel(ruleName) ::= "<ruleName>Context"
+ImplicitSetLabel(id) ::= "_tset<id>"
+ListLabelName(label) ::= "<label>"
+
+CaptureNextToken(d) ::= "CaptureNextToken(d) <d.varName> = _input->LT(1);"
+
+CaptureNextTokenTypeHeader(d) ::= "<! Required but unused. !>"
+CaptureNextTokenType(d) ::= "<d.varName> = _input->LA(1);"
+
+ListenerDispatchMethodHeader(method) ::= <<
+virtual void <if (method.isEnter)>enter<else>exit<endif>Rule(antlr4::tree::ParseTreeListener *listener) override;
+>>
+ListenerDispatchMethod(method) ::= <<
+void <parser.name>::<struct.escapedName>::<if (method.isEnter)>enter<else>exit<endif>Rule(tree::ParseTreeListener *listener) {
+ auto parserListener = dynamic_cast\<<parser.grammarName>Listener *>(listener);
+ if (parserListener != nullptr)
+ parserListener-><if(method.isEnter)>enter<else>exit<endif><struct.derivedFromName; format="cap">(this);
+}
+>>
+
+VisitorDispatchMethodHeader(method) ::= <<
+
+virtual std::any accept(antlr4::tree::ParseTreeVisitor *visitor) override;
+>>
+VisitorDispatchMethod(method) ::= <<
+
+std::any <parser.name>::<struct.escapedName>::accept(tree::ParseTreeVisitor *visitor) {
+ if (auto parserVisitor = dynamic_cast\<<parser.grammarName>Visitor*>(visitor))
+ return parserVisitor->visit<struct.derivedFromName; format="cap">(this);
+ else
+ return visitor->visitChildren(this);
+}
+>>
+
+AttributeDeclHeader(d) ::= "<d.type> <d.escapedName><if(d.initValue)> = <d.initValue><endif>"
+AttributeDecl(d) ::= "<d.type> <d.escapedName>"
+
+/** If we don't know location of label def x, use this template */
+labelref(x) ::= "<if (!x.isLocal)>antlrcpp::downCast\<<x.ctx.name> *>(_localctx)-><endif><x.escapedName>"
+
+/** For any action chunk, what is correctly-typed context struct ptr? */
+ctx(actionChunk) ::= "antlrcpp::downCast\<<actionChunk.ctx.name> *>(_localctx)"
+
+// used for left-recursive rules
+recRuleAltPredicate(ruleName,opPrec) ::= "precpred(_ctx, <opPrec>)"
+recRuleSetReturnAction(src,name) ::= "recRuleSetReturnAction(src,name) $<name>=$<src>.<name>;"
+recRuleSetStopToken() ::= "_ctx->stop = _input->LT(-1);"
+
+recRuleAltStartAction(ruleName, ctxName, label, isListLabel) ::= <<
+_localctx = _tracker.createInstance\<<ctxName>Context>(parentContext, parentState);
+<if(label)>
+<if(isListLabel)>
+_localctx-><label>.push_back(previousContext);
+<else>
+_localctx-><label> = previousContext;
+<endif>
+<endif>
+pushNewRecursionContext(_localctx, startState, Rule<ruleName; format = "cap">);
+>>
+
+// Separate context variable to avoid frequent pointer type casts.
+recRuleLabeledAltStartAction(ruleName, currentAltLabel, label, isListLabel) ::= <<
+auto newContext = _tracker.createInstance\<<currentAltLabel; format = "cap">Context>(_tracker.createInstance\<<ruleName; format="cap">Context>(parentContext, parentState));
+_localctx = newContext;
+<if(label)>
+<if(isListLabel)>
+newContext-><label>.push_back(previousContext);
+<else>
+newContext-><label> = previousContext;
+<endif>
+<endif>
+pushNewRecursionContext(newContext, startState, Rule<ruleName; format = "cap">);
+>>
+
+recRuleReplaceContext(ctxName) ::= <<
+_localctx = _tracker.createInstance\<<ctxName>Context>(_localctx);
+_ctx = _localctx;
+previousContext = _localctx;
+>>
+
+recRuleSetPrevCtx() ::= <<
+if (!_parseListeners.empty())
+ triggerExitRuleEvent();
+previousContext = _localctx;
+>>
+
+/** Using a type to init value map, try to init a type; if not in table
+ * must be an object, default value is "null".
+ */
+initValue(typeName) ::= <<
+<cppTypeInitMap.(typeName)>
+>> \ No newline at end of file
diff --git a/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
new file mode 100644
index 0000000000..c2a36eb758
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg
@@ -0,0 +1,344 @@
+/*
+ * [The "BSD license"]
+ * Copyright (c) 2016, Mike Lischke
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+codeFileExtension() ::= ".cpp"
+headerFileExtension() ::= ".h"
+
+fileHeader(grammarFileName, ANTLRVersion, header) ::= <<
+<header>
+
+// Generated from <grammarFileName> by ANTLR <ANTLRVersion>
+>>
+
+LexerFileHeader(file, lexer, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)>
+
+#pragma once
+
+<namedActions.preinclude>
+
+#include "antlr4-runtime.h"
+
+<namedActions.postinclude>
+
+<if(file.genPackage)>namespace <file.genPackage> {<endif>
+
+<lexer>
+
+<if (file.genPackage)>
+} // namespace <file.genPackage>
+<endif>
+>>
+
+LexerFile(file, lexer, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)>
+
+<namedActions.preinclude>
+
+#include "<file.lexer.name>.h"
+
+<namedActions.postinclude>
+
+using namespace antlr4;
+
+<if (file.genPackage)>using namespace <file.genPackage>;<endif>
+
+<lexer>
+
+>>
+
+ParserFileHeader(file, parser, namedActions, contextSuperClass) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)>
+
+#pragma once
+
+<namedActions.preinclude>
+
+#include "antlr4-runtime.h"
+
+<namedActions.postinclude>
+
+<if (file.genPackage)>namespace <file.genPackage> {<endif>
+
+<parser>
+
+<if (file.genPackage)>
+} // namespace <file.genPackage>
+<endif>
+>>
+
+ParserFile(file, parser, namedActions, contextSuperClass) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)>
+
+<namedActions.preinclude>
+
+<if (file.genListener)>#include "<file.grammarName>Listener.h"<endif>
+<if (file.genVisitor)>#include "<file.grammarName>Visitor.h"<endif>
+
+#include "<file.parser.name>.h"
+
+<namedActions.postinclude>
+
+using namespace antlrcpp;
+<if (file.genPackage)>using namespace <file.genPackage>;<endif>
+<parser>
+
+>>
+
+BaseListenerFileHeader(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+#pragma once
+
+<namedActions.baselistenerpreinclude>
+
+#include "antlr4-runtime.h"
+#include "<file.grammarName>Listener.h"
+
+<namedActions.baselistenerpostinclude>
+
+<if(file.genPackage)>
+namespace <file.genPackage> {
+<endif>
+
+/**
+ * This class provides an empty implementation of <file.grammarName>Listener,
+ * which can be extended to create a listener which only needs to handle a subset
+ * of the available methods.
+ */
+class <file.exportMacro> <file.grammarName>BaseListener : public <file.grammarName>Listener {
+public:
+<namedActions.baselistenerdeclarations>
+
+<file.listenerNames: {lname |
+ virtual void enter<lname; format="cap">(<file.parserName>::<lname; format = "cap">Context * /*ctx*/) override { \}
+ virtual void exit<lname; format="cap">(<file.parserName>::<lname; format = "cap">Context * /*ctx*/) override { \}
+}; separator="\n">
+
+ virtual void enterEveryRule(antlr4::ParserRuleContext * /*ctx*/) override { }
+ virtual void exitEveryRule(antlr4::ParserRuleContext * /*ctx*/) override { }
+ virtual void visitTerminal(antlr4::tree::TerminalNode * /*node*/) override { }
+ virtual void visitErrorNode(antlr4::tree::ErrorNode * /*node*/) override { }
+
+<if (namedActions.baselistenermembers)>
+private:
+<namedActions.baselistenermembers>
+<endif>
+};
+
+<if (file.genPackage)>
+} // namespace <file.genPackage>
+<endif>
+>>
+
+BaseListenerFile(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+<namedActions.baselistenerpreinclude>
+
+#include "<file.grammarName>BaseListener.h"
+
+<namedActions.baselistenerpostinclude>
+
+<if(file.genPackage)>
+using namespace <file.genPackage>;
+<endif>
+
+<namedActions.baselistenerdefinitions>
+>>
+
+ListenerFileHeader(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+#pragma once
+
+<namedActions.listenerpreinclude>
+
+#include "antlr4-runtime.h"
+#include "<file.parserName>.h"
+
+<namedActions.listenerpostinclude>
+
+<if(file.genPackage)>
+namespace <file.genPackage> {
+<endif>
+
+/**
+ * This interface defines an abstract listener for a parse tree produced by <file.parserName>.
+ */
+class <file.exportMacro> <file.grammarName>Listener : public antlr4::tree::ParseTreeListener {
+public:
+<namedActions.listenerdeclarations>
+
+<file.listenerNames: {lname |
+ virtual void enter<lname; format = "cap">(<file.parserName>::<lname; format ="cap">Context *ctx) = 0;
+ virtual void exit<lname; format = "cap">(<file.parserName>::<lname; format="cap">Context *ctx) = 0;
+}; separator = "\n">
+
+<if (namedActions.listenermembers)>
+private:
+<namedActions.listenermembers>
+<endif>
+};
+
+<if(file.genPackage)>
+} // namespace <file.genPackage>
+<endif>
+>>
+
+ListenerFile(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+<namedActions.listenerpreinclude>
+
+#include "<file.grammarName>Listener.h"
+
+<namedActions.listenerpostinclude>
+
+<if(file.genPackage)>
+using namespace <file.genPackage>;
+<endif>
+
+<namedActions.listenerdefinitions>
+>>
+
+BaseVisitorFileHeader(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+#pragma once
+
+<namedActions.basevisitorpreinclude>
+
+#include "antlr4-runtime.h"
+#include "<file.grammarName>Visitor.h"
+
+<namedActions.basevisitorpostinclude>
+
+<if(file.genPackage)>
+namespace <file.genPackage> {
+<endif>
+
+/**
+ * This class provides an empty implementation of <file.grammarName>Visitor, which can be
+ * extended to create a visitor which only needs to handle a subset of the available methods.
+ */
+class <file.exportMacro> <file.grammarName>BaseVisitor : public <file.grammarName>Visitor {
+public:
+<namedActions.basevisitordeclarations>
+
+<file.visitorNames: { lname |
+ virtual std::any visit<lname; format = "cap">(<file.parserName>::<lname; format = "cap">Context *ctx) override {
+ return visitChildren(ctx);
+ \}
+}; separator="\n">
+
+<if (namedActions.basevisitormembers)>
+private:
+<namedActions.basevisitormembers>
+<endif>
+};
+
+<if(file.genPackage)>
+} // namespace <file.genPackage>
+<endif>
+>>
+
+BaseVisitorFile(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+<namedActions.basevisitorpreinclude>
+
+#include "<file.grammarName>BaseVisitor.h"
+
+<namedActions.basevisitorpostinclude>
+
+<if(file.genPackage)>
+using namespace <file.genPackage>;
+<endif>
+
+<namedActions.basevisitordefinitions>
+
+>>
+
+VisitorFileHeader(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+#pragma once
+
+<namedActions.visitorpreinclude>
+
+#include "antlr4-runtime.h"
+#include "<file.parserName>.h"
+
+<namedActions.visitorpostinclude>
+
+<if(file.genPackage)>namespace <file.genPackage> {<endif>
+
+/**
+ * This class defines an abstract visitor for a parse tree
+ * produced by <file.parserName>.
+ */
+class <file.exportMacro> <file.grammarName>Visitor : public antlr4::tree::AbstractParseTreeVisitor {
+public:
+ <namedActions.visitordeclarations>
+
+ /**
+ * Visit parse trees produced by <file.parserName>.
+ */
+ <file.visitorNames: {lname |
+ virtual std::any visit<lname; format = "cap">(<file.parserName>::<lname; format = "cap">Context *context) = 0;
+ }; separator="\n">
+
+<if (namedActions.visitormembers)>
+private:
+<namedActions.visitormembers>
+<endif>
+};
+
+<if(file.genPackage)>
+} // namespace <file.genPackage>
+<endif>
+>>
+
+VisitorFile(file, header, namedActions) ::= <<
+<fileHeader(file.grammarFileName, file.ANTLRVersion, header)>
+
+<namedActions.visitorpreinclude>
+
+#include "<file.grammarName>Visitor.h"
+
+<namedActions.visitorpostinclude>
+
+<if(file.genPackage)>
+using namespace <file.genPackage>;
+<endif>
+
+<namedActions.visitordefinitions>
+
+>> \ No newline at end of file
diff --git a/yql/essentials/parser/antlr_ast/ya.make b/yql/essentials/parser/antlr_ast/ya.make
new file mode 100644
index 0000000000..d3b33a2dfa
--- /dev/null
+++ b/yql/essentials/parser/antlr_ast/ya.make
@@ -0,0 +1,4 @@
+RECURSE(
+ gen
+)
+
diff --git a/yql/essentials/parser/ya.make b/yql/essentials/parser/ya.make
index 7f1b0fe700..c1cacaa6a8 100644
--- a/yql/essentials/parser/ya.make
+++ b/yql/essentials/parser/ya.make
@@ -1,4 +1,5 @@
RECURSE(
+ antlr_ast
lexer_common
pg_catalog
pg_wrapper
diff --git a/yql/essentials/sql/v1/complete/c3_engine.h b/yql/essentials/sql/v1/complete/c3_engine.h
new file mode 100644
index 0000000000..8b729f880b
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/c3_engine.h
@@ -0,0 +1,116 @@
+#pragma once
+
+#include "sql_antlr4.h"
+#include "string_util.h"
+
+#include <contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h>
+#include <contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h>
+#include <contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h>
+#include <contrib/libs/antlr4-c3/src/CodeCompletionCore.hpp>
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+#include <vector>
+#include <unordered_set>
+
+namespace NSQLComplete {
+
+ // std::vector is used to prevent copying from c3 results
+ struct TSuggestedToken {
+ TTokenId Number;
+ std::vector<TRuleId> ParserCallStack;
+ };
+
+ class IC3Engine {
+ public:
+ using TPtr = THolder<IC3Engine>;
+
+ // std::unordered_set is used to prevent copying into c3 core
+ struct TConfig {
+ std::unordered_set<TTokenId> IgnoredTokens;
+ std::unordered_set<TRuleId> PreferredRules;
+ };
+
+ virtual TVector<TSuggestedToken> Complete(TStringBuf queryPrefix) = 0;
+ virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0;
+ virtual ~IC3Engine() = default;
+ };
+
+ template <class Lexer, class Parser>
+ struct TAntlrGrammar {
+ using TLexer = Lexer;
+ using TParser = Parser;
+
+ TAntlrGrammar() = delete;
+ };
+
+ template <class G>
+ class TC3Engine: public IC3Engine {
+ public:
+ explicit TC3Engine(TConfig config)
+ : Chars()
+ , Lexer(&Chars)
+ , Tokens(&Lexer)
+ , Parser(&Tokens)
+ , CompletionCore(&Parser)
+ {
+ Lexer.removeErrorListeners();
+ Parser.removeErrorListeners();
+
+ CompletionCore.ignoredTokens = std::move(config.IgnoredTokens);
+ CompletionCore.preferredRules = std::move(config.PreferredRules);
+ }
+
+ TVector<TSuggestedToken> Complete(TStringBuf queryPrefix) override {
+ Assign(queryPrefix);
+ const auto caretTokenIndex = CaretTokenIndex(queryPrefix);
+ auto candidates = CompletionCore.collectCandidates(caretTokenIndex);
+ return Converted(std::move(candidates));
+ }
+
+ const antlr4::dfa::Vocabulary& GetVocabulary() const override {
+ return Lexer.getVocabulary();
+ }
+
+ private:
+ void Assign(TStringBuf queryPrefix) {
+ Chars.load(queryPrefix.Data(), queryPrefix.Size(), /* lenient = */ false);
+ Lexer.reset();
+ Tokens.setTokenSource(&Lexer);
+
+ Tokens.fill();
+ }
+
+ size_t CaretTokenIndex(TStringBuf queryPrefix) {
+ const auto tokensCount = Tokens.size();
+ if (2 <= tokensCount && !LastWord(queryPrefix).Empty()) {
+ return tokensCount - 2;
+ }
+ return tokensCount - 1;
+ }
+
+ static TVector<TSuggestedToken> Converted(c3::CandidatesCollection candidates) {
+ TVector<TSuggestedToken> converted;
+ for (const auto& [token, _] : candidates.tokens) {
+ std::vector<TRuleId> parserCallStack;
+
+ if (
+ auto rules = candidates.rules.find(token);
+ rules != std::end(candidates.rules)) {
+ parserCallStack = std::move(rules->second.ruleList);
+ }
+
+ converted.emplace_back(token, std::move(parserCallStack));
+ }
+ return converted;
+ }
+
+ antlr4::ANTLRInputStream Chars;
+ G::TLexer Lexer;
+ antlr4::BufferedTokenStream Tokens;
+ G::TParser Parser;
+ c3::CodeCompletionCore CompletionCore;
+ };
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_antlr4.cpp b/yql/essentials/sql/v1/complete/sql_antlr4.cpp
new file mode 100644
index 0000000000..33c847f3e2
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_antlr4.cpp
@@ -0,0 +1,116 @@
+#include "sql_antlr4.h"
+
+#include <yql/essentials/sql/v1/format/sql_format.h>
+
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h>
+#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h>
+
+#define RULE_(mode, name) NALA##mode##Antlr4::SQLv1Antlr4Parser::Rule##name
+
+#define RULE(name) RULE_(Default, name)
+
+#define STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(name) \
+ static_assert(RULE_(Default, name) == RULE_(Ansi, name))
+
+namespace NSQLComplete {
+
+ class TSqlGrammar: public ISqlGrammar {
+ public:
+ TSqlGrammar(bool isAnsiLexer)
+ : Vocabulary(GetVocabulary(isAnsiLexer))
+ , AllTokens(ComputeAllTokens())
+ , KeywordTokens(ComputeKeywordTokens())
+ {
+ }
+
+ const antlr4::dfa::Vocabulary& GetVocabulary() const override {
+ return *Vocabulary;
+ }
+
+ const std::unordered_set<TTokenId>& GetAllTokens() const override {
+ return AllTokens;
+ }
+
+ const std::unordered_set<TTokenId>& GetKeywordTokens() const override {
+ return KeywordTokens;
+ }
+
+ const TVector<TRuleId>& GetKeywordRules() const override {
+ static const TVector<TRuleId> KeywordRules = {
+ RULE(Keyword),
+ RULE(Keyword_expr_uncompat),
+ RULE(Keyword_table_uncompat),
+ RULE(Keyword_select_uncompat),
+ RULE(Keyword_alter_uncompat),
+ RULE(Keyword_in_uncompat),
+ RULE(Keyword_window_uncompat),
+ RULE(Keyword_hint_uncompat),
+ RULE(Keyword_as_compat),
+ RULE(Keyword_compat),
+ };
+
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_expr_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_table_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_select_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_alter_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_in_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_window_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_hint_uncompat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_as_compat);
+ STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_compat);
+
+ return KeywordRules;
+ }
+
+ private:
+ static const antlr4::dfa::Vocabulary* GetVocabulary(bool isAnsiLexer) {
+ if (isAnsiLexer) { // Taking a reference is okay as vocabulary storage is static
+ return &NALAAnsiAntlr4::SQLv1Antlr4Parser(nullptr).getVocabulary();
+ }
+ return &NALADefaultAntlr4::SQLv1Antlr4Parser(nullptr).getVocabulary();
+ }
+
+ std::unordered_set<TTokenId> ComputeAllTokens() {
+ const auto& vocabulary = GetVocabulary();
+
+ std::unordered_set<TTokenId> allTokens;
+
+ for (size_t type = 1; type <= vocabulary.getMaxTokenType(); ++type) {
+ allTokens.emplace(type);
+ }
+
+ return allTokens;
+ }
+
+ std::unordered_set<TTokenId> ComputeKeywordTokens() {
+ const auto& vocabulary = GetVocabulary();
+ const auto keywords = NSQLFormat::GetKeywords();
+
+ auto keywordTokens = GetAllTokens();
+ std::erase_if(keywordTokens, [&](TTokenId token) {
+ return !keywords.contains(vocabulary.getSymbolicName(token));
+ });
+ keywordTokens.erase(TOKEN_EOF);
+
+ return keywordTokens;
+ }
+
+ const antlr4::dfa::Vocabulary* Vocabulary;
+ const std::unordered_set<TTokenId> AllTokens;
+ const std::unordered_set<TTokenId> KeywordTokens;
+ };
+
+ const ISqlGrammar& GetSqlGrammar(bool isAnsiLexer) {
+ const static TSqlGrammar DefaultSqlGrammar(/* isAnsiLexer = */ false);
+ const static TSqlGrammar AnsiSqlGrammar(/* isAnsiLexer = */ true);
+
+ if (isAnsiLexer) {
+ return AnsiSqlGrammar;
+ }
+ return DefaultSqlGrammar;
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_antlr4.h b/yql/essentials/sql/v1/complete/sql_antlr4.h
new file mode 100644
index 0000000000..3d3c4c024a
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_antlr4.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "sql_syntax.h"
+
+#include <contrib/libs/antlr4_cpp_runtime/src/Token.h>
+#include <contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h>
+
+#include <unordered_set>
+
+namespace NSQLComplete {
+
+ using TTokenId = size_t;
+ using TRuleId = size_t;
+
+ constexpr TTokenId TOKEN_EOF = antlr4::Token::EOF;
+
+ class ISqlGrammar {
+ public:
+ virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0;
+ virtual const std::unordered_set<TTokenId>& GetAllTokens() const = 0;
+ virtual const std::unordered_set<TTokenId>& GetKeywordTokens() const = 0;
+ virtual const TVector<TRuleId>& GetKeywordRules() const = 0;
+ virtual ~ISqlGrammar() = default;
+ };
+
+ const ISqlGrammar& GetSqlGrammar(bool isAnsiLexer);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp
new file mode 100644
index 0000000000..2a16a250e5
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_complete.cpp
@@ -0,0 +1,89 @@
+#include "sql_complete.h"
+
+#include "sql_context.h"
+#include "string_util.h"
+
+#include <util/generic/algorithm.h>
+#include <util/charset/utf8.h>
+
+namespace NSQLComplete {
+
+ class TSqlCompletionEngine: public ISqlCompletionEngine {
+ public:
+ TSqlCompletionEngine()
+ : ContextInference(MakeSqlContextInference())
+ {
+ }
+
+ TCompletion Complete(TCompletionInput input) {
+ auto prefix = input.Text.Head(input.CursorPosition);
+ auto completedToken = GetCompletedToken(prefix);
+
+ auto context = ContextInference->Analyze(input);
+
+ TVector<TCandidate> candidates;
+ EnrichWithKeywords(candidates, context.Keywords);
+
+ FilterByContent(candidates, completedToken.Content);
+
+ RankingSort(candidates);
+
+ return {
+ .CompletedToken = std::move(completedToken),
+ .Candidates = std::move(candidates),
+ };
+ }
+
+ private:
+ TCompletedToken GetCompletedToken(TStringBuf prefix) {
+ return {
+ .Content = LastWord(prefix),
+ .SourcePosition = LastWordIndex(prefix),
+ };
+ }
+
+ void EnrichWithKeywords(TVector<TCandidate>& candidates, TVector<TString> keywords) {
+ for (auto keyword : keywords) {
+ candidates.push_back({
+ .Kind = ECandidateKind::Keyword,
+ .Content = std::move(keyword),
+ });
+ }
+ }
+
+ void FilterByContent(TVector<TCandidate>& candidates, TStringBuf prefix) {
+ const auto lowerPrefix = ToLowerUTF8(prefix);
+ auto removed = std::ranges::remove_if(candidates, [&](const auto& candidate) {
+ return !ToLowerUTF8(candidate.Content).StartsWith(lowerPrefix);
+ });
+ candidates.erase(std::begin(removed), std::end(removed));
+ }
+
+ void RankingSort(TVector<TCandidate>& candidates) {
+ Sort(candidates, [](const TCandidate& lhs, const TCandidate& rhs) {
+ return std::tie(lhs.Kind, lhs.Content) < std::tie(rhs.Kind, rhs.Content);
+ });
+ }
+
+ ISqlContextInference::TPtr ContextInference;
+ };
+
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngine() {
+ return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine());
+ }
+
+} // namespace NSQLComplete
+
+template <>
+void Out<NSQLComplete::ECandidateKind>(IOutputStream& out, NSQLComplete::ECandidateKind kind) {
+ switch (kind) {
+ case NSQLComplete::ECandidateKind::Keyword:
+ out << "Keyword";
+ break;
+ }
+}
+
+template <>
+void Out<NSQLComplete::TCandidate>(IOutputStream& out, const NSQLComplete::TCandidate& candidate) {
+ out << "(" << candidate.Kind << ": " << candidate.Content << ")";
+}
diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h
new file mode 100644
index 0000000000..99e74cce7a
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_complete.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+namespace NSQLComplete {
+
+ struct TCompletionInput {
+ TStringBuf Text;
+ size_t CursorPosition = Text.length();
+ };
+
+ struct TCompletedToken {
+ TStringBuf Content;
+ size_t SourcePosition;
+ };
+
+ enum class ECandidateKind {
+ Keyword,
+ };
+
+ struct TCandidate {
+ ECandidateKind Kind;
+ TString Content;
+
+ friend bool operator==(const TCandidate& lhs, const TCandidate& rhs) = default;
+ };
+
+ struct TCompletion {
+ TCompletedToken CompletedToken;
+ TVector<TCandidate> Candidates;
+ };
+
+ class ISqlCompletionEngine {
+ public:
+ using TPtr = THolder<ISqlCompletionEngine>;
+
+ virtual TCompletion Complete(TCompletionInput input) = 0;
+ virtual ~ISqlCompletionEngine() = default;
+ };
+
+ ISqlCompletionEngine::TPtr MakeSqlCompletionEngine();
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
new file mode 100644
index 0000000000..e0a012f9f6
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp
@@ -0,0 +1,323 @@
+#include "sql_complete.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NSQLComplete;
+
+Y_UNIT_TEST_SUITE(SqlCompleteTests) {
+ using ECandidateKind::Keyword;
+
+ TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) {
+ return engine->Complete({prefix}).Candidates;
+ }
+
+ Y_UNIT_TEST(Beginning) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ALTER"},
+ {Keyword, "ANALYZE"},
+ {Keyword, "BACKUP"},
+ {Keyword, "BATCH"},
+ {Keyword, "COMMIT"},
+ {Keyword, "CREATE"},
+ {Keyword, "DECLARE"},
+ {Keyword, "DEFINE"},
+ {Keyword, "DELETE"},
+ {Keyword, "DISCARD"},
+ {Keyword, "DO"},
+ {Keyword, "DROP"},
+ {Keyword, "EVALUATE"},
+ {Keyword, "EXPLAIN"},
+ {Keyword, "EXPORT"},
+ {Keyword, "FOR"},
+ {Keyword, "FROM"},
+ {Keyword, "GRANT"},
+ {Keyword, "IF"},
+ {Keyword, "IMPORT"},
+ {Keyword, "INSERT"},
+ {Keyword, "PARALLEL"},
+ {Keyword, "PRAGMA"},
+ {Keyword, "PROCESS"},
+ {Keyword, "REDUCE"},
+ {Keyword, "REPLACE"},
+ {Keyword, "RESTORE"},
+ {Keyword, "REVOKE"},
+ {Keyword, "ROLLBACK"},
+ {Keyword, "SELECT"},
+ {Keyword, "SHOW"},
+ {Keyword, "UPDATE"},
+ {Keyword, "UPSERT"},
+ {Keyword, "USE"},
+ {Keyword, "VALUES"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {";"}), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"; "}), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" ; "}), expected);
+ }
+
+ Y_UNIT_TEST(Alter) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ASYNC"},
+ {Keyword, "BACKUP"},
+ {Keyword, "DATABASE"},
+ {Keyword, "EXTERNAL"},
+ {Keyword, "GROUP"},
+ {Keyword, "OBJECT"},
+ {Keyword, "RESOURCE"},
+ {Keyword, "SEQUENCE"},
+ {Keyword, "TABLE"},
+ {Keyword, "TABLESTORE"},
+ {Keyword, "TOPIC"},
+ {Keyword, "TRANSFER"},
+ {Keyword, "USER"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected);
+ }
+
+ Y_UNIT_TEST(Create) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ASYNC"},
+ {Keyword, "BACKUP"},
+ {Keyword, "EXTERNAL"},
+ {Keyword, "GROUP"},
+ {Keyword, "OBJECT"},
+ {Keyword, "OR"},
+ {Keyword, "RESOURCE"},
+ {Keyword, "TABLE"},
+ {Keyword, "TABLESTORE"},
+ {Keyword, "TEMP"},
+ {Keyword, "TEMPORARY"},
+ {Keyword, "TOPIC"},
+ {Keyword, "TRANSFER"},
+ {Keyword, "USER"},
+ {Keyword, "VIEW"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected);
+ }
+
+ Y_UNIT_TEST(Delete) {
+ TVector<TCandidate> expected = {
+ {Keyword, "FROM"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected);
+ }
+
+ Y_UNIT_TEST(Drop) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ASYNC"},
+ {Keyword, "BACKUP"},
+ {Keyword, "EXTERNAL"},
+ {Keyword, "GROUP"},
+ {Keyword, "OBJECT"},
+ {Keyword, "RESOURCE"},
+ {Keyword, "TABLE"},
+ {Keyword, "TABLESTORE"},
+ {Keyword, "TOPIC"},
+ {Keyword, "TRANSFER"},
+ {Keyword, "USER"},
+ {Keyword, "VIEW"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected);
+ }
+
+ Y_UNIT_TEST(Explain) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ALTER"},
+ {Keyword, "ANALYZE"},
+ {Keyword, "BACKUP"},
+ {Keyword, "BATCH"},
+ {Keyword, "COMMIT"},
+ {Keyword, "CREATE"},
+ {Keyword, "DECLARE"},
+ {Keyword, "DEFINE"},
+ {Keyword, "DELETE"},
+ {Keyword, "DISCARD"},
+ {Keyword, "DO"},
+ {Keyword, "DROP"},
+ {Keyword, "EVALUATE"},
+ {Keyword, "EXPORT"},
+ {Keyword, "FOR"},
+ {Keyword, "FROM"},
+ {Keyword, "GRANT"},
+ {Keyword, "IF"},
+ {Keyword, "IMPORT"},
+ {Keyword, "INSERT"},
+ {Keyword, "PARALLEL"},
+ {Keyword, "PRAGMA"},
+ {Keyword, "PROCESS"},
+ {Keyword, "QUERY"},
+ {Keyword, "REDUCE"},
+ {Keyword, "REPLACE"},
+ {Keyword, "RESTORE"},
+ {Keyword, "REVOKE"},
+ {Keyword, "ROLLBACK"},
+ {Keyword, "SELECT"},
+ {Keyword, "SHOW"},
+ {Keyword, "UPDATE"},
+ {Keyword, "UPSERT"},
+ {Keyword, "USE"},
+ {Keyword, "VALUES"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected);
+ }
+
+ Y_UNIT_TEST(Grant) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ALL"},
+ {Keyword, "ALTER"},
+ {Keyword, "CONNECT"},
+ {Keyword, "CREATE"},
+ {Keyword, "DESCRIBE"},
+ {Keyword, "DROP"},
+ {Keyword, "ERASE"},
+ {Keyword, "FULL"},
+ {Keyword, "GRANT"},
+ {Keyword, "INSERT"},
+ {Keyword, "LIST"},
+ {Keyword, "MANAGE"},
+ {Keyword, "MODIFY"},
+ {Keyword, "REMOVE"},
+ {Keyword, "SELECT"},
+ {Keyword, "UPDATE"},
+ {Keyword, "USE"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected);
+ }
+
+ Y_UNIT_TEST(Insert) {
+ TVector<TCandidate> expected = {
+ {Keyword, "INTO"},
+ {Keyword, "OR"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected);
+ }
+
+ Y_UNIT_TEST(Pragma) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ANSI"},
+ {Keyword, "CALLABLE"},
+ {Keyword, "DICT"},
+ {Keyword, "ENUM"},
+ {Keyword, "FLOW"},
+ {Keyword, "LIST"},
+ {Keyword, "OPTIONAL"},
+ {Keyword, "RESOURCE"},
+ {Keyword, "SET"},
+ {Keyword, "STRUCT"},
+ {Keyword, "TAGGED"},
+ {Keyword, "TUPLE"},
+ {Keyword, "VARIANT"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA "}), expected);
+ }
+
+ Y_UNIT_TEST(Select) {
+ TVector<TCandidate> expected = {
+ {Keyword, "ALL"},
+ {Keyword, "BITCAST"},
+ {Keyword, "CALLABLE"},
+ {Keyword, "CASE"},
+ {Keyword, "CAST"},
+ {Keyword, "CURRENT_DATE"},
+ {Keyword, "CURRENT_TIME"},
+ {Keyword, "CURRENT_TIMESTAMP"},
+ {Keyword, "DICT"},
+ {Keyword, "DISTINCT"},
+ {Keyword, "EMPTY_ACTION"},
+ {Keyword, "ENUM"},
+ {Keyword, "EXISTS"},
+ {Keyword, "FALSE"},
+ {Keyword, "FLOW"},
+ {Keyword, "JSON_EXISTS"},
+ {Keyword, "JSON_QUERY"},
+ {Keyword, "JSON_VALUE"},
+ {Keyword, "LIST"},
+ {Keyword, "NOT"},
+ {Keyword, "NULL"},
+ {Keyword, "OPTIONAL"},
+ {Keyword, "RESOURCE"},
+ {Keyword, "SET"},
+ {Keyword, "STREAM"},
+ {Keyword, "STRUCT"},
+ {Keyword, "TAGGED"},
+ {Keyword, "TRUE"},
+ {Keyword, "TUPLE"},
+ {Keyword, "VARIANT"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected);
+ }
+
+ Y_UNIT_TEST(Upsert) {
+ TVector<TCandidate> expected = {
+ {Keyword, "INTO"},
+ {Keyword, "OBJECT"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected);
+ }
+
+ Y_UNIT_TEST(UTF8Wide) {
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0);
+ }
+
+ Y_UNIT_TEST(WordBreak) {
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT ("}).size(), 28);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT (1)"}).size(), 30);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT 1;"}).size(), 35);
+ }
+
+ Y_UNIT_TEST(Typing) {
+ const auto queryUtf16 = TUtf16String::FromUtf8(
+ "SELECT \n"
+ " 123467, \"Hello, {name}! 编码\"}, \n"
+ " (1 + (5 * 1 / 0)), MIN(identifier), \n"
+ " Bool(field), Math::Sin(var) \n"
+ "FROM `local/test/space/table` JOIN test;");
+
+ auto engine = MakeSqlCompletionEngine();
+
+ for (std::size_t size = 0; size <= queryUtf16.size(); ++size) {
+ const TWtringBuf prefixUtf16(queryUtf16, 0, size);
+ auto completion = engine->Complete({TString::FromUtf16(prefixUtf16)});
+ Y_DO_NOT_OPTIMIZE_AWAY(completion);
+ }
+ }
+
+ Y_UNIT_TEST(CaseInsensitivity) {
+ TVector<TCandidate> expected = {
+ {Keyword, "SELECT"},
+ };
+
+ auto engine = MakeSqlCompletionEngine();
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected);
+ UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected);
+ }
+} // Y_UNIT_TEST_SUITE(SqlCompleteTests)
diff --git a/yql/essentials/sql/v1/complete/sql_context.cpp b/yql/essentials/sql/v1/complete/sql_context.cpp
new file mode 100644
index 0000000000..18f676e40b
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_context.cpp
@@ -0,0 +1,123 @@
+#include "sql_context.h"
+
+#include "c3_engine.h"
+#include "sql_syntax.h"
+
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h>
+#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h>
+
+#include <util/generic/algorithm.h>
+#include <util/stream/output.h>
+
+namespace NSQLComplete {
+
+ template <bool IsAnsiLexer>
+ class TSpecializedSqlContextInference: public ISqlContextInference {
+ private:
+ using TDefaultYQLGrammar = TAntlrGrammar<
+ NALADefaultAntlr4::SQLv1Antlr4Lexer,
+ NALADefaultAntlr4::SQLv1Antlr4Parser>;
+
+ using TAnsiYQLGrammar = TAntlrGrammar<
+ NALAAnsiAntlr4::SQLv1Antlr4Lexer,
+ NALAAnsiAntlr4::SQLv1Antlr4Parser>;
+
+ using G = std::conditional_t<
+ IsAnsiLexer,
+ TAnsiYQLGrammar,
+ TDefaultYQLGrammar>;
+
+ public:
+ TSpecializedSqlContextInference()
+ : Grammar(&GetSqlGrammar(IsAnsiLexer))
+ , C3(ComputeC3Config())
+ {
+ }
+
+ TCompletionContext Analyze(TCompletionInput input) override {
+ auto prefix = input.Text.Head(input.CursorPosition);
+ auto tokens = C3.Complete(prefix);
+ FilterIdKeywords(tokens);
+ return {
+ .Keywords = SiftedKeywords(tokens),
+ };
+ }
+
+ private:
+ IC3Engine::TConfig ComputeC3Config() {
+ return {
+ .IgnoredTokens = ComputeIgnoredTokens(),
+ .PreferredRules = ComputePreferredRules(),
+ };
+ }
+
+ std::unordered_set<TTokenId> ComputeIgnoredTokens() {
+ auto ignoredTokens = Grammar->GetAllTokens();
+ for (auto keywordToken : Grammar->GetKeywordTokens()) {
+ ignoredTokens.erase(keywordToken);
+ }
+ return ignoredTokens;
+ }
+
+ std::unordered_set<TRuleId> ComputePreferredRules() {
+ const auto& keywordRules = Grammar->GetKeywordRules();
+
+ std::unordered_set<TRuleId> preferredRules;
+ preferredRules.insert(std::begin(keywordRules), std::end(keywordRules));
+ return preferredRules;
+ }
+
+ void FilterIdKeywords(TVector<TSuggestedToken>& tokens) {
+ const auto& keywordRules = Grammar->GetKeywordRules();
+ auto [first, last] = std::ranges::remove_if(tokens, [&](const TSuggestedToken& token) {
+ return AnyOf(token.ParserCallStack, [&](TRuleId rule) {
+ return Find(keywordRules, rule) != std::end(keywordRules);
+ });
+ });
+ tokens.erase(first, last);
+ }
+
+ TVector<TString> SiftedKeywords(const TVector<TSuggestedToken>& tokens) {
+ const auto& vocabulary = Grammar->GetVocabulary();
+ const auto& keywordTokens = Grammar->GetKeywordTokens();
+
+ TVector<TString> keywords;
+ for (const auto& token : tokens) {
+ if (keywordTokens.contains(token.Number)) {
+ keywords.emplace_back(vocabulary.getDisplayName(token.Number));
+ }
+ }
+ return keywords;
+ }
+
+ const ISqlGrammar* Grammar;
+ TC3Engine<G> C3;
+ };
+
+ class TSqlContextInference: public ISqlContextInference {
+ public:
+ TCompletionContext Analyze(TCompletionInput input) override {
+ auto isAnsiLexer = IsAnsiQuery(TString(input.Text));
+ auto& engine = GetSpecializedEngine(isAnsiLexer);
+ return engine.Analyze(std::move(input));
+ }
+
+ private:
+ ISqlContextInference& GetSpecializedEngine(bool isAnsiLexer) {
+ if (isAnsiLexer) {
+ return AnsiEngine;
+ }
+ return DefaultEngine;
+ }
+
+ TSpecializedSqlContextInference</* IsAnsiLexer = */ false> DefaultEngine;
+ TSpecializedSqlContextInference</* IsAnsiLexer = */ true> AnsiEngine;
+ };
+
+ ISqlContextInference::TPtr MakeSqlContextInference() {
+ return TSqlContextInference::TPtr(new TSqlContextInference());
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_context.h b/yql/essentials/sql/v1/complete/sql_context.h
new file mode 100644
index 0000000000..bc3b8d4840
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_context.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "sql_complete.h"
+
+#include <util/generic/string.h>
+
+namespace NSQLComplete {
+
+ struct TCompletionContext {
+ TVector<TString> Keywords;
+ };
+
+ class ISqlContextInference {
+ public:
+ using TPtr = THolder<ISqlContextInference>;
+
+ virtual TCompletionContext Analyze(TCompletionInput input) = 0;
+ virtual ~ISqlContextInference() = default;
+ };
+
+ ISqlContextInference::TPtr MakeSqlContextInference();
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_syntax.cpp b/yql/essentials/sql/v1/complete/sql_syntax.cpp
new file mode 100644
index 0000000000..ba5a08d371
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_syntax.cpp
@@ -0,0 +1,19 @@
+#include "sql_syntax.h"
+
+#include <yql/essentials/public/issue/yql_issue.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+
+namespace NSQLComplete {
+
+ using NSQLTranslation::ParseTranslationSettings;
+ using NSQLTranslation::TTranslationSettings;
+ using NYql::TIssues;
+
+ bool IsAnsiQuery(const TString& query) {
+ TTranslationSettings settings;
+ TIssues issues;
+ ParseTranslationSettings(query, settings, issues);
+ return settings.AnsiLexer;
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/sql_syntax.h b/yql/essentials/sql/v1/complete/sql_syntax.h
new file mode 100644
index 0000000000..f03cbc9fb9
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/sql_syntax.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+namespace NSQLComplete {
+
+ // Returns true if the query requests the ANSI lexer; malformed special comments are tolerated (not rejected)
+ bool IsAnsiQuery(const TString& query);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/string_util.cpp b/yql/essentials/sql/v1/complete/string_util.cpp
new file mode 100644
index 0000000000..12a6701065
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/string_util.cpp
@@ -0,0 +1,29 @@
+#include "string_util.h"
+
+#include <util/generic/strbuf.h>
+
+namespace NSQLComplete {
+
+ bool IsWordBoundary(char ch) { // Is optimized into table lookup by clang
+ for (size_t i = 0; i < sizeof(WordBreakCharacters) - 1; ++i) {
+ if (WordBreakCharacters[i] == ch) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ size_t LastWordIndex(TStringBuf text) {
+ for (auto it = std::rbegin(text); it != std::rend(text); std::advance(it, 1)) {
+ if (IsWordBoundary(*it)) {
+ return std::distance(it, std::rend(text));
+ }
+ }
+ return 0;
+ }
+
+ TStringBuf LastWord(TStringBuf text) {
+ return text.SubStr(LastWordIndex(text));
+ }
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/string_util.h b/yql/essentials/sql/v1/complete/string_util.h
new file mode 100644
index 0000000000..bafc578d82
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/string_util.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <util/charset/unidata.h>
+
+#include <string_view>
+
+namespace NSQLComplete {
+
+ static const char WordBreakCharacters[] = " \t\v\f\a\b\r\n`~!@#$%^&*-=+[](){}\\|;:'\".,<>/?";
+
+ bool IsWordBoundary(char ch);
+
+ size_t LastWordIndex(TStringBuf text);
+
+ TStringBuf LastWord(TStringBuf text);
+
+} // namespace NSQLComplete
diff --git a/yql/essentials/sql/v1/complete/string_util_ut.cpp b/yql/essentials/sql/v1/complete/string_util_ut.cpp
new file mode 100644
index 0000000000..ca3ed546a3
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/string_util_ut.cpp
@@ -0,0 +1,21 @@
+#include "string_util.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NSQLComplete;
+
+Y_UNIT_TEST_SUITE(StringUtilTest) {
+ Y_UNIT_TEST(Blank) {
+ UNIT_ASSERT_VALUES_EQUAL(LastWord(""), "");
+ UNIT_ASSERT_VALUES_EQUAL(LastWord(" "), "");
+ UNIT_ASSERT_VALUES_EQUAL(LastWord(" "), "");
+ UNIT_ASSERT_VALUES_EQUAL(LastWord(" "), "");
+ }
+
+ Y_UNIT_TEST(Space) {
+ UNIT_ASSERT_VALUES_EQUAL(LastWord("two "), "");
+ UNIT_ASSERT_VALUES_EQUAL(LastWord("one two "), "");
+ UNIT_ASSERT_VALUES_EQUAL(LastWord("two"), "two");
+ UNIT_ASSERT_VALUES_EQUAL(LastWord("one two"), "two");
+ }
+} // Y_UNIT_TEST_SUITE(StringUtilTest)
diff --git a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make
new file mode 100644
index 0000000000..91f7da1361
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/ut/ya.make
@@ -0,0 +1,8 @@
+UNITTEST_FOR(yql/essentials/sql/v1/complete)
+
+SRCS(
+ sql_complete_ut.cpp
+ string_util_ut.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make
new file mode 100644
index 0000000000..70189e5f50
--- /dev/null
+++ b/yql/essentials/sql/v1/complete/ya.make
@@ -0,0 +1,24 @@
+LIBRARY()
+
+SRCS(
+ sql_antlr4.cpp
+ sql_complete.cpp
+ sql_context.cpp
+ sql_syntax.cpp
+ string_util.cpp
+)
+
+PEERDIR(
+ contrib/libs/antlr4_cpp_runtime
+ contrib/libs/antlr4-c3
+ yql/essentials/sql/settings
+ yql/essentials/sql/v1/format
+ yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4
+ yql/essentials/parser/antlr_ast/gen/v1_antlr4
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/yql/essentials/sql/v1/ya.make b/yql/essentials/sql/v1/ya.make
index 08fd499b3f..1d2105f6fa 100644
--- a/yql/essentials/sql/v1/ya.make
+++ b/yql/essentials/sql/v1/ya.make
@@ -56,6 +56,7 @@ GENERATE_ENUM_SERIALIZATION(sql_call_param.h)
END()
RECURSE(
+ complete
format
lexer
perf