diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-28 19:58:58 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-28 20:15:53 +0300 |
commit | 77397379b6394220a2dfd2802f417cdd8c214905 (patch) | |
tree | 0921befe1120e354ab5a47cb126df6bc54b7483a | |
parent | eae2230242d713b9044f14d0920dd0845d03145c (diff) | |
download | ydb-77397379b6394220a2dfd2802f417cdd8c214905.tar.gz |
Intermediate changes
commit_hash:5fc851d2c72810067fe0d407b66535b17de63129
25 files changed, 2622 insertions, 0 deletions
diff --git a/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake new file mode 100644 index 0000000000..e039c0caf6 --- /dev/null +++ b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/epilogue.cmake @@ -0,0 +1,8 @@ +set(GRAMMAR_STRING_CORE_SINGLE "~([']) | (QUOTE_SINGLE QUOTE_SINGLE)") +set(GRAMMAR_STRING_CORE_DOUBLE "~([\"]) | (QUOTE_DOUBLE QUOTE_DOUBLE)") +set(GRAMMAR_MULTILINE_COMMENT_CORE "MULTILINE_COMMENT | .") + +configure_file( + ${CMAKE_SOURCE_DIR}/yql/essentials/sql/v1/SQLv1Antlr4.g.in + ${CMAKE_BINARY_DIR}/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4.g +) diff --git a/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make new file mode 100644 index 0000000000..d0b36ae35a --- /dev/null +++ b/yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/ya.make @@ -0,0 +1,43 @@ +LIBRARY() + +SET(SQL_GRAMMAR ${ARCADIA_BUILD_ROOT}/${MODDIR}/SQLv1Antlr4.g4) + +IF(EXPORT_CMAKE) + MANUAL_GENERATION(${SQL_GRAMMAR}) +ELSE() + SET(GRAMMAR_STRING_CORE_SINGLE "\"~([']) | (QUOTE_SINGLE QUOTE_SINGLE)\"") + SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE#]) | (QUOTE_DOUBLE QUOTE_DOUBLE)\"") + SET(GRAMMAR_MULTILINE_COMMENT_CORE "\"MULTILINE_COMMENT | .\"") + + CONFIGURE_FILE(${ARCADIA_ROOT}/yql/essentials/sql/v1/SQLv1Antlr4.g.in ${SQL_GRAMMAR}) +ENDIF() + +COPY_FILE( + ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +) + +COPY_FILE( + ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg +) + +RUN_ANTLR4( + ${SQL_GRAMMAR} + -no-listener + -package NALAAnsiAntlr4 + -lib . + IN + ${SQL_GRAMMAR} + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg + OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h + OUTPUT_INCLUDES contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h + CWD ${ARCADIA_BUILD_ROOT}/${MODDIR} +) + +PEERDIR( + contrib/libs/antlr4_cpp_runtime +) + +END() diff --git a/yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake new file mode 100644 index 0000000000..1fb34e7415 --- /dev/null +++ b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/epilogue.cmake @@ -0,0 +1,8 @@ +set(GRAMMAR_STRING_CORE_SINGLE "~(['\\\\]) | (BACKSLASH .)") +set(GRAMMAR_STRING_CORE_DOUBLE "~([\"\\\\]) | (BACKSLASH .)") +set(GRAMMAR_MULTILINE_COMMENT_CORE ".") + +configure_file( + ${CMAKE_SOURCE_DIR}/yql/essentials/sql/v1/SQLv1Antlr4.g.in + ${CMAKE_BINARY_DIR}/yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4.g +) diff --git a/yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make new file mode 100644 index 0000000000..2ef05bd592 --- /dev/null +++ b/yql/essentials/parser/antlr_ast/gen/v1_antlr4/ya.make @@ -0,0 +1,43 @@ +LIBRARY() + +SET(SQL_GRAMMAR ${ARCADIA_BUILD_ROOT}/${MODDIR}/SQLv1Antlr4.g) + +IF(EXPORT_CMAKE) + MANUAL_GENERATION(${SQL_GRAMMAR}) +ELSE() + SET(GRAMMAR_STRING_CORE_SINGLE "\"~(['#BACKSLASH#]) | (BACKSLASH .)\"") + SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE##BACKSLASH#]) | (BACKSLASH .)\"") + SET(GRAMMAR_MULTILINE_COMMENT_CORE "\".\"") + + CONFIGURE_FILE(${ARCADIA_ROOT}/yql/essentials/sql/v1/SQLv1Antlr4.g.in ${SQL_GRAMMAR}) +ENDIF() + +COPY_FILE( + ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +) + +COPY_FILE( + ${ARCADIA_ROOT}/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg +) + +RUN_ANTLR4( + ${SQL_GRAMMAR} + -no-listener + -package NALADefaultAntlr4 + -lib . + IN + ${SQL_GRAMMAR} + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg + ${ARCADIA_BUILD_ROOT}/${MODDIR}/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg + OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h + OUTPUT_INCLUDES contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h + CWD ${ARCADIA_BUILD_ROOT}/${MODDIR} +) + +PEERDIR( + contrib/libs/antlr4_cpp_runtime +) + +END() diff --git a/yql/essentials/parser/antlr_ast/gen/ya.make b/yql/essentials/parser/antlr_ast/gen/ya.make new file mode 100644 index 0000000000..2f94911b40 --- /dev/null +++ b/yql/essentials/parser/antlr_ast/gen/ya.make @@ -0,0 +1,4 @@ +RECURSE( + v1_antlr4 + v1_ansi_antlr4 +) diff --git a/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg new file mode 100644 index 0000000000..c6592680f7 --- /dev/null +++ b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -0,0 +1,1176 @@ +/* + * [The "BSD license"] + * Copyright (c) 2015 Dan McLaughlin, Mike Lischke + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +import "Files.stg" // All file specific stuff. + +cppTypeInitMap ::= [ + "int":"0", + "long":"0", + "float":"0.0f", + "double":"0.0", + "bool":"false", + "short":"0", + "char":"0", + default: "nullptr" // anything other than a primitive type is an object +] + +LexerHeader(lexer, atn, actionFuncs, sempredFuncs, superClass = {antlr4::Lexer}) ::= << +<namedActions.context> + +class <file.exportMacro> <lexer.name> : public <superClass> { +public: +<if (lexer.tokens)> + enum { + <lexer.tokens: {k | TOKEN_<k> = <lexer.tokens.(k)>}; separator=", ", wrap, anchor> + }; +<endif> + +<if (lexer.escapedChannels)> + enum { + <lexer.escapedChannels: {k | <k> = <lexer.escapedChannels.(k)>}; separator=", ", wrap, anchor> + }; +<endif> + +<if (rest(lexer.escapedModeNames))> + enum { + <rest(lexer.escapedModeNames): {m | TOKEN_<m> = <i>}; separator=", ", wrap, anchor> + }; +<endif> + + explicit <lexer.name>(antlr4::CharStream *input); + + ~<lexer.name>() override; + + <namedActions.members> + + std::string getGrammarFileName() const override; + + const std::vector\<std::string>& getRuleNames() const override; + + const std::vector\<std::string>& getChannelNames() const override; + + const std::vector\<std::string>& getModeNames() const override; + + const antlr4::dfa::Vocabulary& getVocabulary() const override; + + antlr4::atn::SerializedATNView getSerializedATN() const override; + + const antlr4::atn::ATN& getATN() const override; + + <if (actionFuncs)> + void action(antlr4::RuleContext *context, size_t ruleIndex, size_t actionIndex) override; + <endif> + + <if (sempredFuncs)> + bool sempred(antlr4::RuleContext *_localctx, size_t ruleIndex, size_t predicateIndex) override; + <endif> + + // By default the static state used to implement the lexer is lazily initialized during the first + // call to the constructor. You can call this function if you wish to initialize the static state + // ahead of time. + static void initialize(); + +private: + <namedActions.declarations> + + // Individual action functions triggered by action() above. + <actionFuncs.values; separator="\n"> + + // Individual semantic predicate functions triggered by sempred() above. + <sempredFuncs.values; separator="\n"> + + <atn> +}; +>> + +Lexer(lexer, atn, actionFuncs, sempredFuncs, superClass = {Lexer}) ::= << + +using namespace antlr4; + +namespace { + +struct <lexer.name; format = "cap">StaticData final { + <lexer.name; format = "cap">StaticData(std::vector\<std::string> ruleNames, + std::vector\<std::string> channelNames, + std::vector\<std::string> modeNames, + std::vector\<std::string> literalNames, + std::vector\<std::string> symbolicNames) + : ruleNames(std::move(ruleNames)), channelNames(std::move(channelNames)), + modeNames(std::move(modeNames)), literalNames(std::move(literalNames)), + symbolicNames(std::move(symbolicNames)), + vocabulary(this->literalNames, this->symbolicNames) {} + + <lexer.name; format = "cap">StaticData(const <lexer.name; format = "cap">StaticData&) = delete; + <lexer.name; format = "cap">StaticData(<lexer.name; format = "cap">StaticData&&) = delete; + <lexer.name; format = "cap">StaticData& operator=(const <lexer.name; format = "cap">StaticData&) = delete; + <lexer.name; format = "cap">StaticData& operator=(<lexer.name; format = "cap">StaticData&&) = delete; + + std::vector\<antlr4::dfa::DFA> decisionToDFA; + antlr4::atn::PredictionContextCache sharedContextCache; + const std::vector\<std::string> ruleNames; + const std::vector\<std::string> channelNames; + const std::vector\<std::string> modeNames; + const std::vector\<std::string> literalNames; + const std::vector\<std::string> symbolicNames; + const antlr4::dfa::Vocabulary vocabulary; + antlr4::atn::SerializedATNView serializedATN; + std::unique_ptr\<antlr4::atn::ATN> atn; +}; + +::antlr4::internal::OnceFlag <lexer.grammarName; format = "lower">LexerOnceFlag; +#if ANTLR4_USE_THREAD_LOCAL_CACHE +static thread_local +#endif +std::unique_ptr\<<lexer.name; format = "cap">StaticData> <lexer.grammarName; format = "lower">LexerStaticData = nullptr; + +void <lexer.grammarName; format = "lower">LexerInitialize() { +#if ANTLR4_USE_THREAD_LOCAL_CACHE + if (<lexer.grammarName; format = "lower">LexerStaticData != nullptr) { + return; + } +#else + assert(<lexer.grammarName; format = "lower">LexerStaticData == nullptr); +#endif + auto staticData = std::make_unique\<<lexer.name; format = "cap">StaticData>( + std::vector\<std::string>{ + <lexer.ruleNames: {r | "<r>"}; separator = ", ", wrap, anchor> + }, + std::vector\<std::string>{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN"<if (lexer.channels)>, <lexer.channels: {c | "<c>"}; separator = ", ", wrap, anchor><endif> + }, + std::vector\<std::string>{ + <lexer.modes: {m | "<m>"}; separator = ", ", wrap, anchor> + }, + std::vector\<std::string>{ + <lexer.literalNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor> + }, + std::vector\<std::string>{ + <lexer.symbolicNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor> + } + ); + <atn> + <lexer.grammarName; format = "lower">LexerStaticData = std::move(staticData); +} + +} + +<lexer.name>::<lexer.name>(CharStream *input) : <superClass>(input) { + <lexer.name>::initialize(); + _interpreter = new atn::LexerATNSimulator(this, *<lexer.grammarName; format = "lower">LexerStaticData->atn, <lexer.grammarName; format = "lower">LexerStaticData->decisionToDFA, <lexer.grammarName; format = "lower">LexerStaticData->sharedContextCache); +} + +<lexer.name>::~<lexer.name>() { + delete _interpreter; +} + +std::string <lexer.name>::getGrammarFileName() const { + return "<lexer.grammarFileName>"; +} + +const std::vector\<std::string>& <lexer.name>::getRuleNames() const { + return <lexer.grammarName; format = "lower">LexerStaticData->ruleNames; +} + +const std::vector\<std::string>& <lexer.name>::getChannelNames() const { + return <lexer.grammarName; format = "lower">LexerStaticData->channelNames; +} + +const std::vector\<std::string>& <lexer.name>::getModeNames() const { + return <lexer.grammarName; format = "lower">LexerStaticData->modeNames; +} + +const dfa::Vocabulary& <lexer.name>::getVocabulary() const { + return <lexer.grammarName; format = "lower">LexerStaticData->vocabulary; +} + +antlr4::atn::SerializedATNView <lexer.name>::getSerializedATN() const { + return <lexer.grammarName; format = "lower">LexerStaticData->serializedATN; +} + +const atn::ATN& <lexer.name>::getATN() const { + return *<lexer.grammarName; format = "lower">LexerStaticData->atn; +} + +<namedActions.definitions> + +<if (actionFuncs)> +void <lexer.name>::action(RuleContext *context, size_t ruleIndex, size_t actionIndex) { + switch (ruleIndex) { + <lexer.actionFuncs.values: {f | case <f.ruleIndex>: <f.name>Action(antlrcpp::downCast\<<f.ctxType> *>(context), actionIndex); break;}; separator="\n"> + + default: + break; + } +} +<endif> + +<if (sempredFuncs)> +bool <lexer.name>::sempred(RuleContext *context, size_t ruleIndex, size_t predicateIndex) { + switch (ruleIndex) { + <lexer.sempredFuncs.values: {f | case <f.ruleIndex>: return <f.name>Sempred(antlrcpp::downCast\<<f.ctxType> *>(context), predicateIndex);}; separator="\n"> + + default: + break; + } + return true; +} +<endif> + +<actionFuncs.values; separator="\n"> + +<sempredFuncs.values; separator="\n"> + +void <lexer.name>::initialize() { +#if ANTLR4_USE_THREAD_LOCAL_CACHE + <lexer.grammarName; format = "lower">LexerInitialize(); +#else + ::antlr4::internal::call_once(<lexer.grammarName; format = "lower">LexerOnceFlag, <lexer.grammarName; format = "lower">LexerInitialize); +#endif +} +>> + +RuleActionFunctionHeader(r, actions) ::= << +void <r.name>Action(<r.ctxType> *context, size_t actionIndex); +>> + +RuleActionFunction(r, actions) ::= << +void <r.factory.grammar.name>::<r.name>Action(<r.ctxType> *context, size_t actionIndex) { + switch (actionIndex) { + <actions: {index | case <index>: <actions.(index)> break;}; separator="\n"> + + default: + break; + } +} + +>> + +RuleSempredFunctionHeader(r, actions) ::= << +bool <r.name>Sempred(<r.ctxType> *_localctx, size_t predicateIndex); +>> + +RuleSempredFunction(r, actions) ::= << +<! Called for both lexer and parser. But only one of them is actually available. Testing for the parser directly + generates a warning, however. So do the check via the factory instead. !> +bool <if (r.factory.g.lexer)><lexer.name><else><parser.name><endif>::<r.name>Sempred(<r.ctxType> *_localctx, size_t predicateIndex) { + switch (predicateIndex) { + <actions: {index | case <index>: return <actions.(index)>}; separator=";\n">; + + default: + break; + } + return true; +} + +>> + +//-------------------------------------------------------------------------------------------------- + +ParserHeader(parser, funcs, atn, sempredFuncs, superClass = {antlr4::Parser}) ::= << +<namedActions.context> + +class <file.exportMacro> <parser.name> : public <superClass> { +public: +<if (parser.tokens)> + enum { + <parser.tokens: {k | TOKEN_<k> = <parser.tokens.(k)>}; separator=", ", wrap, anchor> + }; +<endif> + +<if (parser.rules)> + enum { + <parser.rules: {r | Rule<r.name; format="cap"> = <r.index>}; separator=", ", wrap, anchor> + }; +<endif> + + explicit <parser.name>(antlr4::TokenStream *input); + + <parser.name>(antlr4::TokenStream *input, const antlr4::atn::ParserATNSimulatorOptions &options); + + ~<parser.name>() override; + + std::string getGrammarFileName() const override; + + const antlr4::atn::ATN& getATN() const override; + + const std::vector\<std::string>& getRuleNames() const override; + + const antlr4::dfa::Vocabulary& getVocabulary() const override; + + antlr4::atn::SerializedATNView getSerializedATN() const override; + + <namedActions.members> + + <parser.funcs: {f | class <f.name; format = "cap">Context;}; separator = "\n"> <! Forward declare context classes. !> + + <funcs; separator = "\n"> + + <if (sempredFuncs)> + bool sempred(antlr4::RuleContext *_localctx, size_t ruleIndex, size_t predicateIndex) override; + + <sempredFuncs.values; separator = "\n"> + <endif> + + // By default the static state used to implement the parser is lazily initialized during the first + // call to the constructor. You can call this function if you wish to initialize the static state + // ahead of time. + static void initialize(); + + static const size_t TOKEN_EOF = antlr4::Token::EOF; + +private: + <namedActions.declarations> +}; +>> + +Parser(parser, funcs, atn, sempredFuncs, superClass = {Parser}) ::= << + +using namespace antlr4; + +namespace { + +struct <parser.name; format = "cap">StaticData final { + <parser.name; format = "cap">StaticData(std::vector\<std::string> ruleNames, + std::vector\<std::string> literalNames, + std::vector\<std::string> symbolicNames) + : ruleNames(std::move(ruleNames)), literalNames(std::move(literalNames)), + symbolicNames(std::move(symbolicNames)), + vocabulary(this->literalNames, this->symbolicNames) {} + + <parser.name; format = "cap">StaticData(const <parser.name; format = "cap">StaticData&) = delete; + <parser.name; format = "cap">StaticData(<parser.name; format = "cap">StaticData&&) = delete; + <parser.name; format = "cap">StaticData& operator=(const <parser.name; format = "cap">StaticData&) = delete; + <parser.name; format = "cap">StaticData& operator=(<parser.name; format = "cap">StaticData&&) = delete; + + std::vector\<antlr4::dfa::DFA> decisionToDFA; + antlr4::atn::PredictionContextCache sharedContextCache; + const std::vector\<std::string> ruleNames; + const std::vector\<std::string> literalNames; + const std::vector\<std::string> symbolicNames; + const antlr4::dfa::Vocabulary vocabulary; + antlr4::atn::SerializedATNView serializedATN; + std::unique_ptr\<antlr4::atn::ATN> atn; +}; + +::antlr4::internal::OnceFlag <parser.grammarName; format = "lower">ParserOnceFlag; +#if ANTLR4_USE_THREAD_LOCAL_CACHE +static thread_local +#endif +std::unique_ptr\<<parser.name; format = "cap">StaticData> <parser.grammarName; format = "lower">ParserStaticData = nullptr; + +void <parser.grammarName; format = "lower">ParserInitialize() { +#if ANTLR4_USE_THREAD_LOCAL_CACHE + if (<parser.grammarName; format = "lower">ParserStaticData != nullptr) { + return; + } +#else + assert(<parser.grammarName; format = "lower">ParserStaticData == nullptr); +#endif + auto staticData = std::make_unique\<<parser.name; format = "cap">StaticData>( + std::vector\<std::string>{ + <parser.ruleNames: {r | "<r>"}; separator = ", ", wrap, anchor> + }, + std::vector\<std::string>{ + <parser.literalNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor> + }, + std::vector\<std::string>{ + <parser.symbolicNames: {t | <t>}; null = "\"\"", separator = ", ", wrap, anchor> + } + ); + <atn> + <parser.grammarName; format = "lower">ParserStaticData = std::move(staticData); +} + +} + +<parser.name>::<parser.name>(TokenStream *input) : <parser.name>(input, antlr4::atn::ParserATNSimulatorOptions()) {} + +<parser.name>::<parser.name>(TokenStream *input, const antlr4::atn::ParserATNSimulatorOptions &options) : <superClass>(input) { + <parser.name>::initialize(); + _interpreter = new atn::ParserATNSimulator(this, *<parser.grammarName; format = "lower">ParserStaticData->atn, <parser.grammarName; format = "lower">ParserStaticData->decisionToDFA, <parser.grammarName; format = "lower">ParserStaticData->sharedContextCache, options); +} + +<parser.name>::~<parser.name>() { + delete _interpreter; +} + +const atn::ATN& <parser.name>::getATN() const { + return *<parser.grammarName; format = "lower">ParserStaticData->atn; +} + +std::string <parser.name>::getGrammarFileName() const { + return "<parser.grammarFileName>"; +} + +const std::vector\<std::string>& <parser.name>::getRuleNames() const { + return <parser.grammarName; format = "lower">ParserStaticData->ruleNames; +} + +const dfa::Vocabulary& <parser.name>::getVocabulary() const { + return <parser.grammarName; format = "lower">ParserStaticData->vocabulary; +} + +antlr4::atn::SerializedATNView <parser.name>::getSerializedATN() const { + return <parser.grammarName; format = "lower">ParserStaticData->serializedATN; +} + +<namedActions.definitions> + +<funcs; separator = "\n\n"> + +<if (sempredFuncs)> +bool <parser.name>::sempred(RuleContext *context, size_t ruleIndex, size_t predicateIndex) { + switch (ruleIndex) { + <parser.sempredFuncs.values: {f | + case <f.ruleIndex>: return <f.name>Sempred(antlrcpp::downCast\<<f.ctxType> *>(context), predicateIndex);}; separator="\n"> + + default: + break; + } + return true; +} + +<sempredFuncs.values; separator="\n"><endif> + +void <parser.name>::initialize() { +#if ANTLR4_USE_THREAD_LOCAL_CACHE + <parser.grammarName; format = "lower">ParserInitialize(); +#else + ::antlr4::internal::call_once(<parser.grammarName; format = "lower">ParserOnceFlag, <parser.grammarName; format = "lower">ParserInitialize); +#endif +} +>> + +SerializedATNHeader(model) ::= << +>> + +SerializedATN(model) ::= << +static const int32_t serializedATNSegment[] = { + <model.serialized: {s | <s>}; separator=",", wrap> +}; +staticData->serializedATN = antlr4::atn::SerializedATNView(serializedATNSegment, sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0])); + +antlr4::atn::ATNDeserializer deserializer; +staticData->atn = deserializer.deserialize(staticData->serializedATN); + +const size_t count = staticData->atn->getNumberOfDecisions(); +staticData->decisionToDFA.reserve(count); +for (size_t i = 0; i \< count; i++) { <! Rework class ATN to allow standard iterations. !> + staticData->decisionToDFA.emplace_back(staticData->atn->getDecisionState(i), i); +} +>> + +RuleFunctionHeader(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble, exceptions) ::= << +<ruleCtx> +<! TODO: untested !><if (altLabelCtxs)><altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator="\n"><endif> +<currentRule.ctxType>* <currentRule.escapedName>(<args; separator=",">); + +>> + +RuleFunction(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble, exceptions) ::= << +<ruleCtx> +<! TODO: untested !><altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator = "\n"> +<parser.name>::<currentRule.ctxType>* <parser.name>::<currentRule.escapedName>(<args; separator=",">) { + <currentRule.ctxType> *_localctx = _tracker.createInstance\<<currentRule.ctxType>\>(_ctx, getState()<currentRule.args:{a | , <a.escapedName>}>); + enterRule(_localctx, <currentRule.startState>, <parser.name>::Rule<currentRule.name; format = "cap">); + <namedActions.init> + <locals; separator = "\n"> + +#if __cplusplus > 201703L + auto onExit = finally([=, this] { +#else + auto onExit = finally([=] { +#endif + <finallyAction> + exitRule(); + }); + try { +<! TODO: untested !><if (currentRule.hasLookaheadBlock)> + size_t alt; + <endif> + <code> +<! TODO: untested !> <postamble; separator = "\n"> + <namedActions.after> + } + <if (exceptions)> + <exceptions; separator="\n"> + <else> + catch (RecognitionException &e) { + _errHandler->reportError(this, e); + _localctx->exception = std::current_exception(); + _errHandler->recover(this, _localctx->exception); + } + <endif> + + return _localctx; +} +>> + +LeftRecursiveRuleFunctionHeader(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble) ::= << +<ruleCtx> +<! TODO: untested !><altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator="\n"> +<currentRule.ctxType>* <currentRule.escapedName>(<currentRule.args; separator = ", ">); +<currentRule.ctxType>* <currentRule.escapedName>(int precedence<currentRule.args: {a | , <a>}>); +>> + +LeftRecursiveRuleFunction(currentRule, args, code, locals, ruleCtx, altLabelCtxs, namedActions, finallyAction, postamble) ::= << +<ruleCtx> +<altLabelCtxs: {l | <altLabelCtxs.(l)>}; separator="\n"> + +<parser.name>::<currentRule.ctxType>* <parser.name>::<currentRule.escapedName>(<currentRule.args; separator=", ">) { +<! TODO: currentRule.args untested !> return <currentRule.escapedName>(0<currentRule.args: {a | , <a.escapedName>}>); +} + +<parser.name>::<currentRule.ctxType>* <parser.name>::<currentRule.escapedName>(int precedence<currentRule.args:{a | , <a>}>) { + ParserRuleContext *parentContext = _ctx; + size_t parentState = getState(); + <parser.name>::<currentRule.ctxType> *_localctx = _tracker.createInstance\<<currentRule.ctxType>\>(_ctx, parentState<currentRule.args: {a | , <a.escapedName>}>); + <parser.name>::<currentRule.ctxType> *previousContext = _localctx; + (void)previousContext; // Silence compiler, in case the context is not used by generated code. + size_t startState = <currentRule.startState>; + enterRecursionRule(_localctx, <currentRule.startState>, <parser.name>::Rule<currentRule.name; format = "cap">, precedence); + + <namedActions.init> +<! TODO: untested !> <locals; separator = "\n"> + +#if __cplusplus > 201703L + auto onExit = finally([=, this] { +#else + auto onExit = finally([=] { +#endif + <if (finallyAction)><finallyAction><endif> + unrollRecursionContexts(parentContext); + }); + try { + <if (currentRule.hasLookaheadBlock)>size_t alt;<endif> + <code> +<! TODO: untested !><postamble; separator = "\n"> + <namedActions.after> + } + catch (RecognitionException &e) { + _errHandler->reportError(this, e); + _localctx->exception = std::current_exception(); + _errHandler->recover(this, _localctx->exception); + } + return _localctx; +} +>> + +StructDeclHeader(struct, ctorAttrs, attrs, getters, dispatchMethods, interfaces, extensionMembers) ::= << +class <file.exportMacro> <struct.escapedName> : public <if (contextSuperClass)><contextSuperClass><else>antlr4::ParserRuleContext<endif><if(interfaces)>, <interfaces; separator=", "><endif> { +public: + <attrs: {a | <a>;}; separator = "\n"> + <if (ctorAttrs)><struct.escapedName>(antlr4::ParserRuleContext *parent, size_t invokingState);<endif> + <struct.escapedName>(antlr4::ParserRuleContext *parent, size_t invokingState<ctorAttrs: {a | , <a>}>); +<if (struct.provideCopyFrom)> <! don't need copy unless we have subclasses !> + <struct.escapedName>() = default; + void copyFrom(<struct.escapedName> *context); + using antlr4::ParserRuleContext::copyFrom; +<endif> + + virtual size_t getRuleIndex() const override; + <getters: {g | <g>}; separator = "\n"> + + <dispatchMethods; separator = "\n"> +<! TODO: untested !> <extensionMembers; separator = "\n"> +}; + +>> + +StructDecl(struct, ctorAttrs, attrs, getters, dispatchMethods, interfaces, extensionMembers, signatures) ::= << +//----------------- <struct.escapedName> ------------------------------------------------------------------ + +<if (ctorAttrs)> +<parser.name>::<struct.escapedName>::<struct.escapedName>(ParserRuleContext *parent, size_t invokingState) + : <if (contextSuperClass)><contextSuperClass><else>ParserRuleContext<endif>(parent, invokingState) { +} +<endif> + +<parser.name>::<struct.escapedName>::<struct.escapedName>(ParserRuleContext *parent, size_t invokingState<ctorAttrs: {a | , <a>}>) + : <if (contextSuperClass)><contextSuperClass><else>ParserRuleContext<endif>(parent, invokingState) { + <struct.ctorAttrs: {a | this-><a.escapedName> = <a.escapedName>;}; separator="\n"> +} + +<getters: {g | <g>}; separator = "\n"> + +size_t <parser.name>::<struct.escapedName>::getRuleIndex() const { + return <parser.name>::Rule<struct.derivedFromName; format = "cap">; +} + +<if (struct.provideCopyFrom)> +void <parser.name>::<struct.escapedName>::copyFrom(<struct.escapedName> *ctx) { + <if (contextSuperClass)><contextSuperClass><else>ParserRuleContext<endif>::copyFrom(ctx); + <struct.attrs: {a | this-><a.escapedName> = ctx-><a.escapedName>;}; separator = "\n"> +} +<endif> +<dispatchMethods; separator = "\n\n"> +<! TODO: untested !><extensionMembers; separator = "\n\n"> + +>> + +AltLabelStructDeclHeader(struct, attrs, getters, dispatchMethods) ::= << +class <file.exportMacro> <struct.escapedName> : public <currentRule.name; format = "cap">Context { +public: + <struct.escapedName>(<currentRule.name; format = "cap">Context *ctx); + + <if (attrs)><attrs: {a | <a>;}; separator = "\n"><endif> + <getters: {g | <g>}; separator = "\n"> + <dispatchMethods; separator = "\n"> +}; + +>> + +AltLabelStructDecl(struct, attrs, getters, dispatchMethods) ::= << +//----------------- <struct.escapedName> ------------------------------------------------------------------ + +<! TODO: untested !><if (attrs)><attrs: {a | <a>}; separator = "\n"><endif> +<getters: {g | <g>}; separator = "\n"> +<parser.name>::<struct.escapedName>::<struct.escapedName>(<currentRule.name; format = "cap">Context *ctx) { copyFrom(ctx); } + +<dispatchMethods; separator="\n"> +>> + + +CodeBlockForOuterMostAltHeader(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= "<! Required to exist, but unused. !>" +CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= << +<if (currentOuterMostAltCodeBlock.altLabel)> +_localctx = _tracker.createInstance\<<parser.name>::<currentOuterMostAltCodeBlock.altLabel; format = "cap">Context>(_localctx); +<endif> +enterOuterAlt(_localctx, <currentOuterMostAltCodeBlock.alt.altNum>); +<CodeBlockForAlt(currentAltCodeBlock = currentOuterMostAltCodeBlock, ...)> +>> + +CodeBlockForAltHeader(currentAltCodeBlock, locals, preamble, ops) ::= "<! Required to exist, but unused. !>" +CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= << +<! TODO: untested !><locals; separator = "\n"> +<! TODO: untested !><preamble; separator = "\n"> +<ops; separator = "\n"> +>> + +LL1AltBlockHeader(choice, preamble, alts, error) ::= "<! Required to exist, but unused. !>" +LL1AltBlock(choice, preamble, alts, error) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); +<! TODO: untested !><if (choice.label)>LL1AltBlock(choice, preamble, alts, error) <labelref(choice.label)> = _input->LT(1);<endif> +<preamble; separator="\n"> +switch (_input->LA(1)) { + <choice.altLook, alts: {look, alt | <cases(tokens = look)> { + <alt> + break; +\} +}; separator = "\n"> +default: + <error> +} +>> + +LL1OptionalBlockHeader(choice, alts, error) ::= "<! Required but unused. !>" +LL1OptionalBlock(choice, alts, error) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); +switch (_input->LA(1)) { + <choice.altLook, alts: {look, alt | <cases(tokens = look)> { + <alt> + break; +\} +}; separator="\n"> +default: + break; +} +>> + +LL1OptionalBlockSingleAltHeader(choice, expr, alts, preamble, error, followExpr) ::= "<! Required but unused. !>" +LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); + +<preamble; separator = "\n"> +if (<expr>) { + <alts; separator = "\n"> +} +>> + +LL1StarBlockSingleAltHeader(choice, loopExpr, alts, preamble, iteration) ::= "<! Required but unused. !>" +LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); +<preamble; separator="\n"> +while (<loopExpr>) { + <alts; separator="\n"> + setState(<choice.loopBackStateNumber>); + _errHandler->sync(this); + <iteration> +} +>> + +LL1PlusBlockSingleAltHeader(choice, loopExpr, alts, preamble, iteration) ::= "<! Required but unused. !>" +LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +setState(<choice.blockStartStateNumber>); <! alt block decision !> +_errHandler->sync(this); +<preamble; separator="\n"> +do { + <alts; separator="\n"> + setState(<choice.stateNumber>); <! loopback/exit decision !> + _errHandler->sync(this); + <iteration> +} while (<loopExpr>); +>> + +// LL(*) stuff + +AltBlockHeader(choice, preamble, alts, error) ::= "<! Unused but must be present. !>" +AltBlock(choice, preamble, alts, error) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); +<! TODO: untested !><if (choice.label)><labelref(choice.label)> = _input->LT(1);<endif> +<! TODO: untested !><preamble; separator = "\n"> +switch (getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx)) { +<alts: {alt | case <i>: { + <alt> + break; +\} +}; separator="\n"> +default: + break; +} +>> + +OptionalBlockHeader(choice, alts, error) ::= "<! Unused but must be present. !>" +OptionalBlock(choice, alts, error) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); + +switch (getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx)) { +<alts: {alt | case <i><if (!choice.ast.greedy)> + 1<endif>: { + <alt> + break; +\} +}; separator = "\n"> +default: + break; +} +>> + +StarBlockHeader(choice, alts, sync, iteration) ::= "<! Unused but must be present. !>" +StarBlock(choice, alts, sync, iteration) ::= << +setState(<choice.stateNumber>); +_errHandler->sync(this); +alt = getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx); +while (alt != <choice.exitAlt> && alt != atn::ATN::INVALID_ALT_NUMBER) { + if (alt == 1<if(!choice.ast.greedy)> + 1<endif>) { + <iteration> + <alts> <! should only be one !> + } + setState(<choice.loopBackStateNumber>); + _errHandler->sync(this); + alt = getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx); +} +>> + +PlusBlockHeader(choice, alts, error) ::= "<! Required to exist, but unused. !>" +PlusBlock(choice, alts, error) ::= << +setState(<choice.blockStartStateNumber>); <! alt block decision !> +_errHandler->sync(this); +alt = 1<if(!choice.ast.greedy)> + 1<endif>; +do { + switch (alt) { + <alts: {alt | case <i><if (!choice.ast.greedy)> + 1<endif>: { + <alt> + break; + \} +}; separator="\n"> + default: + <error> + } + setState(<choice.loopBackStateNumber>); <! loopback/exit decision !> + _errHandler->sync(this); + alt = getInterpreter\<atn::ParserATNSimulator>()->adaptivePredict(_input, <choice.decision>, _ctx); +} while (alt != <choice.exitAlt> && alt != atn::ATN::INVALID_ALT_NUMBER); +>> + +Sync(s) ::= "Sync(s) sync(<s.expecting.name>);" + +ThrowNoViableAltHeader(t) ::= "<! Unused but must be present. !>" +ThrowNoViableAlt(t) ::= "throw NoViableAltException(this);" + +TestSetInlineHeader(s) ::= "<! Required but unused. !>" +TestSetInline(s) ::= << +<s.bitsets: {bits | <if (rest(rest(bits.tokens)))><bitsetBitfieldComparison(s, bits)><else><bitsetInlineComparison(s, bits)><endif>}; separator=" || "> +>> + +// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... need range test +testShiftInRange(shiftAmount) ::= << +((<shiftAmount> & ~ 0x3fULL) == 0) +>> + +bitsetBitfieldComparison(s, bits) ::= << +(<testShiftInRange({<offsetShift(s.varName, bits.shift)>})> && + ((1ULL \<\< <offsetShift(s.varName, bits.shift)>) & <bits.calculated>) != 0) +>> + +isZero ::= [ + "0": true, + default: false +] + +offsetShift(shiftAmount, offset, prefix = false) ::= <% +<if (!isZero.(offset))>(<if (prefix)><parser.name>::TOKEN_<endif><shiftAmount> - <offset>)<else><if (prefix)><parser.name>::TOKEN_<endif><shiftAmount><endif> +%> + +bitsetInlineComparison(s, bits) ::= <% +<bits.tokens: {t | <s.varName> == <parser.name>::TOKEN_<t.name>}; separator = "\n\n|| "> +%> + +cases(tokens) ::= << +<tokens: {t | case <parser.name>::TOKEN_<t.name>:}; separator="\n"> +>> + +InvokeRuleHeader(r, argExprsChunks) ::= "InvokeRuleHeader" +InvokeRule(r, argExprsChunks) ::= << +setState(<r.stateNumber>); +<if(r.labels)><r.labels: {l | <labelref(l)> = }><endif><r.escapedName>(<if(r.ast.options.p)><r.ast.options.p><if(argExprsChunks)>,<endif><endif><argExprsChunks>); +>> + +MatchTokenHeader(m) ::= "<! Required but unused. !>" +MatchToken(m) ::= << +setState(<m.stateNumber>); +<if (m.labels)><m.labels: {l | <labelref(l)> = }><endif>match(<parser.name>::TOKEN_<m.escapedName>); +>> + +MatchSetHeader(m, expr, capture) ::= "<! Required but unused. !>" +MatchSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, false)>" + +MatchNotSetHeader(m, expr, capture) ::= "<! Required but unused. !>" +MatchNotSet(m, expr, capture) ::= "<CommonSetStuff(m, expr, capture, true)>" + +CommonSetStuff(m, expr, capture, invert) ::= << +setState(<m.stateNumber>); +<if (m.labels)><m.labels: {l | <labelref(l)> = }>_input->LT(1);<endif> +<capture> +if (<if (invert)><m.varName> == 0 || <m.varName> == Token::EOF || <else>!<endif>(<expr>)) { + <if (m.labels)><m.labels: {l | <labelref(l)> = }><endif>_errHandler->recoverInline(this); +} +else { + _errHandler->reportMatch(this); + consume(); +} +>> + +WildcardHeader(w) ::= "<! Required but unused. !>" +Wildcard(w) ::= << +setState(<w.stateNumber>); +<if (w.labels)><w.labels: {l | <labelref(l)> = }><endif>matchWildcard(); +>> + +// ACTION STUFF + +ActionHeader(a, foo, chunks) ::= "<chunks>" +Action(a, foo, chunks) ::= "<chunks>" + +ArgAction(a, chunks) ::= "ArgAction(a, chunks) <chunks>" + +SemPredHeader(p, chunks, failChunks) ::= "<! Required but unused. !>" +SemPred(p, chunks, failChunks) ::= << +setState(<p.stateNumber>); + +if (!(<chunks>)) throw FailedPredicateException(this, <p.predicate><if (failChunks)>, <failChunks><elseif (p.msg)>, <p.msg><endif>); +>> + +ExceptionClauseHeader(e, catchArg, catchAction) ::= "<! Required but unused. !>" +ExceptionClause(e, catchArg, catchAction) ::= << +catch (<catchArg>) { + <catchAction> +} +>> + +// Lexer actions are not associated with model objects. + +LexerSkipCommand() ::= "skip();" +LexerMoreCommand() ::= "more();" +LexerPopModeCommand() ::= "popMode();" + +LexerTypeCommand(arg, grammar) ::= "type = <grammar.name>::<arg>;" +LexerChannelCommand(arg, grammar) ::= "channel = <arg>;" +LexerModeCommand(arg, grammar) ::= "mode = <grammar.name>Mode::<arg>;" +LexerPushModeCommand(arg, grammar) ::= "pushMode(<grammar.name>Mode::<arg>);" + +ActionTextHeader(t) ::= "<t.text>" +ActionText(t) ::= "<t.text>" + +ActionTemplateHeader(t) ::= "<! Required but unused. !>" +ActionTemplate(t) ::= "<t.st>" + +ArgRefHeader(t) ::= "<! Required but unused. !>" +ArgRef(a) ::= "_localctx-><a.escapedName>" + +LocalRefHeader(t) ::= "<! Required but unused. !>" +LocalRef(a) ::= "_localctx-><a.escapedName>" + +RetValueRefHeader(t) ::= "<! Required but unused. !>" +RetValueRef(a) ::= "_localctx-><a.escapedName>" + +QRetValueRefHeader(t) ::= "<! Required but unused. !>" +QRetValueRef(a) ::= "<ctx(a)>-><a.dict>-><a.escapedName>" +/** How to translate $tokenLabel */ + +TokenRefHeader(t) ::= "<! Required but unused. !>" +TokenRef(t) ::= "<ctx(t)>-><t.escapedName>" + +LabelRefHeader(t) ::= "<! Required but unused. !>" +LabelRef(t) ::= "<ctx(t)>-><t.escapedName>" + +ListLabelRefHeader(t) ::= "<! Required but unused. !>" +ListLabelRef(t) ::= "<ctx(t)>-><ListLabelName(t.escapedName)>" + +SetAttrHeader(t) ::= "<! Required but unused. !>" +SetAttr(s,rhsChunks) ::= "<ctx(s)>-><s.escapedName> = <rhsChunks>;" + +InputSymbolType() ::= "<file.InputSymbolType; null = {Token}> *" + +TokenPropertyRef_textHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_text(t) ::= <<(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getText() : "")>> + +TokenPropertyRef_typeHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_type(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getType() : 0)" + +TokenPropertyRef_lineHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_line(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getLine() : 0)" + +TokenPropertyRef_posHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_pos(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getCharPositionInLine() : 0)" + +TokenPropertyRef_channelHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_channel(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getChannel() : 0)" + +TokenPropertyRef_indexHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_index(t) ::= "(<ctx(t)>-><t.label> != nullptr ? <ctx(t)>-><t.label>->getTokenIndex() : 0)" + +TokenPropertyRef_intHeader(t) ::= "<! Required but unused. !>" +TokenPropertyRef_int(t) ::= "(<ctx(t)>-><t.label> != nullptr ? std::stoi(<ctx(t)>-><t.label>->getText()) : 0)" + +RulePropertyRef_startHeader(r) ::= "<! Required but unused. !>" +RulePropertyRef_start(r) ::= "(<ctx(r)>-><r.label> != nullptr ? (<ctx(r)>-><r.label>->start) : nullptr)" + +RulePropertyRef_stopHeader(r) ::= "<! Required but unused. !>" +RulePropertyRef_stop(r) ::= "(<ctx(r)>-><r.label> != nullptr ? (<ctx(r)>-><r.label>->stop) : nullptr)" + +RulePropertyRef_textHeader(r) ::= "<! Required but unused. !>" +RulePropertyRef_text(r) ::= "(<ctx(r)>-><r.label> != nullptr ? _input->getText(<ctx(r)>-><r.label>->start, <ctx(r)>-><r.label>->stop) : nullptr)" + +RulePropertyRef_ctxHeader(r) ::= "<! Required but unused. !>" +RulePropertyRef_ctx(r) ::= "<ctx(r)>-><r.label>" + +ThisRulePropertyRef_start(r) ::= "ThisRulePropertyRef_start(r) _localctx->start" +ThisRulePropertyRef_stop(r) ::= "ThisRulePropertyRef_stop(r) _localctx->stop" + +ThisRulePropertyRef_textHeader(r) ::= "<! Required but unused. !>" +ThisRulePropertyRef_text(r) ::= "_input->getText(_localctx->start, _input->LT(-1))" + +ThisRulePropertyRef_ctxHeader(r) ::= "<! Required but unused. !>" +ThisRulePropertyRef_ctx(r) ::= "_localctx" + +ThisRulePropertyRef_parserHeader(r) ::= "<! Required but unused. !>" +ThisRulePropertyRef_parser(r) ::= "this" + +NonLocalAttrRef(s) ::= "NonLocalAttrRef(s) ((<s.ruleName; format=\"cap\">Context)getInvokingContext(<s.ruleIndex>)).<s.escapedName>" +SetNonLocalAttr(s, rhsChunks) ::= + "SetNonLocalAttr(s, rhsChunks) ((<s.ruleName; format=\"cap\">Context)getInvokingContext(<s.ruleIndex>)).<s.escapedName> = <rhsChunks>;" + +AddToLabelListHeader(a) ::= "<! Required but unused. !>" +AddToLabelList(a) ::= << +<ctx(a.label)>-><a.listName>.push_back(<labelref(a.label)>); +>> + +TokenLabelType() ::= "<file.TokenLabelType; null = {Token}> *" + +TokenDeclHeader(t) ::= "antlr4::<TokenLabelType()><t.escapedName> = nullptr" +TokenDecl(t) ::= "<! Variable Declaration !>" + +TokenTypeDeclHeader(t) ::= "<! Local Variable !>" +TokenTypeDecl(t) ::= "size_t <t.escapedName> = 0;" + +TokenListDeclHeader(t) ::= "std::vector\<antlr4::Token *> <t.escapedName>" +TokenListDecl(t) ::= "<! Variable Declaration !>" + +RuleContextDeclHeader(r) ::= "<parser.name>::<r.ctxName> *<r.escapedName> = nullptr" +RuleContextDecl(r) ::= "<! Variable Declaration !>" + +RuleContextListDeclHeader(rdecl) ::= "std::vector\<<rdecl.ctxName> *> <rdecl.escapedName>" +RuleContextListDecl(rdecl) ::= "<! Variable Declaration !>" + +ContextTokenGetterDeclHeader(t) ::= "antlr4::tree::TerminalNode *TOKEN_<t.escapedName>();" +ContextTokenGetterDecl(t) ::= << +tree::TerminalNode* <parser.name>::<t.ctx.name>::TOKEN_<t.escapedName>() { + return getToken(<parser.name>::TOKEN_<t.escapedName>, 0); +} + +>> + +ContextTokenListGetterDeclHeader(t) ::= "std::vector\<antlr4::tree::TerminalNode *> TOKEN_<t.escapedName>();" +ContextTokenListGetterDecl(t) ::= << +std::vector\<tree::TerminalNode *> <parser.name>::<t.ctx.name>::TOKEN_<t.escapedName>() { + return getTokens(<parser.name>::TOKEN_<t.escapedName>); +} + +>> + +ContextTokenListIndexedGetterDeclHeader(t) ::= "antlr4::tree::TerminalNode* TOKEN_<t.escapedName>(size_t i);" +ContextTokenListIndexedGetterDecl(t) ::= << +tree::TerminalNode* <parser.name>::<t.ctx.name>::TOKEN_<t.escapedName>(size_t i) { + return getToken(<parser.name>::TOKEN_<t.escapedName>, i); +} + +>> + +ContextRuleGetterDeclHeader(r) ::= "<r.ctxName> *<r.escapedName>();" +ContextRuleGetterDecl(r) ::= << +<! Note: ctxName is the name of the context to return, while ctx is the owning context. !> +<parser.name>::<r.ctxName>* <parser.name>::<r.ctx.name>::<r.escapedName>() { + return getRuleContext\<<parser.name>::<r.ctxName>\>(0); +} + +>> + +ContextRuleListGetterDeclHeader(r) ::= "std::vector\<<r.ctxName> *> <r.escapedName>();" +ContextRuleListGetterDecl(r) ::= << +std::vector\<<parser.name>::<r.ctxName> *> <parser.name>::<r.ctx.name>::<r.escapedName>() { + return getRuleContexts\<<parser.name>::<r.ctxName>\>(); +} + +>> + +ContextRuleListIndexedGetterDeclHeader(r) ::= "<r.ctxName>* <r.escapedName>(size_t i);" +ContextRuleListIndexedGetterDecl(r) ::= << +<parser.name>::<r.ctxName>* <parser.name>::<r.ctx.name>::<r.escapedName>(size_t i) { + return getRuleContext\<<parser.name>::<r.ctxName>\>(i); +} + +>> + +LexerRuleContext() ::= "antlr4::RuleContext" + +// The rule context name is the rule followed by a suffix; e.g. r becomes rContext. +RuleContextNameSuffix() ::= "Context" + +ImplicitTokenLabel(tokenName) ::= << +<tokenName; format = "lower">Token +>> + +ImplicitRuleLabel(ruleName) ::= "<ruleName>Context" +ImplicitSetLabel(id) ::= "_tset<id>" +ListLabelName(label) ::= "<label>" + +CaptureNextToken(d) ::= "CaptureNextToken(d) <d.varName> = _input->LT(1);" + +CaptureNextTokenTypeHeader(d) ::= "<! Required but unused. !>" +CaptureNextTokenType(d) ::= "<d.varName> = _input->LA(1);" + +ListenerDispatchMethodHeader(method) ::= << +virtual void <if (method.isEnter)>enter<else>exit<endif>Rule(antlr4::tree::ParseTreeListener *listener) override; +>> +ListenerDispatchMethod(method) ::= << +void <parser.name>::<struct.escapedName>::<if (method.isEnter)>enter<else>exit<endif>Rule(tree::ParseTreeListener *listener) { + auto parserListener = dynamic_cast\<<parser.grammarName>Listener *>(listener); + if (parserListener != nullptr) + parserListener-><if(method.isEnter)>enter<else>exit<endif><struct.derivedFromName; format="cap">(this); +} +>> + +VisitorDispatchMethodHeader(method) ::= << + +virtual std::any accept(antlr4::tree::ParseTreeVisitor *visitor) override; +>> +VisitorDispatchMethod(method) ::= << + +std::any <parser.name>::<struct.escapedName>::accept(tree::ParseTreeVisitor *visitor) { + if (auto parserVisitor = dynamic_cast\<<parser.grammarName>Visitor*>(visitor)) + return parserVisitor->visit<struct.derivedFromName; format="cap">(this); + else + return visitor->visitChildren(this); +} +>> + +AttributeDeclHeader(d) ::= "<d.type> <d.escapedName><if(d.initValue)> = <d.initValue><endif>" +AttributeDecl(d) ::= "<d.type> <d.escapedName>" + +/** If we don't know location of label def x, use this template */ +labelref(x) ::= "<if (!x.isLocal)>antlrcpp::downCast\<<x.ctx.name> *>(_localctx)-><endif><x.escapedName>" + +/** For any action chunk, what is correctly-typed context struct ptr? */ +ctx(actionChunk) ::= "antlrcpp::downCast\<<actionChunk.ctx.name> *>(_localctx)" + +// used for left-recursive rules +recRuleAltPredicate(ruleName,opPrec) ::= "precpred(_ctx, <opPrec>)" +recRuleSetReturnAction(src,name) ::= "recRuleSetReturnAction(src,name) $<name>=$<src>.<name>;" +recRuleSetStopToken() ::= "_ctx->stop = _input->LT(-1);" + +recRuleAltStartAction(ruleName, ctxName, label, isListLabel) ::= << +_localctx = _tracker.createInstance\<<ctxName>Context>(parentContext, parentState); +<if(label)> +<if(isListLabel)> +_localctx-><label>.push_back(previousContext); +<else> +_localctx-><label> = previousContext; +<endif> +<endif> +pushNewRecursionContext(_localctx, startState, Rule<ruleName; format = "cap">); +>> + +// Separate context variable to avoid frequent pointer type casts. +recRuleLabeledAltStartAction(ruleName, currentAltLabel, label, isListLabel) ::= << +auto newContext = _tracker.createInstance\<<currentAltLabel; format = "cap">Context>(_tracker.createInstance\<<ruleName; format="cap">Context>(parentContext, parentState)); +_localctx = newContext; +<if(label)> +<if(isListLabel)> +newContext-><label>.push_back(previousContext); +<else> +newContext-><label> = previousContext; +<endif> +<endif> +pushNewRecursionContext(newContext, startState, Rule<ruleName; format = "cap">); +>> + +recRuleReplaceContext(ctxName) ::= << +_localctx = _tracker.createInstance\<<ctxName>Context>(_localctx); +_ctx = _localctx; +previousContext = _localctx; +>> + +recRuleSetPrevCtx() ::= << +if (!_parseListeners.empty()) + triggerExitRuleEvent(); +previousContext = _localctx; +>> + +/** Using a type to init value map, try to init a type; if not in table + * must be an object, default value is "null". + */ +initValue(typeName) ::= << +<cppTypeInitMap.(typeName)> +>>
\ No newline at end of file diff --git a/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg new file mode 100644 index 0000000000..c2a36eb758 --- /dev/null +++ b/yql/essentials/parser/antlr_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg @@ -0,0 +1,344 @@ +/* + * [The "BSD license"] + * Copyright (c) 2016, Mike Lischke + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +codeFileExtension() ::= ".cpp" +headerFileExtension() ::= ".h" + +fileHeader(grammarFileName, ANTLRVersion, header) ::= << +<header> + +// Generated from <grammarFileName> by ANTLR <ANTLRVersion> +>> + +LexerFileHeader(file, lexer, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)> + +#pragma once + +<namedActions.preinclude> + +#include "antlr4-runtime.h" + +<namedActions.postinclude> + +<if(file.genPackage)>namespace <file.genPackage> {<endif> + +<lexer> + +<if (file.genPackage)> +} // namespace <file.genPackage> +<endif> +>> + +LexerFile(file, lexer, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)> + +<namedActions.preinclude> + +#include "<file.lexer.name>.h" + +<namedActions.postinclude> + +using namespace antlr4; + +<if (file.genPackage)>using namespace <file.genPackage>;<endif> + +<lexer> + +>> + +ParserFileHeader(file, parser, namedActions, contextSuperClass) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)> + +#pragma once + +<namedActions.preinclude> + +#include "antlr4-runtime.h" + +<namedActions.postinclude> + +<if (file.genPackage)>namespace <file.genPackage> {<endif> + +<parser> + +<if (file.genPackage)> +} // namespace <file.genPackage> +<endif> +>> + +ParserFile(file, parser, namedActions, contextSuperClass) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, namedActions.header)> + +<namedActions.preinclude> + +<if (file.genListener)>#include "<file.grammarName>Listener.h"<endif> +<if (file.genVisitor)>#include "<file.grammarName>Visitor.h"<endif> + +#include "<file.parser.name>.h" + +<namedActions.postinclude> + +using namespace antlrcpp; +<if (file.genPackage)>using namespace <file.genPackage>;<endif> +<parser> + +>> + +BaseListenerFileHeader(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +#pragma once + +<namedActions.baselistenerpreinclude> + +#include "antlr4-runtime.h" +#include "<file.grammarName>Listener.h" + +<namedActions.baselistenerpostinclude> + +<if(file.genPackage)> +namespace <file.genPackage> { +<endif> + +/** + * This class provides an empty implementation of <file.grammarName>Listener, + * which can be extended to create a listener which only needs to handle a subset + * of the available methods. + */ +class <file.exportMacro> <file.grammarName>BaseListener : public <file.grammarName>Listener { +public: +<namedActions.baselistenerdeclarations> + +<file.listenerNames: {lname | + virtual void enter<lname; format="cap">(<file.parserName>::<lname; format = "cap">Context * /*ctx*/) override { \} + virtual void exit<lname; format="cap">(<file.parserName>::<lname; format = "cap">Context * /*ctx*/) override { \} +}; separator="\n"> + + virtual void enterEveryRule(antlr4::ParserRuleContext * /*ctx*/) override { } + virtual void exitEveryRule(antlr4::ParserRuleContext * /*ctx*/) override { } + virtual void visitTerminal(antlr4::tree::TerminalNode * /*node*/) override { } + virtual void visitErrorNode(antlr4::tree::ErrorNode * /*node*/) override { } + +<if (namedActions.baselistenermembers)> +private: +<namedActions.baselistenermembers> +<endif> +}; + +<if (file.genPackage)> +} // namespace <file.genPackage> +<endif> +>> + +BaseListenerFile(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +<namedActions.baselistenerpreinclude> + +#include "<file.grammarName>BaseListener.h" + +<namedActions.baselistenerpostinclude> + +<if(file.genPackage)> +using namespace <file.genPackage>; +<endif> + +<namedActions.baselistenerdefinitions> +>> + +ListenerFileHeader(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +#pragma once + +<namedActions.listenerpreinclude> + +#include "antlr4-runtime.h" +#include "<file.parserName>.h" + +<namedActions.listenerpostinclude> + +<if(file.genPackage)> +namespace <file.genPackage> { +<endif> + +/** + * This interface defines an abstract listener for a parse tree produced by <file.parserName>. + */ +class <file.exportMacro> <file.grammarName>Listener : public antlr4::tree::ParseTreeListener { +public: +<namedActions.listenerdeclarations> + +<file.listenerNames: {lname | + virtual void enter<lname; format = "cap">(<file.parserName>::<lname; format ="cap">Context *ctx) = 0; + virtual void exit<lname; format = "cap">(<file.parserName>::<lname; format="cap">Context *ctx) = 0; +}; separator = "\n"> + +<if (namedActions.listenermembers)> +private: +<namedActions.listenermembers> +<endif> +}; + +<if(file.genPackage)> +} // namespace <file.genPackage> +<endif> +>> + +ListenerFile(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +<namedActions.listenerpreinclude> + +#include "<file.grammarName>Listener.h" + +<namedActions.listenerpostinclude> + +<if(file.genPackage)> +using namespace <file.genPackage>; +<endif> + +<namedActions.listenerdefinitions> +>> + +BaseVisitorFileHeader(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +#pragma once + +<namedActions.basevisitorpreinclude> + +#include "antlr4-runtime.h" +#include "<file.grammarName>Visitor.h" + +<namedActions.basevisitorpostinclude> + +<if(file.genPackage)> +namespace <file.genPackage> { +<endif> + +/** + * This class provides an empty implementation of <file.grammarName>Visitor, which can be + * extended to create a visitor which only needs to handle a subset of the available methods. + */ +class <file.exportMacro> <file.grammarName>BaseVisitor : public <file.grammarName>Visitor { +public: +<namedActions.basevisitordeclarations> + +<file.visitorNames: { lname | + virtual std::any visit<lname; format = "cap">(<file.parserName>::<lname; format = "cap">Context *ctx) override { + return visitChildren(ctx); + \} +}; separator="\n"> + +<if (namedActions.basevisitormembers)> +private: +<namedActions.basevisitormembers> +<endif> +}; + +<if(file.genPackage)> +} // namespace <file.genPackage> +<endif> +>> + +BaseVisitorFile(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +<namedActions.basevisitorpreinclude> + +#include "<file.grammarName>BaseVisitor.h" + +<namedActions.basevisitorpostinclude> + +<if(file.genPackage)> +using namespace <file.genPackage>; +<endif> + +<namedActions.basevisitordefinitions> + +>> + +VisitorFileHeader(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +#pragma once + +<namedActions.visitorpreinclude> + +#include "antlr4-runtime.h" +#include "<file.parserName>.h" + +<namedActions.visitorpostinclude> + +<if(file.genPackage)>namespace <file.genPackage> {<endif> + +/** + * This class defines an abstract visitor for a parse tree + * produced by <file.parserName>. + */ +class <file.exportMacro> <file.grammarName>Visitor : public antlr4::tree::AbstractParseTreeVisitor { +public: + <namedActions.visitordeclarations> + + /** + * Visit parse trees produced by <file.parserName>. + */ + <file.visitorNames: {lname | + virtual std::any visit<lname; format = "cap">(<file.parserName>::<lname; format = "cap">Context *context) = 0; + }; separator="\n"> + +<if (namedActions.visitormembers)> +private: +<namedActions.visitormembers> +<endif> +}; + +<if(file.genPackage)> +} // namespace <file.genPackage> +<endif> +>> + +VisitorFile(file, header, namedActions) ::= << +<fileHeader(file.grammarFileName, file.ANTLRVersion, header)> + +<namedActions.visitorpreinclude> + +#include "<file.grammarName>Visitor.h" + +<namedActions.visitorpostinclude> + +<if(file.genPackage)> +using namespace <file.genPackage>; +<endif> + +<namedActions.visitordefinitions> + +>>
\ No newline at end of file diff --git a/yql/essentials/parser/antlr_ast/ya.make b/yql/essentials/parser/antlr_ast/ya.make new file mode 100644 index 0000000000..d3b33a2dfa --- /dev/null +++ b/yql/essentials/parser/antlr_ast/ya.make @@ -0,0 +1,4 @@ +RECURSE( + gen +) + diff --git a/yql/essentials/parser/ya.make b/yql/essentials/parser/ya.make index 7f1b0fe700..c1cacaa6a8 100644 --- a/yql/essentials/parser/ya.make +++ b/yql/essentials/parser/ya.make @@ -1,4 +1,5 @@ RECURSE( + antlr_ast lexer_common pg_catalog pg_wrapper diff --git a/yql/essentials/sql/v1/complete/c3_engine.h b/yql/essentials/sql/v1/complete/c3_engine.h new file mode 100644 index 0000000000..8b729f880b --- /dev/null +++ b/yql/essentials/sql/v1/complete/c3_engine.h @@ -0,0 +1,116 @@ +#pragma once + +#include "sql_antlr4.h" +#include "string_util.h" + +#include <contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h> +#include <contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h> +#include <contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h> +#include <contrib/libs/antlr4-c3/src/CodeCompletionCore.hpp> + +#include <util/generic/string.h> +#include <util/generic/vector.h> + +#include <vector> +#include <unordered_set> + +namespace NSQLComplete { + + // std::vector is used to prevent copying from c3 results + struct TSuggestedToken { + TTokenId Number; + std::vector<TRuleId> ParserCallStack; + }; + + class IC3Engine { + public: + using TPtr = THolder<IC3Engine>; + + // std::unordered_set is used to prevent copying into c3 core + struct TConfig { + std::unordered_set<TTokenId> IgnoredTokens; + std::unordered_set<TRuleId> PreferredRules; + }; + + virtual TVector<TSuggestedToken> Complete(TStringBuf queryPrefix) = 0; + virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0; + virtual ~IC3Engine() = default; + }; + + template <class Lexer, class Parser> + struct TAntlrGrammar { + using TLexer = Lexer; + using TParser = Parser; + + TAntlrGrammar() = delete; + }; + + template <class G> + class TC3Engine: public IC3Engine { + public: + explicit TC3Engine(TConfig config) + : Chars() + , Lexer(&Chars) + , Tokens(&Lexer) + , Parser(&Tokens) + , CompletionCore(&Parser) + { + Lexer.removeErrorListeners(); + Parser.removeErrorListeners(); + + CompletionCore.ignoredTokens = std::move(config.IgnoredTokens); + CompletionCore.preferredRules = std::move(config.PreferredRules); + } + + TVector<TSuggestedToken> Complete(TStringBuf queryPrefix) override { + Assign(queryPrefix); + const auto caretTokenIndex = CaretTokenIndex(queryPrefix); + auto candidates = CompletionCore.collectCandidates(caretTokenIndex); + return Converted(std::move(candidates)); + } + + const antlr4::dfa::Vocabulary& GetVocabulary() const override { + return Lexer.getVocabulary(); + } + + private: + void Assign(TStringBuf queryPrefix) { + Chars.load(queryPrefix.Data(), queryPrefix.Size(), /* lenient = */ false); + Lexer.reset(); + Tokens.setTokenSource(&Lexer); + + Tokens.fill(); + } + + size_t CaretTokenIndex(TStringBuf queryPrefix) { + const auto tokensCount = Tokens.size(); + if (2 <= tokensCount && !LastWord(queryPrefix).Empty()) { + return tokensCount - 2; + } + return tokensCount - 1; + } + + static TVector<TSuggestedToken> Converted(c3::CandidatesCollection candidates) { + TVector<TSuggestedToken> converted; + for (const auto& [token, _] : candidates.tokens) { + std::vector<TRuleId> parserCallStack; + + if ( + auto rules = candidates.rules.find(token); + rules != std::end(candidates.rules)) { + parserCallStack = std::move(rules->second.ruleList); + } + + converted.emplace_back(token, std::move(parserCallStack)); + } + return converted; + } + + antlr4::ANTLRInputStream Chars; + G::TLexer Lexer; + antlr4::BufferedTokenStream Tokens; + G::TParser Parser; + c3::CodeCompletionCore CompletionCore; + }; + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_antlr4.cpp b/yql/essentials/sql/v1/complete/sql_antlr4.cpp new file mode 100644 index 0000000000..33c847f3e2 --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_antlr4.cpp @@ -0,0 +1,116 @@ +#include "sql_antlr4.h" + +#include <yql/essentials/sql/v1/format/sql_format.h> + +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h> + +#define RULE_(mode, name) NALA##mode##Antlr4::SQLv1Antlr4Parser::Rule##name + +#define RULE(name) RULE_(Default, name) + +#define STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(name) \ + static_assert(RULE_(Default, name) == RULE_(Ansi, name)) + +namespace NSQLComplete { + + class TSqlGrammar: public ISqlGrammar { + public: + TSqlGrammar(bool isAnsiLexer) + : Vocabulary(GetVocabulary(isAnsiLexer)) + , AllTokens(ComputeAllTokens()) + , KeywordTokens(ComputeKeywordTokens()) + { + } + + const antlr4::dfa::Vocabulary& GetVocabulary() const override { + return *Vocabulary; + } + + const std::unordered_set<TTokenId>& GetAllTokens() const override { + return AllTokens; + } + + const std::unordered_set<TTokenId>& GetKeywordTokens() const override { + return KeywordTokens; + } + + const TVector<TRuleId>& GetKeywordRules() const override { + static const TVector<TRuleId> KeywordRules = { + RULE(Keyword), + RULE(Keyword_expr_uncompat), + RULE(Keyword_table_uncompat), + RULE(Keyword_select_uncompat), + RULE(Keyword_alter_uncompat), + RULE(Keyword_in_uncompat), + RULE(Keyword_window_uncompat), + RULE(Keyword_hint_uncompat), + RULE(Keyword_as_compat), + RULE(Keyword_compat), + }; + + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_expr_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_table_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_select_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_alter_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_in_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_window_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_hint_uncompat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_as_compat); + STATIC_ASSERT_RULE_ID_MODE_INDEPENDENT(Keyword_compat); + + return KeywordRules; + } + + private: + static const antlr4::dfa::Vocabulary* GetVocabulary(bool isAnsiLexer) { + if (isAnsiLexer) { // Taking a reference is okay as vocabulary storage is static + return &NALAAnsiAntlr4::SQLv1Antlr4Parser(nullptr).getVocabulary(); + } + return &NALADefaultAntlr4::SQLv1Antlr4Parser(nullptr).getVocabulary(); + } + + std::unordered_set<TTokenId> ComputeAllTokens() { + const auto& vocabulary = GetVocabulary(); + + std::unordered_set<TTokenId> allTokens; + + for (size_t type = 1; type <= vocabulary.getMaxTokenType(); ++type) { + allTokens.emplace(type); + } + + return allTokens; + } + + std::unordered_set<TTokenId> ComputeKeywordTokens() { + const auto& vocabulary = GetVocabulary(); + const auto keywords = NSQLFormat::GetKeywords(); + + auto keywordTokens = GetAllTokens(); + std::erase_if(keywordTokens, [&](TTokenId token) { + return !keywords.contains(vocabulary.getSymbolicName(token)); + }); + keywordTokens.erase(TOKEN_EOF); + + return keywordTokens; + } + + const antlr4::dfa::Vocabulary* Vocabulary; + const std::unordered_set<TTokenId> AllTokens; + const std::unordered_set<TTokenId> KeywordTokens; + }; + + const ISqlGrammar& GetSqlGrammar(bool isAnsiLexer) { + const static TSqlGrammar DefaultSqlGrammar(/* isAnsiLexer = */ false); + const static TSqlGrammar AnsiSqlGrammar(/* isAnsiLexer = */ true); + + if (isAnsiLexer) { + return AnsiSqlGrammar; + } + return DefaultSqlGrammar; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_antlr4.h b/yql/essentials/sql/v1/complete/sql_antlr4.h new file mode 100644 index 0000000000..3d3c4c024a --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_antlr4.h @@ -0,0 +1,28 @@ +#pragma once + +#include "sql_syntax.h" + +#include <contrib/libs/antlr4_cpp_runtime/src/Token.h> +#include <contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h> + +#include <unordered_set> + +namespace NSQLComplete { + + using TTokenId = size_t; + using TRuleId = size_t; + + constexpr TTokenId TOKEN_EOF = antlr4::Token::EOF; + + class ISqlGrammar { + public: + virtual const antlr4::dfa::Vocabulary& GetVocabulary() const = 0; + virtual const std::unordered_set<TTokenId>& GetAllTokens() const = 0; + virtual const std::unordered_set<TTokenId>& GetKeywordTokens() const = 0; + virtual const TVector<TRuleId>& GetKeywordRules() const = 0; + virtual ~ISqlGrammar() = default; + }; + + const ISqlGrammar& GetSqlGrammar(bool isAnsiLexer); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete.cpp b/yql/essentials/sql/v1/complete/sql_complete.cpp new file mode 100644 index 0000000000..2a16a250e5 --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_complete.cpp @@ -0,0 +1,89 @@ +#include "sql_complete.h" + +#include "sql_context.h" +#include "string_util.h" + +#include <util/generic/algorithm.h> +#include <util/charset/utf8.h> + +namespace NSQLComplete { + + class TSqlCompletionEngine: public ISqlCompletionEngine { + public: + TSqlCompletionEngine() + : ContextInference(MakeSqlContextInference()) + { + } + + TCompletion Complete(TCompletionInput input) { + auto prefix = input.Text.Head(input.CursorPosition); + auto completedToken = GetCompletedToken(prefix); + + auto context = ContextInference->Analyze(input); + + TVector<TCandidate> candidates; + EnrichWithKeywords(candidates, context.Keywords); + + FilterByContent(candidates, completedToken.Content); + + RankingSort(candidates); + + return { + .CompletedToken = std::move(completedToken), + .Candidates = std::move(candidates), + }; + } + + private: + TCompletedToken GetCompletedToken(TStringBuf prefix) { + return { + .Content = LastWord(prefix), + .SourcePosition = LastWordIndex(prefix), + }; + } + + void EnrichWithKeywords(TVector<TCandidate>& candidates, TVector<TString> keywords) { + for (auto keyword : keywords) { + candidates.push_back({ + .Kind = ECandidateKind::Keyword, + .Content = std::move(keyword), + }); + } + } + + void FilterByContent(TVector<TCandidate>& candidates, TStringBuf prefix) { + const auto lowerPrefix = ToLowerUTF8(prefix); + auto removed = std::ranges::remove_if(candidates, [&](const auto& candidate) { + return !ToLowerUTF8(candidate.Content).StartsWith(lowerPrefix); + }); + candidates.erase(std::begin(removed), std::end(removed)); + } + + void RankingSort(TVector<TCandidate>& candidates) { + Sort(candidates, [](const TCandidate& lhs, const TCandidate& rhs) { + return std::tie(lhs.Kind, lhs.Content) < std::tie(rhs.Kind, rhs.Content); + }); + } + + ISqlContextInference::TPtr ContextInference; + }; + + ISqlCompletionEngine::TPtr MakeSqlCompletionEngine() { + return ISqlCompletionEngine::TPtr(new TSqlCompletionEngine()); + } + +} // namespace NSQLComplete + +template <> +void Out<NSQLComplete::ECandidateKind>(IOutputStream& out, NSQLComplete::ECandidateKind kind) { + switch (kind) { + case NSQLComplete::ECandidateKind::Keyword: + out << "Keyword"; + break; + } +} + +template <> +void Out<NSQLComplete::TCandidate>(IOutputStream& out, const NSQLComplete::TCandidate& candidate) { + out << "(" << candidate.Kind << ": " << candidate.Content << ")"; +} diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h new file mode 100644 index 0000000000..99e74cce7a --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_complete.h @@ -0,0 +1,44 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/vector.h> + +namespace NSQLComplete { + + struct TCompletionInput { + TStringBuf Text; + size_t CursorPosition = Text.length(); + }; + + struct TCompletedToken { + TStringBuf Content; + size_t SourcePosition; + }; + + enum class ECandidateKind { + Keyword, + }; + + struct TCandidate { + ECandidateKind Kind; + TString Content; + + friend bool operator==(const TCandidate& lhs, const TCandidate& rhs) = default; + }; + + struct TCompletion { + TCompletedToken CompletedToken; + TVector<TCandidate> Candidates; + }; + + class ISqlCompletionEngine { + public: + using TPtr = THolder<ISqlCompletionEngine>; + + virtual TCompletion Complete(TCompletionInput input) = 0; + virtual ~ISqlCompletionEngine() = default; + }; + + ISqlCompletionEngine::TPtr MakeSqlCompletionEngine(); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp new file mode 100644 index 0000000000..e0a012f9f6 --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -0,0 +1,323 @@ +#include "sql_complete.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NSQLComplete; + +Y_UNIT_TEST_SUITE(SqlCompleteTests) { + using ECandidateKind::Keyword; + + TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) { + return engine->Complete({prefix}).Candidates; + } + + Y_UNIT_TEST(Beginning) { + TVector<TCandidate> expected = { + {Keyword, "ALTER"}, + {Keyword, "ANALYZE"}, + {Keyword, "BACKUP"}, + {Keyword, "BATCH"}, + {Keyword, "COMMIT"}, + {Keyword, "CREATE"}, + {Keyword, "DECLARE"}, + {Keyword, "DEFINE"}, + {Keyword, "DELETE"}, + {Keyword, "DISCARD"}, + {Keyword, "DO"}, + {Keyword, "DROP"}, + {Keyword, "EVALUATE"}, + {Keyword, "EXPLAIN"}, + {Keyword, "EXPORT"}, + {Keyword, "FOR"}, + {Keyword, "FROM"}, + {Keyword, "GRANT"}, + {Keyword, "IF"}, + {Keyword, "IMPORT"}, + {Keyword, "INSERT"}, + {Keyword, "PARALLEL"}, + {Keyword, "PRAGMA"}, + {Keyword, "PROCESS"}, + {Keyword, "REDUCE"}, + {Keyword, "REPLACE"}, + {Keyword, "RESTORE"}, + {Keyword, "REVOKE"}, + {Keyword, "ROLLBACK"}, + {Keyword, "SELECT"}, + {Keyword, "SHOW"}, + {Keyword, "UPDATE"}, + {Keyword, "UPSERT"}, + {Keyword, "USE"}, + {Keyword, "VALUES"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {";"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"; "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" ; "}), expected); + } + + Y_UNIT_TEST(Alter) { + TVector<TCandidate> expected = { + {Keyword, "ASYNC"}, + {Keyword, "BACKUP"}, + {Keyword, "DATABASE"}, + {Keyword, "EXTERNAL"}, + {Keyword, "GROUP"}, + {Keyword, "OBJECT"}, + {Keyword, "RESOURCE"}, + {Keyword, "SEQUENCE"}, + {Keyword, "TABLE"}, + {Keyword, "TABLESTORE"}, + {Keyword, "TOPIC"}, + {Keyword, "TRANSFER"}, + {Keyword, "USER"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected); + } + + Y_UNIT_TEST(Create) { + TVector<TCandidate> expected = { + {Keyword, "ASYNC"}, + {Keyword, "BACKUP"}, + {Keyword, "EXTERNAL"}, + {Keyword, "GROUP"}, + {Keyword, "OBJECT"}, + {Keyword, "OR"}, + {Keyword, "RESOURCE"}, + {Keyword, "TABLE"}, + {Keyword, "TABLESTORE"}, + {Keyword, "TEMP"}, + {Keyword, "TEMPORARY"}, + {Keyword, "TOPIC"}, + {Keyword, "TRANSFER"}, + {Keyword, "USER"}, + {Keyword, "VIEW"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected); + } + + Y_UNIT_TEST(Delete) { + TVector<TCandidate> expected = { + {Keyword, "FROM"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected); + } + + Y_UNIT_TEST(Drop) { + TVector<TCandidate> expected = { + {Keyword, "ASYNC"}, + {Keyword, "BACKUP"}, + {Keyword, "EXTERNAL"}, + {Keyword, "GROUP"}, + {Keyword, "OBJECT"}, + {Keyword, "RESOURCE"}, + {Keyword, "TABLE"}, + {Keyword, "TABLESTORE"}, + {Keyword, "TOPIC"}, + {Keyword, "TRANSFER"}, + {Keyword, "USER"}, + {Keyword, "VIEW"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected); + } + + Y_UNIT_TEST(Explain) { + TVector<TCandidate> expected = { + {Keyword, "ALTER"}, + {Keyword, "ANALYZE"}, + {Keyword, "BACKUP"}, + {Keyword, "BATCH"}, + {Keyword, "COMMIT"}, + {Keyword, "CREATE"}, + {Keyword, "DECLARE"}, + {Keyword, "DEFINE"}, + {Keyword, "DELETE"}, + {Keyword, "DISCARD"}, + {Keyword, "DO"}, + {Keyword, "DROP"}, + {Keyword, "EVALUATE"}, + {Keyword, "EXPORT"}, + {Keyword, "FOR"}, + {Keyword, "FROM"}, + {Keyword, "GRANT"}, + {Keyword, "IF"}, + {Keyword, "IMPORT"}, + {Keyword, "INSERT"}, + {Keyword, "PARALLEL"}, + {Keyword, "PRAGMA"}, + {Keyword, "PROCESS"}, + {Keyword, "QUERY"}, + {Keyword, "REDUCE"}, + {Keyword, "REPLACE"}, + {Keyword, "RESTORE"}, + {Keyword, "REVOKE"}, + {Keyword, "ROLLBACK"}, + {Keyword, "SELECT"}, + {Keyword, "SHOW"}, + {Keyword, "UPDATE"}, + {Keyword, "UPSERT"}, + {Keyword, "USE"}, + {Keyword, "VALUES"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected); + } + + Y_UNIT_TEST(Grant) { + TVector<TCandidate> expected = { + {Keyword, "ALL"}, + {Keyword, "ALTER"}, + {Keyword, "CONNECT"}, + {Keyword, "CREATE"}, + {Keyword, "DESCRIBE"}, + {Keyword, "DROP"}, + {Keyword, "ERASE"}, + {Keyword, "FULL"}, + {Keyword, "GRANT"}, + {Keyword, "INSERT"}, + {Keyword, "LIST"}, + {Keyword, "MANAGE"}, + {Keyword, "MODIFY"}, + {Keyword, "REMOVE"}, + {Keyword, "SELECT"}, + {Keyword, "UPDATE"}, + {Keyword, "USE"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected); + } + + Y_UNIT_TEST(Insert) { + TVector<TCandidate> expected = { + {Keyword, "INTO"}, + {Keyword, "OR"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected); + } + + Y_UNIT_TEST(Pragma) { + TVector<TCandidate> expected = { + {Keyword, "ANSI"}, + {Keyword, "CALLABLE"}, + {Keyword, "DICT"}, + {Keyword, "ENUM"}, + {Keyword, "FLOW"}, + {Keyword, "LIST"}, + {Keyword, "OPTIONAL"}, + {Keyword, "RESOURCE"}, + {Keyword, "SET"}, + {Keyword, "STRUCT"}, + {Keyword, "TAGGED"}, + {Keyword, "TUPLE"}, + {Keyword, "VARIANT"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA "}), expected); + } + + Y_UNIT_TEST(Select) { + TVector<TCandidate> expected = { + {Keyword, "ALL"}, + {Keyword, "BITCAST"}, + {Keyword, "CALLABLE"}, + {Keyword, "CASE"}, + {Keyword, "CAST"}, + {Keyword, "CURRENT_DATE"}, + {Keyword, "CURRENT_TIME"}, + {Keyword, "CURRENT_TIMESTAMP"}, + {Keyword, "DICT"}, + {Keyword, "DISTINCT"}, + {Keyword, "EMPTY_ACTION"}, + {Keyword, "ENUM"}, + {Keyword, "EXISTS"}, + {Keyword, "FALSE"}, + {Keyword, "FLOW"}, + {Keyword, "JSON_EXISTS"}, + {Keyword, "JSON_QUERY"}, + {Keyword, "JSON_VALUE"}, + {Keyword, "LIST"}, + {Keyword, "NOT"}, + {Keyword, "NULL"}, + {Keyword, "OPTIONAL"}, + {Keyword, "RESOURCE"}, + {Keyword, "SET"}, + {Keyword, "STREAM"}, + {Keyword, "STRUCT"}, + {Keyword, "TAGGED"}, + {Keyword, "TRUE"}, + {Keyword, "TUPLE"}, + {Keyword, "VARIANT"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected); + } + + Y_UNIT_TEST(Upsert) { + TVector<TCandidate> expected = { + {Keyword, "INTO"}, + {Keyword, "OBJECT"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected); + } + + Y_UNIT_TEST(UTF8Wide) { + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0); + } + + Y_UNIT_TEST(WordBreak) { + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT ("}).size(), 28); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT (1)"}).size(), 30); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT 1;"}).size(), 35); + } + + Y_UNIT_TEST(Typing) { + const auto queryUtf16 = TUtf16String::FromUtf8( + "SELECT \n" + " 123467, \"Hello, {name}! 编码\"}, \n" + " (1 + (5 * 1 / 0)), MIN(identifier), \n" + " Bool(field), Math::Sin(var) \n" + "FROM `local/test/space/table` JOIN test;"); + + auto engine = MakeSqlCompletionEngine(); + + for (std::size_t size = 0; size <= queryUtf16.size(); ++size) { + const TWtringBuf prefixUtf16(queryUtf16, 0, size); + auto completion = engine->Complete({TString::FromUtf16(prefixUtf16)}); + Y_DO_NOT_OPTIMIZE_AWAY(completion); + } + } + + Y_UNIT_TEST(CaseInsensitivity) { + TVector<TCandidate> expected = { + {Keyword, "SELECT"}, + }; + + auto engine = MakeSqlCompletionEngine(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected); + } +} // Y_UNIT_TEST_SUITE(SqlCompleteTests) diff --git a/yql/essentials/sql/v1/complete/sql_context.cpp b/yql/essentials/sql/v1/complete/sql_context.cpp new file mode 100644 index 0000000000..18f676e40b --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_context.cpp @@ -0,0 +1,123 @@ +#include "sql_context.h" + +#include "c3_engine.h" +#include "sql_syntax.h" + +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h> + +#include <util/generic/algorithm.h> +#include <util/stream/output.h> + +namespace NSQLComplete { + + template <bool IsAnsiLexer> + class TSpecializedSqlContextInference: public ISqlContextInference { + private: + using TDefaultYQLGrammar = TAntlrGrammar< + NALADefaultAntlr4::SQLv1Antlr4Lexer, + NALADefaultAntlr4::SQLv1Antlr4Parser>; + + using TAnsiYQLGrammar = TAntlrGrammar< + NALAAnsiAntlr4::SQLv1Antlr4Lexer, + NALAAnsiAntlr4::SQLv1Antlr4Parser>; + + using G = std::conditional_t< + IsAnsiLexer, + TAnsiYQLGrammar, + TDefaultYQLGrammar>; + + public: + TSpecializedSqlContextInference() + : Grammar(&GetSqlGrammar(IsAnsiLexer)) + , C3(ComputeC3Config()) + { + } + + TCompletionContext Analyze(TCompletionInput input) override { + auto prefix = input.Text.Head(input.CursorPosition); + auto tokens = C3.Complete(prefix); + FilterIdKeywords(tokens); + return { + .Keywords = SiftedKeywords(tokens), + }; + } + + private: + IC3Engine::TConfig ComputeC3Config() { + return { + .IgnoredTokens = ComputeIgnoredTokens(), + .PreferredRules = ComputePreferredRules(), + }; + } + + std::unordered_set<TTokenId> ComputeIgnoredTokens() { + auto ignoredTokens = Grammar->GetAllTokens(); + for (auto keywordToken : Grammar->GetKeywordTokens()) { + ignoredTokens.erase(keywordToken); + } + return ignoredTokens; + } + + std::unordered_set<TRuleId> ComputePreferredRules() { + const auto& keywordRules = Grammar->GetKeywordRules(); + + std::unordered_set<TRuleId> preferredRules; + preferredRules.insert(std::begin(keywordRules), std::end(keywordRules)); + return preferredRules; + } + + void FilterIdKeywords(TVector<TSuggestedToken>& tokens) { + const auto& keywordRules = Grammar->GetKeywordRules(); + auto [first, last] = std::ranges::remove_if(tokens, [&](const TSuggestedToken& token) { + return AnyOf(token.ParserCallStack, [&](TRuleId rule) { + return Find(keywordRules, rule) != std::end(keywordRules); + }); + }); + tokens.erase(first, last); + } + + TVector<TString> SiftedKeywords(const TVector<TSuggestedToken>& tokens) { + const auto& vocabulary = Grammar->GetVocabulary(); + const auto& keywordTokens = Grammar->GetKeywordTokens(); + + TVector<TString> keywords; + for (const auto& token : tokens) { + if (keywordTokens.contains(token.Number)) { + keywords.emplace_back(vocabulary.getDisplayName(token.Number)); + } + } + return keywords; + } + + const ISqlGrammar* Grammar; + TC3Engine<G> C3; + }; + + class TSqlContextInference: public ISqlContextInference { + public: + TCompletionContext Analyze(TCompletionInput input) override { + auto isAnsiLexer = IsAnsiQuery(TString(input.Text)); + auto& engine = GetSpecializedEngine(isAnsiLexer); + return engine.Analyze(std::move(input)); + } + + private: + ISqlContextInference& GetSpecializedEngine(bool isAnsiLexer) { + if (isAnsiLexer) { + return AnsiEngine; + } + return DefaultEngine; + } + + TSpecializedSqlContextInference</* IsAnsiLexer = */ false> DefaultEngine; + TSpecializedSqlContextInference</* IsAnsiLexer = */ true> AnsiEngine; + }; + + ISqlContextInference::TPtr MakeSqlContextInference() { + return TSqlContextInference::TPtr(new TSqlContextInference()); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_context.h b/yql/essentials/sql/v1/complete/sql_context.h new file mode 100644 index 0000000000..bc3b8d4840 --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_context.h @@ -0,0 +1,23 @@ +#pragma once + +#include "sql_complete.h" + +#include <util/generic/string.h> + +namespace NSQLComplete { + + struct TCompletionContext { + TVector<TString> Keywords; + }; + + class ISqlContextInference { + public: + using TPtr = THolder<ISqlContextInference>; + + virtual TCompletionContext Analyze(TCompletionInput input) = 0; + virtual ~ISqlContextInference() = default; + }; + + ISqlContextInference::TPtr MakeSqlContextInference(); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_syntax.cpp b/yql/essentials/sql/v1/complete/sql_syntax.cpp new file mode 100644 index 0000000000..ba5a08d371 --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_syntax.cpp @@ -0,0 +1,19 @@ +#include "sql_syntax.h" + +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/sql/settings/translation_settings.h> + +namespace NSQLComplete { + + using NSQLTranslation::ParseTranslationSettings; + using NSQLTranslation::TTranslationSettings; + using NYql::TIssues; + + bool IsAnsiQuery(const TString& query) { + TTranslationSettings settings; + TIssues issues; + ParseTranslationSettings(query, settings, issues); + return settings.AnsiLexer; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_syntax.h b/yql/essentials/sql/v1/complete/sql_syntax.h new file mode 100644 index 0000000000..f03cbc9fb9 --- /dev/null +++ b/yql/essentials/sql/v1/complete/sql_syntax.h @@ -0,0 +1,10 @@ +#pragma once + +#include <util/generic/fwd.h> + +namespace NSQLComplete { + + // Permits invalid special comments + bool IsAnsiQuery(const TString& query); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/string_util.cpp b/yql/essentials/sql/v1/complete/string_util.cpp new file mode 100644 index 0000000000..12a6701065 --- /dev/null +++ b/yql/essentials/sql/v1/complete/string_util.cpp @@ -0,0 +1,29 @@ +#include "string_util.h" + +#include <util/generic/strbuf.h> + +namespace NSQLComplete { + + bool IsWordBoundary(char ch) { // Is optimized into table lookup by clang + for (size_t i = 0; i < sizeof(WordBreakCharacters) - 1; ++i) { + if (WordBreakCharacters[i] == ch) { + return true; + } + } + return false; + } + + size_t LastWordIndex(TStringBuf text) { + for (auto it = std::rbegin(text); it != std::rend(text); std::advance(it, 1)) { + if (IsWordBoundary(*it)) { + return std::distance(it, std::rend(text)); + } + } + return 0; + } + + TStringBuf LastWord(TStringBuf text) { + return text.SubStr(LastWordIndex(text)); + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/string_util.h b/yql/essentials/sql/v1/complete/string_util.h new file mode 100644 index 0000000000..bafc578d82 --- /dev/null +++ b/yql/essentials/sql/v1/complete/string_util.h @@ -0,0 +1,17 @@ +#pragma once + +#include <util/charset/unidata.h> + +#include <string_view> + +namespace NSQLComplete { + + static const char WordBreakCharacters[] = " \t\v\f\a\b\r\n`~!@#$%^&*-=+[](){}\\|;:'\".,<>/?"; + + bool IsWordBoundary(char ch); + + size_t LastWordIndex(TStringBuf text); + + TStringBuf LastWord(TStringBuf text); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/string_util_ut.cpp b/yql/essentials/sql/v1/complete/string_util_ut.cpp new file mode 100644 index 0000000000..ca3ed546a3 --- /dev/null +++ b/yql/essentials/sql/v1/complete/string_util_ut.cpp @@ -0,0 +1,21 @@ +#include "string_util.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NSQLComplete; + +Y_UNIT_TEST_SUITE(StringUtilTest) { + Y_UNIT_TEST(Blank) { + UNIT_ASSERT_VALUES_EQUAL(LastWord(""), ""); + UNIT_ASSERT_VALUES_EQUAL(LastWord(" "), ""); + UNIT_ASSERT_VALUES_EQUAL(LastWord(" "), ""); + UNIT_ASSERT_VALUES_EQUAL(LastWord(" "), ""); + } + + Y_UNIT_TEST(Space) { + UNIT_ASSERT_VALUES_EQUAL(LastWord("two "), ""); + UNIT_ASSERT_VALUES_EQUAL(LastWord("one two "), ""); + UNIT_ASSERT_VALUES_EQUAL(LastWord("two"), "two"); + UNIT_ASSERT_VALUES_EQUAL(LastWord("one two"), "two"); + } +} // Y_UNIT_TEST_SUITE(StringUtilTest) diff --git a/yql/essentials/sql/v1/complete/ut/ya.make b/yql/essentials/sql/v1/complete/ut/ya.make new file mode 100644 index 0000000000..91f7da1361 --- /dev/null +++ b/yql/essentials/sql/v1/complete/ut/ya.make @@ -0,0 +1,8 @@ +UNITTEST_FOR(yql/essentials/sql/v1/complete) + +SRCS( + sql_complete_ut.cpp + string_util_ut.cpp +) + +END() diff --git a/yql/essentials/sql/v1/complete/ya.make b/yql/essentials/sql/v1/complete/ya.make new file mode 100644 index 0000000000..70189e5f50 --- /dev/null +++ b/yql/essentials/sql/v1/complete/ya.make @@ -0,0 +1,24 @@ +LIBRARY() + +SRCS( + sql_antlr4.cpp + sql_complete.cpp + sql_context.cpp + sql_syntax.cpp + string_util.cpp +) + +PEERDIR( + contrib/libs/antlr4_cpp_runtime + contrib/libs/antlr4-c3 + yql/essentials/sql/settings + yql/essentials/sql/v1/format + yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4 + yql/essentials/parser/antlr_ast/gen/v1_antlr4 +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/yql/essentials/sql/v1/ya.make b/yql/essentials/sql/v1/ya.make index 08fd499b3f..1d2105f6fa 100644 --- a/yql/essentials/sql/v1/ya.make +++ b/yql/essentials/sql/v1/ya.make @@ -56,6 +56,7 @@ GENERATE_ENUM_SERIALIZATION(sql_call_param.h) END() RECURSE( + complete format lexer perf |