diff options
| author | vitya-smirnov <[email protected]> | 2025-10-06 09:00:56 +0300 |
|---|---|---|
| committer | vitya-smirnov <[email protected]> | 2025-10-06 09:18:09 +0300 |
| commit | 96bddf8336a6ca765ee3141049197b8bff1810b5 (patch) | |
| tree | eec486eb50dbb0736245f9f4c12ca387162a3707 /yql/essentials/parser | |
| parent | ff0062834bf261b420f838c43effdf5d49beafd5 (diff) | |
YQL-20116: Introduce ANTLR4 ambiguity detection
- Ignored 2 known ambiguities.
- Tested tools.
commit_hash:9e29bb2f876dabc68293b3e5c26a470d373506ae
Diffstat (limited to 'yql/essentials/parser')
| -rw-r--r-- | yql/essentials/parser/common/antlr4/error_listener.cpp | 68 | ||||
| -rw-r--r-- | yql/essentials/parser/common/antlr4/error_listener.h | 12 | ||||
| -rw-r--r-- | yql/essentials/parser/common/error.cpp | 36 | ||||
| -rw-r--r-- | yql/essentials/parser/common/error.h | 22 | ||||
| -rw-r--r-- | yql/essentials/parser/common/issue.h | 6 | ||||
| -rw-r--r-- | yql/essentials/parser/common/ya.make | 1 | ||||
| -rw-r--r-- | yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp | 11 | ||||
| -rw-r--r-- | yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h | 20 | ||||
| -rw-r--r-- | yql/essentials/parser/proto_ast/gen/jsonpath/ya.make | 4 |
9 files changed, 131 insertions, 49 deletions
diff --git a/yql/essentials/parser/common/antlr4/error_listener.cpp b/yql/essentials/parser/common/antlr4/error_listener.cpp index c8d72abc949..73c14175314 100644 --- a/yql/essentials/parser/common/antlr4/error_listener.cpp +++ b/yql/essentials/parser/common/antlr4/error_listener.cpp @@ -1,10 +1,27 @@ #include "error_listener.h" +#include <yql/essentials/core/issue/yql_issue.h> + +#include <util/generic/vector.h> +#include <util/string/builder.h> +#include <util/string/join.h> + namespace antlr4 { - YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error) + TVector<size_t> ToVector(const antlrcpp::BitSet& ambigAlts) { + TVector<size_t> result; + for (size_t i = 0; i < ambigAlts.size(); ++i) { + if (ambigAlts.test(i)) { + result.push_back(i); + } + } + return result; + } + + YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error, bool isAmbiguityError) : Errors_(errors) , Error_(error) + , IsAmbiguityError_(isAmbiguityError) { } @@ -16,4 +33,53 @@ namespace antlr4 { Errors_->Error(line, charPositionInLine, msg.c_str()); } + void YqlErrorListener::reportAmbiguity( + Parser* recognizer, + const dfa::DFA& dfa, + size_t startIndex, + size_t stopIndex, + bool exact, + const antlrcpp::BitSet& ambigAlts, + atn::ATNConfigSet* configs) + { + Y_UNUSED(configs); + + size_t ruleIndex = dfa.atnStartState->ruleIndex; + std::string_view ruleName = recognizer->getRuleNames()[ruleIndex]; + + if (// FIXME(YQL-20410): It is a known ambiguity, remove it when + // an expression (x NOT NULL) is a syntax error. + ruleName == "xor_subexpr" || + // Known ambiguity, on ANTLR3 syntactic predicates were used. + ruleName == "neq_subexpr") { + return; + } + + TokenStream* tokens = recognizer->getTokenStream(); + Token* start = tokens->get(startIndex); + Token* stop = tokens->get(stopIndex); + + TString alternatives = JoinSeq(", ", ToVector(ambigAlts)); + + NYql::TPosition startPos(start->getCharPositionInLine(), start->getLine(), "unknown"); + NYql::TPosition stopPos(stop->getCharPositionInLine(), stop->getLine(), "unknown"); + + TString message = TStringBuilder() + << "An" << (exact ? " exactly " : " ") + << "ambiguous decision " << dfa.decision + << " at rule '" << ruleName << "'" + << " with conflicted alternatives {" << alternatives << "}"; + + NYql::TIssue issue(std::move(startPos), std::move(stopPos), std::move(message)); + + if (IsAmbiguityError_) { + *Error_ = true; + issue.SetCode(NYql::UNEXPECTED_ERROR, NYql::TSeverityIds::S_FATAL); + } else { + issue.SetCode(NYql::TIssuesIds::YQL_SYNTAX_AMBIGUITY, NYql::TSeverityIds::S_WARNING); + } + + Errors_->Report(std::move(issue)); + } + } // namespace antlr4 diff --git a/yql/essentials/parser/common/antlr4/error_listener.h b/yql/essentials/parser/common/antlr4/error_listener.h index b78e1563cd7..3d8f6af8a6b 100644 --- a/yql/essentials/parser/common/antlr4/error_listener.h +++ b/yql/essentials/parser/common/antlr4/error_listener.h @@ -9,14 +9,24 @@ namespace antlr4 { class ANTLR4CPP_PUBLIC YqlErrorListener: public BaseErrorListener { NAST::IErrorCollector* Errors_; bool* Error_; + const bool IsAmbiguityError_; public: - YqlErrorListener(NAST::IErrorCollector* errors, bool* error); + YqlErrorListener(NAST::IErrorCollector* errors, bool* error, bool isAmbiguityError = false); virtual void syntaxError( Recognizer* recognizer, Token* offendingSymbol, size_t line, size_t charPositionInLine, const std::string& msg, std::exception_ptr e) override; + + void reportAmbiguity( + Parser* recognizer, + const dfa::DFA& dfa, + size_t startIndex, + size_t stopIndex, + bool exact, + const antlrcpp::BitSet& ambigAlts, + atn::ATNConfigSet* configs) override; }; } // namespace antlr4 diff --git a/yql/essentials/parser/common/error.cpp b/yql/essentials/parser/common/error.cpp index 9fad2892213..b6b087631c7 100644 --- a/yql/essentials/parser/common/error.cpp +++ b/yql/essentials/parser/common/error.cpp @@ -14,35 +14,29 @@ namespace NAST { } void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) { - if (NumErrors_ + 1 == MaxErrors_) { - AddError(0, 0, "Too many errors"); - ++NumErrors_; - } - - if (NumErrors_ >= MaxErrors_) { - ythrow TTooManyErrors() << "Too many errors"; - } - + GuardTooManyErrors(); AddError(line, col, message); ++NumErrors_; } - TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors) - : IErrorCollector(maxErrors) - , Err(err) - , Name(name) - { + void IErrorCollector::Report(NYql::TIssue&& issue) { + GuardTooManyErrors(); + bool isError = issue.GetSeverity() >= NYql::TSeverityIds::S_WARNING; + AddIssue(std::forward<NYql::TIssue>(issue)); + if (isError) { + ++NumErrors_; + } } - TErrorOutput::~TErrorOutput() - { - } + void IErrorCollector::GuardTooManyErrors() { + if (NumErrors_ + 1 == MaxErrors_) { + AddError(0, 0, "Too many errors"); + ++NumErrors_; + } - void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) { - if (!Name.empty()) { - Err << "Query " << Name << ": "; + if (NumErrors_ >= MaxErrors_) { + ythrow TTooManyErrors() << "Too many errors"; } - Err << "Line " << line << " column " << col << " error: " << message; } } // namespace NAST diff --git a/yql/essentials/parser/common/error.h b/yql/essentials/parser/common/error.h index 966d996381e..81a920a2809 100644 --- a/yql/essentials/parser/common/error.h +++ b/yql/essentials/parser/common/error.h @@ -1,5 +1,7 @@ #pragma once +#include <yql/essentials/public/issue/yql_issue.h> + #include <util/generic/yexception.h> #include <util/generic/fwd.h> @@ -18,25 +20,19 @@ namespace NAST { // throws TTooManyErrors void Error(ui32 line, ui32 col, const TString& message); + // throws TTooManyErrors + void Report(NYql::TIssue&& issue); + private: + void GuardTooManyErrors(); + virtual void AddError(ui32 line, ui32 col, const TString& message) = 0; + virtual void AddIssue(NYql::TIssue&& issue) = 0; + protected: const size_t MaxErrors_; size_t NumErrors_; }; - class TErrorOutput: public IErrorCollector { - public: - TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors); - virtual ~TErrorOutput(); - - private: - void AddError(ui32 line, ui32 col, const TString& message) override; - - public: - IOutputStream& Err; - TString Name; - }; - } // namespace NAST diff --git a/yql/essentials/parser/common/issue.h b/yql/essentials/parser/common/issue.h index 5573adb03c3..3a17252cbdb 100644 --- a/yql/essentials/parser/common/issue.h +++ b/yql/essentials/parser/common/issue.h @@ -22,6 +22,12 @@ namespace NSQLTranslation { Issues_.AddIssue(NYql::TPosition(col, line, File_), message); } + void AddIssue(NYql::TIssue&& issue) override { + issue.Position.File = File_; + issue.EndPosition.File = File_; + Issues_.AddIssue(std::forward<NYql::TIssue>(issue)); + } + private: NYql::TIssues& Issues_; const TString File_; diff --git a/yql/essentials/parser/common/ya.make b/yql/essentials/parser/common/ya.make index b0ae371dfe4..290ae7be8b5 100644 --- a/yql/essentials/parser/common/ya.make +++ b/yql/essentials/parser/common/ya.make @@ -2,6 +2,7 @@ LIBRARY() PEERDIR( yql/essentials/public/issue + yql/essentials/core/issue ) SRCS( diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp index a66ba70780f..b88b90b0af3 100644 --- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp +++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp @@ -1,12 +1 @@ #include "proto_ast_antlr4.h" - -antlr4::YqlErrorListener::YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error) - : Errors_(errors), Error_(error) -{ -} - -void antlr4::YqlErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, - size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) { - *Error_ = true; - Errors_->Error(line, charPositionInLine, msg.c_str()); -} diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h index 13cbbc6e73b..12b8b87fea1 100644 --- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h +++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h @@ -35,23 +35,38 @@ namespace NProtoAST { class TProtoASTBuilder4 { public: - TProtoASTBuilder4(TStringBuf data, const TString& queryName = "query", google::protobuf::Arena* arena = nullptr) + TProtoASTBuilder4( + TStringBuf data, + const TString& queryName = "query", + google::protobuf::Arena* arena = nullptr, + bool isAmbiguityError = false, + bool isAmbiguityDebugging = false + ) : QueryName_(queryName) + , IsAmbiguityError_(isAmbiguityError) , InputStream_(data) , Lexer_(&InputStream_) , TokenStream_(&Lexer_) , Parser_(&TokenStream_, arena) { + if (isAmbiguityDebugging) { + Parser_ + .template getInterpreter<antlr4::atn::ParserATNSimulator>() + ->setPredictionMode(antlr4::atn::PredictionMode::LL_EXACT_AMBIG_DETECTION); + } } google::protobuf::Message* BuildAST(IErrorCollector& errors) { // TODO: find a better way to break on lexer errors - typename antlr4::YqlErrorListener listener(&errors, &Parser_.error); + typename antlr4::YqlErrorListener listener(&errors, &Parser_.error, IsAmbiguityError_); Parser_.removeErrorListeners(); Parser_.addErrorListener(&listener); try { auto result = Parser_.Parse(&errors); Parser_.removeErrorListener(&listener); + if (Parser_.error) { + result = nullptr; + } Parser_.error = false; return result; } catch (const TTooManyErrors&) { @@ -68,6 +83,7 @@ namespace NProtoAST { private: TString QueryName_; + bool IsAmbiguityError_; antlr4::ANTLRInputStream InputStream_; TLexer Lexer_; diff --git a/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make b/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make index 783b5945feb..c520e141792 100644 --- a/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make +++ b/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make @@ -47,6 +47,10 @@ ENDIF() SRCS(JsonPathParser.proto) +PEERDIR( + yql/essentials/public/issue/protos +) + EXCLUDE_TAGS(GO_PROTO JAVA_PROTO) END() |
