summaryrefslogtreecommitdiffstats
path: root/yql/essentials/parser
diff options
context:
space:
mode:
authorvitya-smirnov <[email protected]>2025-10-06 09:00:56 +0300
committervitya-smirnov <[email protected]>2025-10-06 09:18:09 +0300
commit96bddf8336a6ca765ee3141049197b8bff1810b5 (patch)
treeeec486eb50dbb0736245f9f4c12ca387162a3707 /yql/essentials/parser
parentff0062834bf261b420f838c43effdf5d49beafd5 (diff)
YQL-20116: Introduce ANTLR4 ambiguity detection
- Ignored 2 known ambiguities. - Tested tools. commit_hash:9e29bb2f876dabc68293b3e5c26a470d373506ae
Diffstat (limited to 'yql/essentials/parser')
-rw-r--r--yql/essentials/parser/common/antlr4/error_listener.cpp68
-rw-r--r--yql/essentials/parser/common/antlr4/error_listener.h12
-rw-r--r--yql/essentials/parser/common/error.cpp36
-rw-r--r--yql/essentials/parser/common/error.h22
-rw-r--r--yql/essentials/parser/common/issue.h6
-rw-r--r--yql/essentials/parser/common/ya.make1
-rw-r--r--yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp11
-rw-r--r--yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h20
-rw-r--r--yql/essentials/parser/proto_ast/gen/jsonpath/ya.make4
9 files changed, 131 insertions, 49 deletions
diff --git a/yql/essentials/parser/common/antlr4/error_listener.cpp b/yql/essentials/parser/common/antlr4/error_listener.cpp
index c8d72abc949..73c14175314 100644
--- a/yql/essentials/parser/common/antlr4/error_listener.cpp
+++ b/yql/essentials/parser/common/antlr4/error_listener.cpp
@@ -1,10 +1,27 @@
#include "error_listener.h"
+#include <yql/essentials/core/issue/yql_issue.h>
+
+#include <util/generic/vector.h>
+#include <util/string/builder.h>
+#include <util/string/join.h>
+
namespace antlr4 {
- YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error)
+ TVector<size_t> ToVector(const antlrcpp::BitSet& ambigAlts) {
+ TVector<size_t> result;
+ for (size_t i = 0; i < ambigAlts.size(); ++i) {
+ if (ambigAlts.test(i)) {
+ result.push_back(i);
+ }
+ }
+ return result;
+ }
+
+ YqlErrorListener::YqlErrorListener(NAST::IErrorCollector* errors, bool* error, bool isAmbiguityError)
: Errors_(errors)
, Error_(error)
+ , IsAmbiguityError_(isAmbiguityError)
{
}
@@ -16,4 +33,53 @@ namespace antlr4 {
Errors_->Error(line, charPositionInLine, msg.c_str());
}
+ void YqlErrorListener::reportAmbiguity(
+ Parser* recognizer,
+ const dfa::DFA& dfa,
+ size_t startIndex,
+ size_t stopIndex,
+ bool exact,
+ const antlrcpp::BitSet& ambigAlts,
+ atn::ATNConfigSet* configs)
+ {
+ Y_UNUSED(configs);
+
+ size_t ruleIndex = dfa.atnStartState->ruleIndex;
+ std::string_view ruleName = recognizer->getRuleNames()[ruleIndex];
+
+ if (// FIXME(YQL-20410): It is a known ambiguity, remove it when
+ // an expression (x NOT NULL) is a syntax error.
+ ruleName == "xor_subexpr" ||
+ // Known ambiguity, on ANTLR3 syntactic predicates were used.
+ ruleName == "neq_subexpr") {
+ return;
+ }
+
+ TokenStream* tokens = recognizer->getTokenStream();
+ Token* start = tokens->get(startIndex);
+ Token* stop = tokens->get(stopIndex);
+
+ TString alternatives = JoinSeq(", ", ToVector(ambigAlts));
+
+ NYql::TPosition startPos(start->getCharPositionInLine(), start->getLine(), "unknown");
+ NYql::TPosition stopPos(stop->getCharPositionInLine(), stop->getLine(), "unknown");
+
+ TString message = TStringBuilder()
+ << "An" << (exact ? " exactly " : " ")
+ << "ambiguous decision " << dfa.decision
+ << " at rule '" << ruleName << "'"
+ << " with conflicted alternatives {" << alternatives << "}";
+
+ NYql::TIssue issue(std::move(startPos), std::move(stopPos), std::move(message));
+
+ if (IsAmbiguityError_) {
+ *Error_ = true;
+ issue.SetCode(NYql::UNEXPECTED_ERROR, NYql::TSeverityIds::S_FATAL);
+ } else {
+ issue.SetCode(NYql::TIssuesIds::YQL_SYNTAX_AMBIGUITY, NYql::TSeverityIds::S_WARNING);
+ }
+
+ Errors_->Report(std::move(issue));
+ }
+
} // namespace antlr4
diff --git a/yql/essentials/parser/common/antlr4/error_listener.h b/yql/essentials/parser/common/antlr4/error_listener.h
index b78e1563cd7..3d8f6af8a6b 100644
--- a/yql/essentials/parser/common/antlr4/error_listener.h
+++ b/yql/essentials/parser/common/antlr4/error_listener.h
@@ -9,14 +9,24 @@ namespace antlr4 {
class ANTLR4CPP_PUBLIC YqlErrorListener: public BaseErrorListener {
NAST::IErrorCollector* Errors_;
bool* Error_;
+ const bool IsAmbiguityError_;
public:
- YqlErrorListener(NAST::IErrorCollector* errors, bool* error);
+ YqlErrorListener(NAST::IErrorCollector* errors, bool* error, bool isAmbiguityError = false);
virtual void syntaxError(
Recognizer* recognizer, Token* offendingSymbol,
size_t line, size_t charPositionInLine,
const std::string& msg, std::exception_ptr e) override;
+
+ void reportAmbiguity(
+ Parser* recognizer,
+ const dfa::DFA& dfa,
+ size_t startIndex,
+ size_t stopIndex,
+ bool exact,
+ const antlrcpp::BitSet& ambigAlts,
+ atn::ATNConfigSet* configs) override;
};
} // namespace antlr4
diff --git a/yql/essentials/parser/common/error.cpp b/yql/essentials/parser/common/error.cpp
index 9fad2892213..b6b087631c7 100644
--- a/yql/essentials/parser/common/error.cpp
+++ b/yql/essentials/parser/common/error.cpp
@@ -14,35 +14,29 @@ namespace NAST {
}
void IErrorCollector::Error(ui32 line, ui32 col, const TString& message) {
- if (NumErrors_ + 1 == MaxErrors_) {
- AddError(0, 0, "Too many errors");
- ++NumErrors_;
- }
-
- if (NumErrors_ >= MaxErrors_) {
- ythrow TTooManyErrors() << "Too many errors";
- }
-
+ GuardTooManyErrors();
AddError(line, col, message);
++NumErrors_;
}
- TErrorOutput::TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors)
- : IErrorCollector(maxErrors)
- , Err(err)
- , Name(name)
- {
+ void IErrorCollector::Report(NYql::TIssue&& issue) {
+ GuardTooManyErrors();
+ bool isError = issue.GetSeverity() >= NYql::TSeverityIds::S_WARNING;
+ AddIssue(std::forward<NYql::TIssue>(issue));
+ if (isError) {
+ ++NumErrors_;
+ }
}
- TErrorOutput::~TErrorOutput()
- {
- }
+ void IErrorCollector::GuardTooManyErrors() {
+ if (NumErrors_ + 1 == MaxErrors_) {
+ AddError(0, 0, "Too many errors");
+ ++NumErrors_;
+ }
- void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) {
- if (!Name.empty()) {
- Err << "Query " << Name << ": ";
+ if (NumErrors_ >= MaxErrors_) {
+ ythrow TTooManyErrors() << "Too many errors";
}
- Err << "Line " << line << " column " << col << " error: " << message;
}
} // namespace NAST
diff --git a/yql/essentials/parser/common/error.h b/yql/essentials/parser/common/error.h
index 966d996381e..81a920a2809 100644
--- a/yql/essentials/parser/common/error.h
+++ b/yql/essentials/parser/common/error.h
@@ -1,5 +1,7 @@
#pragma once
+#include <yql/essentials/public/issue/yql_issue.h>
+
#include <util/generic/yexception.h>
#include <util/generic/fwd.h>
@@ -18,25 +20,19 @@ namespace NAST {
// throws TTooManyErrors
void Error(ui32 line, ui32 col, const TString& message);
+ // throws TTooManyErrors
+ void Report(NYql::TIssue&& issue);
+
private:
+ void GuardTooManyErrors();
+
virtual void AddError(ui32 line, ui32 col, const TString& message) = 0;
+ virtual void AddIssue(NYql::TIssue&& issue) = 0;
+
protected:
const size_t MaxErrors_;
size_t NumErrors_;
};
- class TErrorOutput: public IErrorCollector {
- public:
- TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors);
- virtual ~TErrorOutput();
-
- private:
- void AddError(ui32 line, ui32 col, const TString& message) override;
-
- public:
- IOutputStream& Err;
- TString Name;
- };
-
} // namespace NAST
diff --git a/yql/essentials/parser/common/issue.h b/yql/essentials/parser/common/issue.h
index 5573adb03c3..3a17252cbdb 100644
--- a/yql/essentials/parser/common/issue.h
+++ b/yql/essentials/parser/common/issue.h
@@ -22,6 +22,12 @@ namespace NSQLTranslation {
Issues_.AddIssue(NYql::TPosition(col, line, File_), message);
}
+ void AddIssue(NYql::TIssue&& issue) override {
+ issue.Position.File = File_;
+ issue.EndPosition.File = File_;
+ Issues_.AddIssue(std::forward<NYql::TIssue>(issue));
+ }
+
private:
NYql::TIssues& Issues_;
const TString File_;
diff --git a/yql/essentials/parser/common/ya.make b/yql/essentials/parser/common/ya.make
index b0ae371dfe4..290ae7be8b5 100644
--- a/yql/essentials/parser/common/ya.make
+++ b/yql/essentials/parser/common/ya.make
@@ -2,6 +2,7 @@ LIBRARY()
PEERDIR(
yql/essentials/public/issue
+ yql/essentials/core/issue
)
SRCS(
diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp
index a66ba70780f..b88b90b0af3 100644
--- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp
+++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.cpp
@@ -1,12 +1 @@
#include "proto_ast_antlr4.h"
-
-antlr4::YqlErrorListener::YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error)
- : Errors_(errors), Error_(error)
-{
-}
-
-void antlr4::YqlErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/,
- size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) {
- *Error_ = true;
- Errors_->Error(line, charPositionInLine, msg.c_str());
-}
diff --git a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h
index 13cbbc6e73b..12b8b87fea1 100644
--- a/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h
+++ b/yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h
@@ -35,23 +35,38 @@ namespace NProtoAST {
class TProtoASTBuilder4 {
public:
- TProtoASTBuilder4(TStringBuf data, const TString& queryName = "query", google::protobuf::Arena* arena = nullptr)
+ TProtoASTBuilder4(
+ TStringBuf data,
+ const TString& queryName = "query",
+ google::protobuf::Arena* arena = nullptr,
+ bool isAmbiguityError = false,
+ bool isAmbiguityDebugging = false
+ )
: QueryName_(queryName)
+ , IsAmbiguityError_(isAmbiguityError)
, InputStream_(data)
, Lexer_(&InputStream_)
, TokenStream_(&Lexer_)
, Parser_(&TokenStream_, arena)
{
+ if (isAmbiguityDebugging) {
+ Parser_
+ .template getInterpreter<antlr4::atn::ParserATNSimulator>()
+ ->setPredictionMode(antlr4::atn::PredictionMode::LL_EXACT_AMBIG_DETECTION);
+ }
}
google::protobuf::Message* BuildAST(IErrorCollector& errors) {
// TODO: find a better way to break on lexer errors
- typename antlr4::YqlErrorListener listener(&errors, &Parser_.error);
+ typename antlr4::YqlErrorListener listener(&errors, &Parser_.error, IsAmbiguityError_);
Parser_.removeErrorListeners();
Parser_.addErrorListener(&listener);
try {
auto result = Parser_.Parse(&errors);
Parser_.removeErrorListener(&listener);
+ if (Parser_.error) {
+ result = nullptr;
+ }
Parser_.error = false;
return result;
} catch (const TTooManyErrors&) {
@@ -68,6 +83,7 @@ namespace NProtoAST {
private:
TString QueryName_;
+ bool IsAmbiguityError_;
antlr4::ANTLRInputStream InputStream_;
TLexer Lexer_;
diff --git a/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make b/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make
index 783b5945feb..c520e141792 100644
--- a/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make
+++ b/yql/essentials/parser/proto_ast/gen/jsonpath/ya.make
@@ -47,6 +47,10 @@ ENDIF()
SRCS(JsonPathParser.proto)
+PEERDIR(
+ yql/essentials/public/issue/protos
+)
+
EXCLUDE_TAGS(GO_PROTO JAVA_PROTO)
END()