diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 01:45:21 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 02:42:50 +0300 |
commit | 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch) | |
tree | 9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src | |
parent | 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff) | |
download | ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src')
306 files changed, 29188 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.cpp
new file mode 100644
index 0000000000..6ceadb87f9
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.cpp
@@ -0,0 +1,10 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "ANTLRErrorListener.h"
+
+antlr4::ANTLRErrorListener::~ANTLRErrorListener()
+{
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.h
new file mode 100644
index 0000000000..6dc66237e4
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorListener.h
@@ -0,0 +1,167 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "RecognitionException.h"
+
+namespace antlrcpp {
+  class BitSet;
+}
+
+namespace antlr4 {
+
+  /// How to emit recognition errors (an interface in Java).
+  class ANTLR4CPP_PUBLIC ANTLRErrorListener {
+  public:
+    virtual ~ANTLRErrorListener();
+
+    /// <summary>
+    /// Upon syntax error, notify any interested parties. This is not how to
+    /// recover from errors or compute error messages. <seealso cref="ANTLRErrorStrategy"/>
+    /// specifies how to recover from syntax errors and how to compute error
+    /// messages. This listener's job is simply to emit a computed message,
+    /// though it has enough information to create its own message in many cases.
+    /// <p/>
+    /// The <seealso cref="RecognitionException"/> is non-null for all syntax errors except
+    /// when we discover mismatched token errors that we can recover from
+    /// in-line, without returning from the surrounding rule (via the single
+    /// token insertion and deletion mechanism).
+    /// </summary>
+    /// <param name="recognizer">
+    ///        What parser got the error. From this
+    ///        object, you can access the context as well
+    ///        as the input stream. </param>
+    /// <param name="offendingSymbol">
+    ///        The offending token in the input token
+    ///        stream, unless recognizer is a lexer (then it's null). If
+    ///        no viable alternative error, {@code e} has token at which we
+    ///        started production for the decision. </param>
+    /// <param name="line">
+    ///        The line number in the input where the error occurred. </param>
+    /// <param name="charPositionInLine">
+    ///        The character position within that line where the error occurred. </param>
+    /// <param name="msg">
+    ///        The message to emit. </param>
+    /// <param name="e">
+    ///        The exception generated by the parser that led to
+    ///        the reporting of an error. It is null in the case where
+    ///        the parser was able to recover in line without exiting the
+    ///        surrounding rule. </param>
+    virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line,
+                             size_t charPositionInLine, const std::string &msg, std::exception_ptr e) = 0;
+
+    /**
+     * This method is called by the parser when a full-context prediction
+     * results in an ambiguity.
+     *
+     * <p>Each full-context prediction which does not result in a syntax error
+     * will call either {@link #reportContextSensitivity} or
+     * {@link #reportAmbiguity}.</p>
+     *
+     * <p>When {@code ambigAlts} is not null, it contains the set of potentially
+     * viable alternatives identified by the prediction algorithm. When
+     * {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the
+     * represented alternatives from the {@code configs} argument.</p>
+     *
+     * <p>When {@code exact} is {@code true}, <em>all</em> of the potentially
+     * viable alternatives are truly viable, i.e. this is reporting an exact
+     * ambiguity. When {@code exact} is {@code false}, <em>at least two</em> of
+     * the potentially viable alternatives are viable for the current input, but
+     * the prediction algorithm terminated as soon as it determined that at
+     * least the <em>minimum</em> potentially viable alternative is truly
+     * viable.</p>
+     *
+     * <p>When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
+     * mode is used, the parser is required to identify exact ambiguities so
+     * {@code exact} will always be {@code true}.</p>
+     *
+     * <p>This method is not used by lexers.</p>
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the ambiguity was identified
+     * @param exact {@code true} if the ambiguity is exactly known, otherwise
+     * {@code false}. This is always {@code true} when
+     * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used.
+     * @param ambigAlts the potentially ambiguous alternatives, or {@code null}
+     * to indicate that the potentially ambiguous alternatives are the complete
+     * set of represented alternatives in {@code configs}
+     * @param configs the ATN configuration set where the ambiguity was
+     * identified
+     */
+    virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact,
+      const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) = 0;
+
+    /**
+     * This method is called when an SLL conflict occurs and the parser is about
+     * to use the full context information to make an LL decision.
+     *
+     * <p>If one or more configurations in {@code configs} contains a semantic
+     * predicate, the predicates are evaluated before this method is called. The
+     * subset of alternatives which are still viable after predicates are
+     * evaluated is reported in {@code conflictingAlts}.</p>
+     *
+     * <p>This method is not used by lexers.</p>
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the SLL conflict occurred
+     * @param conflictingAlts The specific conflicting alternatives. If this is
+     * {@code null}, the conflicting alternatives are all alternatives
+     * represented in {@code configs}. At the moment, conflictingAlts is non-null
+     * (for the reference implementation, but Sam's optimized version can see this
+     * as null).
+     * @param configs the ATN configuration set where the SLL conflict was
+     * detected
+     */
+    virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
+      const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) = 0;
+
+    /**
+     * This method is called by the parser when a full-context prediction has a
+     * unique result.
+     *
+     * <p>Each full-context prediction which does not result in a syntax error
+     * will call either {@link #reportContextSensitivity} or
+     * {@link #reportAmbiguity}.</p>
+     *
+     * <p>For prediction implementations that only evaluate full-context
+     * predictions when an SLL conflict is found (including the default
+     * {@link ParserATNSimulator} implementation), this method reports cases
+     * where SLL conflicts were resolved to unique full-context predictions,
+     * i.e. the decision was context-sensitive. This report does not necessarily
+     * indicate a problem, and it may appear even in completely unambiguous
+     * grammars.</p>
+     *
+     * <p>{@code configs} may have more than one represented alternative if the
+     * full-context prediction algorithm does not evaluate predicates before
+     * beginning the full-context prediction. In all cases, the final prediction
+     * is passed as the {@code prediction} argument.</p>
+     *
+     * <p>Note that the definition of "context sensitivity" in this method
+     * differs from the concept in {@link DecisionInfo#contextSensitivities}.
+     * This method reports all instances where an SLL conflict occurred but LL
+     * parsing produced a unique result, whether or not that unique result
+     * matches the minimum alternative in the SLL conflicting set.</p>
+     *
+     * <p>This method is not used by lexers.</p>
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the context sensitivity was
+     * finally determined
+     * @param prediction the unambiguous result of the full-context prediction
+     * @param configs the ATN configuration set where the unambiguous prediction
+     * was determined
+     */
+    virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
+      size_t prediction, atn::ATNConfigSet *configs) = 0;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.cpp
new file mode 100644
index 0000000000..1655a5731d
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.cpp
@@ -0,0 +1,10 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "ANTLRErrorStrategy.h"
+
+antlr4::ANTLRErrorStrategy::~ANTLRErrorStrategy()
+{
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.h
new file mode 100644
index 0000000000..a3eecd14c4
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRErrorStrategy.h
@@ -0,0 +1,121 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Token.h"
+
+namespace antlr4 {
+
+  /// <summary>
+  /// The interface for defining strategies to deal with syntax errors encountered
+  /// during a parse by ANTLR-generated parsers. We distinguish between three
+  /// different kinds of errors:
+  ///
+  /// <ul>
+  /// <li>The parser could not figure out which path to take in the ATN (none of
+  /// the available alternatives could possibly match)</li>
+  /// <li>The current input does not match what we were looking for</li>
+  /// <li>A predicate evaluated to false</li>
+  /// </ul>
+  ///
+  /// Implementations of this interface report syntax errors by calling
+  /// <seealso cref="Parser#notifyErrorListeners"/>.
+  /// <p/>
+  /// TODO: what to do about lexers
+  /// </summary>
+  class ANTLR4CPP_PUBLIC ANTLRErrorStrategy {
+  public:
+
+    virtual ~ANTLRErrorStrategy();
+
+    /// <summary>
+    /// Reset the error handler state for the specified {@code recognizer}. </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    virtual void reset(Parser *recognizer) = 0;
+
+    /**
+     * This method is called when an unexpected symbol is encountered during an
+     * inline match operation, such as {@link Parser#match}. If the error
+     * strategy successfully recovers from the match failure, this method
+     * returns the {@link Token} instance which should be treated as the
+     * successful result of the match.
+     *
+     * <p>This method handles the consumption of any tokens - the caller should
+     * <b>not</b> call {@link Parser#consume} after a successful recovery.</p>
+     *
+     * <p>Note that the calling code will not report an error if this method
+     * returns successfully. The error strategy implementation is responsible
+     * for calling {@link Parser#notifyErrorListeners} as appropriate.</p>
+     *
+     * @param recognizer the parser instance
+     * @throws RecognitionException if the error strategy was not able to
+     * recover from the unexpected input symbol
+     */
+    virtual Token* recoverInline(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// This method is called to recover from exception {@code e}. This method is
+    /// called after <seealso cref="#reportError"/> by the default exception handler
+    /// generated for a rule method.
+    /// </summary>
+    /// <seealso cref= #reportError
+    /// </seealso>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception to recover from </param>
+    /// <exception cref="RecognitionException"> if the error strategy could not recover from
+    /// the recognition exception </exception>
+    virtual void recover(Parser *recognizer, std::exception_ptr e) = 0;
+
+    /// <summary>
+    /// This method provides the error handler with an opportunity to handle
+    /// syntactic or semantic errors in the input stream before they result in a
+    /// <seealso cref="RecognitionException"/>.
+    /// <p/>
+    /// The generated code currently contains calls to <seealso cref="#sync"/> after
+    /// entering the decision state of a closure block ({@code (...)*} or
+    /// {@code (...)+}).
+    /// <p/>
+    /// For an implementation based on Jim Idle's "magic sync" mechanism, see
+    /// <seealso cref="DefaultErrorStrategy#sync"/>.
+    /// </summary>
+    /// <seealso cref= DefaultErrorStrategy#sync
+    /// </seealso>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <exception cref="RecognitionException"> if an error is detected by the error
+    /// strategy but cannot be automatically recovered at the current state in
+    /// the parsing process </exception>
+    virtual void sync(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// Tests whether or not {@code recognizer} is in the process of recovering
+    /// from an error. In error recovery mode, <seealso cref="Parser#consume"/> adds
+    /// symbols to the parse tree by calling
+    /// {@link Parser#createErrorNode(ParserRuleContext, Token)} then
+    /// {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of
+    /// {@link Parser#createTerminalNode(ParserRuleContext, Token)}.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <returns> {@code true} if the parser is currently recovering from a parse
+    /// error, otherwise {@code false} </returns>
+    virtual bool inErrorRecoveryMode(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// This method is called when the parser successfully matches an input
+    /// symbol.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    virtual void reportMatch(Parser *recognizer) = 0;
+
+    /// <summary>
+    /// Report any kind of <seealso cref="RecognitionException"/>. This method is called by
+    /// the default exception handler generated for a rule method.
+    /// </summary>
+    /// <param name="recognizer"> the parser instance </param>
+    /// <param name="e"> the recognition exception to report </param>
+    virtual void reportError(Parser *recognizer, const RecognitionException &e) = 0;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.cpp
new file mode 100644
index 0000000000..674817ac0e
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.cpp
@@ -0,0 +1,23 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "ANTLRFileStream.h"
+
+using namespace antlr4;
+
+void ANTLRFileStream::loadFromFile(const std::string &fileName) {
+  _fileName = fileName;
+  if (_fileName.empty()) {
+    return;
+  }
+
+  std::ifstream stream(fileName, std::ios::binary);
+  // No open-failure check is made here: the stream is handed to load() as is.
+  ANTLRInputStream::load(stream);
+}
+
+std::string ANTLRFileStream::getSourceName() const {
+  return _fileName;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.h
new file mode 100644
index 0000000000..6c7d619a00
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRFileStream.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "ANTLRInputStream.h"
+
+namespace antlr4 {
+
+  /// This is an ANTLRInputStream whose content is read from a file all at once
+  /// by loadFromFile(); the data-taking constructors are deleted.
+  // TODO: this class needs testing.
+  class ANTLR4CPP_PUBLIC ANTLRFileStream : public ANTLRInputStream {
+  public:
+    ANTLRFileStream() = default;
+    ANTLRFileStream(const std::string &) = delete;
+    ANTLRFileStream(const char *data, size_t length) = delete;
+    ANTLRFileStream(std::istream &stream) = delete;
+
+    // Assumes a file name encoded in UTF-8 and file content in the same encoding (with or w/o BOM).
+    virtual void loadFromFile(const std::string &fileName);
+    virtual std::string getSourceName() const override;
+
+  private:
+    std::string _fileName; // UTF-8 encoded file name.
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp
new file mode 100644
index 0000000000..b6470af9b7
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp
@@ -0,0 +1,180 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include <string.h>
+
+#include "Exceptions.h"
+#include "misc/Interval.h"
+#include "IntStream.h"
+
+#include "support/Utf8.h"
+#include "support/CPPUtils.h"
+
+#include "ANTLRInputStream.h"
+
+using namespace antlr4;
+using namespace antlrcpp;
+
+using misc::Interval;
+
+ANTLRInputStream::ANTLRInputStream() {
+  InitializeInstanceFields();
+}
+
+ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() {
+  load(input.data(), input.length());
+}
+
+ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) {
+  load(data, length);
+}
+
+ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() {
+  load(stream);
+}
+
+void ANTLRInputStream::load(const std::string &input, bool lenient) {
+  load(input.data(), input.size(), lenient);
+}
+
+void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
+  // Remove the UTF-8 BOM if present.
+  const char *bom = "\xef\xbb\xbf";
+  if (length >= 3 && strncmp(data, bom, 3) == 0) {
+    data += 3;
+    length -= 3;
+  }
+  if (lenient) {
+    _data = Utf8::lenientDecode(std::string_view(data, length));
+  } else {
+    auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length));
+    if (!maybe_utf32.has_value()) {
+      throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
+    }
+    _data = std::move(maybe_utf32).value();
+  }
+  p = 0;
+}
+
+void ANTLRInputStream::load(std::istream &stream, bool lenient) {
+  if (!stream.good() || stream.eof()) // No fail, bad or EOF.
+    return;
+
+  _data.clear();
+
+  std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
+  load(s.data(), s.length(), lenient);
+}
+
+void ANTLRInputStream::reset() {
+  p = 0;
+}
+
+void ANTLRInputStream::consume() {
+  if (p >= _data.size()) {
+    assert(LA(1) == IntStream::EOF);
+    throw IllegalStateException("cannot consume EOF");
+  }
+
+  // The guard above guarantees p < _data.size() here, so the advance needs
+  // no second bounds test.
+  ++p;
+}
+
+size_t ANTLRInputStream::LA(ssize_t i) {
+  if (i == 0) {
+    return 0; // undefined
+  }
+
+  ssize_t position = static_cast<ssize_t>(p);
+  if (i < 0) {
+    i++; // e.g., translate LA(-1) to use offset i=0; then _data[p+0-1]
+    if ((position + i - 1) < 0) {
+      return IntStream::EOF; // invalid; no char before first char
+    }
+  }
+
+  if ((position + i - 1) >= static_cast<ssize_t>(_data.size())) {
+    return IntStream::EOF;
+  }
+
+  return _data[static_cast<size_t>((position + i - 1))];
+}
+
+size_t ANTLRInputStream::LT(ssize_t i) {
+  return LA(i);
+}
+
+size_t ANTLRInputStream::index() {
+  return p;
+}
+
+size_t ANTLRInputStream::size() {
+  return _data.size();
+}
+
+// Mark/release do nothing. We have entire buffer.
+ssize_t ANTLRInputStream::mark() {
+  return -1;
+}
+
+void ANTLRInputStream::release(ssize_t /* marker */) {
+}
+
+void ANTLRInputStream::seek(size_t index) {
+  if (index <= p) {
+    p = index; // just jump; don't update stream state (line, ...)
+    return;
+  }
+  // seek forward, consume until p hits index or n (whichever comes first)
+  index = std::min(index, _data.size());
+  while (p < index) {
+    consume();
+  }
+}
+
+std::string ANTLRInputStream::getText(const Interval &interval) {
+  if (interval.a < 0 || interval.b < 0) {
+    return "";
+  }
+
+  const size_t start = static_cast<size_t>(interval.a);
+  if (start >= _data.size()) {
+    return ""; // Also covers the empty buffer, so size() - 1 below cannot wrap.
+  }
+
+  // Clamp the inclusive end to the last symbol. An interval whose end lies
+  // before its start selects nothing instead of wrapping around in size_t.
+  const size_t stop = std::min(static_cast<size_t>(interval.b), _data.size() - 1);
+  if (stop < start) {
+    return "";
+  }
+  const size_t count = stop - start + 1;
+
+  auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
+  if (!maybeUtf8.has_value()) {
+    throw IllegalArgumentException("Input stream contains invalid Unicode code points");
+  }
+  return std::move(maybeUtf8).value();
+}
+
+std::string ANTLRInputStream::getSourceName() const {
+  if (name.empty()) {
+    return IntStream::UNKNOWN_SOURCE_NAME;
+  }
+  return name;
+}
+
+std::string ANTLRInputStream::toString() const {
+  auto maybeUtf8 = Utf8::strictEncode(_data);
+  if (!maybeUtf8.has_value()) {
+    throw IllegalArgumentException("Input stream contains invalid Unicode code points");
+  }
+  return std::move(maybeUtf8).value();
+}
+
+void ANTLRInputStream::InitializeInstanceFields() {
+  p = 0;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h
new file mode 100644
index 0000000000..413eadefa4
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.h
@@ -0,0 +1,79 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include <string_view>
+
+#include "CharStream.h"
+
+namespace antlr4 {
+
+  // Vacuum all input from a stream and then treat it
+  // like a string. Can also pass in a string or char[] to use.
+  // Input is expected to be encoded in UTF-8 and converted to UTF-32 internally.
+  class ANTLR4CPP_PUBLIC ANTLRInputStream : public CharStream {
+  protected:
+    /// The data being scanned.
+    // UTF-32
+    std::u32string _data;
+
+    /// 0..n-1 index into string of next char
+    size_t p;
+
+  public:
+    /// What is name or source of this char stream?
+    std::string name;
+
+    ANTLRInputStream();
+
+    ANTLRInputStream(std::string_view input);
+
+    ANTLRInputStream(const char *data, size_t length);
+    ANTLRInputStream(std::istream &stream);
+
+    virtual void load(const std::string &input, bool lenient);
+    virtual void load(const char *data, size_t length, bool lenient);
+    virtual void load(std::istream &stream, bool lenient);
+
+    virtual void load(const std::string &input) { load(input, false); }
+    virtual void load(const char *data, size_t length) { load(data, length, false); }
+    virtual void load(std::istream &stream) { load(stream, false); }
+
+    /// Reset the stream so that it's in the same state it was
+    /// when the object was created *except* the data array is not
+    /// touched.
+    virtual void reset();
+    virtual void consume() override;
+    virtual size_t LA(ssize_t i) override;
+    virtual size_t LT(ssize_t i);
+
+    /// <summary>
+    /// Return the current input symbol index 0..n where n indicates the
+    /// last symbol has been read. The index is the index of char to
+    /// be returned from LA(1).
+    /// </summary>
+    virtual size_t index() override;
+    virtual size_t size() override;
+
+    /// <summary>
+    /// mark/release do nothing; we have entire buffer </summary>
+    virtual ssize_t mark() override;
+    virtual void release(ssize_t marker) override;
+
+    /// <summary>
+    /// consume() ahead until p==index; can't just set p=index as we must
+    /// update line and charPositionInLine. If we seek backwards, just set p
+    /// </summary>
+    virtual void seek(size_t index) override;
+    virtual std::string getText(const misc::Interval &interval) override;
+    virtual std::string getSourceName() const override;
+    virtual std::string toString() const override;
+
+  private:
+    void InitializeInstanceFields();
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.cpp b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.cpp
new file mode 100644
index 0000000000..781a13b547
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.cpp
@@ -0,0 +1,61 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "Exceptions.h"
+#include "ParserRuleContext.h"
+#include "InputMismatchException.h"
+#include "Parser.h"
+
+#include "BailErrorStrategy.h"
+
+using namespace antlr4;
+
+void BailErrorStrategy::recover(Parser *recognizer, std::exception_ptr e) {
+  // Record the error on the current context and on each of its ancestors.
+  // The null test ends the walk past the root and also keeps a null
+  // getContext() result from being dereferenced.
+  for (ParserRuleContext *context = recognizer->getContext(); context != nullptr;
+       context = static_cast<ParserRuleContext *>(context->parent)) {
+    context->exception = e;
+  }
+
+  try {
+    std::rethrow_exception(e); // Throw the exception to be able to catch and rethrow nested.
+#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
+  } catch (RecognitionException &inner) {
+    throw ParseCancellationException(inner.what());
+#else
+  } catch (RecognitionException & /*inner*/) {
+    std::throw_with_nested(ParseCancellationException());
+#endif
+  }
+}
+
+Token* BailErrorStrategy::recoverInline(Parser *recognizer)  {
+  InputMismatchException e(recognizer);
+  std::exception_ptr exception = std::make_exception_ptr(e);
+
+  // Record the error on the current context and on each of its ancestors.
+  // The null test ends the walk past the root and also keeps a null
+  // getContext() result from being dereferenced.
+  for (ParserRuleContext *context = recognizer->getContext(); context != nullptr;
+       context = static_cast<ParserRuleContext *>(context->parent)) {
+    context->exception = exception;
+  }
+
+  try {
+    throw e;
+#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026
+  } catch (InputMismatchException &inner) {
+    throw ParseCancellationException(inner.what());
+#else
+  } catch (InputMismatchException & /*inner*/) {
+    std::throw_with_nested(ParseCancellationException());
+#endif
+  }
+}
+
+void BailErrorStrategy::sync(Parser * /*recognizer*/) {
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.h b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.h
new file mode 100644
index 0000000000..598f993022
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/BailErrorStrategy.h
@@ -0,0 +1,59 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "DefaultErrorStrategy.h"
+
+namespace antlr4 {
+
+  /**
+   * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
+   * by immediately canceling the parse operation with a
+   * {@link ParseCancellationException}. The implementation ensures that the
+   * {@link ParserRuleContext#exception} field is set for all parse tree nodes
+   * that were not completed prior to encountering the error.
+   *
+   * <p>
+   * This error strategy is useful in the following scenarios.</p>
+   *
+   * <ul>
+   * <li><strong>Two-stage parsing:</strong> This error strategy allows the first
+   * stage of two-stage parsing to immediately terminate if an error is
+   * encountered, and immediately fall back to the second stage. In addition to
+   * avoiding wasted work by attempting to recover from errors here, the empty
+   * implementation of {@link BailErrorStrategy#sync} improves the performance of
+   * the first stage.</li>
+   * <li><strong>Silent validation:</strong> When syntax errors are not being
+   * reported or logged, and the parse result is simply ignored if errors occur,
+   * the {@link BailErrorStrategy} avoids wasting work on recovering from errors
+   * when the result will be ignored either way.</li>
+   * </ul>
+   *
+   * <p>
+   * {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
+   *
+   * @see Parser#setErrorHandler(ANTLRErrorStrategy)
+   */
+  class ANTLR4CPP_PUBLIC BailErrorStrategy : public DefaultErrorStrategy {
+  public:
+    /// <summary>
+    /// Instead of recovering from exception {@code e}, re-throw it wrapped
+    /// in a <seealso cref="ParseCancellationException"/> so it is not caught by the
+    /// rule function catches. Use <seealso cref="Exception#getCause()"/> to get the
+    /// original <seealso cref="RecognitionException"/>.
+    /// </summary>
+    virtual void recover(Parser *recognizer, std::exception_ptr e) override;
+
+    /// Make sure we don't attempt to recover inline; if the parser
+    /// successfully recovers, it won't throw an exception.
+    virtual Token* recoverInline(Parser *recognizer) override;
+
+    /// <summary>
+    /// Make sure we don't attempt to recover from problems in subrules. </summary>
+    virtual void sync(Parser *recognizer) override;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.cpp
new file mode 100644
index 0000000000..cdcca8bc5c
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.cpp
@@ -0,0 +1,25 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "BaseErrorListener.h"
+#include "RecognitionException.h"
+
+using namespace antlr4;
+
+void BaseErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, size_t /*line*/,
+  size_t /*charPositionInLine*/, const std::string &/*msg*/, std::exception_ptr /*e*/) {
+}
+
+void BaseErrorListener::reportAmbiguity(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/,
+  size_t /*stopIndex*/, bool /*exact*/, const antlrcpp::BitSet &/*ambigAlts*/, atn::ATNConfigSet * /*configs*/) {
+}
+
+void BaseErrorListener::reportAttemptingFullContext(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/,
+  size_t /*stopIndex*/, const antlrcpp::BitSet &/*conflictingAlts*/, atn::ATNConfigSet * /*configs*/) {
+}
+
+void BaseErrorListener::reportContextSensitivity(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/,
+  size_t /*stopIndex*/, size_t /*prediction*/, atn::ATNConfigSet * /*configs*/) {
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.h
new file mode 100644
index 0000000000..317785aa64
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/BaseErrorListener.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "ANTLRErrorListener.h"
+
+namespace antlrcpp {
+  class BitSet;
+}
+
+namespace antlr4 {
+
+  /**
+   * Provides an empty default implementation of {@link ANTLRErrorListener}. The
+   * default implementation of each method does nothing, but can be overridden as
+   * necessary.
+   */
+  class ANTLR4CPP_PUBLIC BaseErrorListener : public ANTLRErrorListener {
+    // NOTE(review): no access specifier precedes these overrides, so they are private in this class - confirm intended.
+    virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine,
+      const std::string &msg, std::exception_ptr e) override;
+
+    virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact,
+      const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override;
+
+    virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
+      const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override;
+
+    virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex,
+      size_t prediction, atn::ATNConfigSet *configs) override;
+  };
+
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp
new file mode 100644
index 0000000000..4eaff2c852
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp
@@ -0,0 +1,414 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "WritableToken.h" +#include "Lexer.h" +#include "RuleContext.h" +#include "misc/Interval.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "BufferedTokenStream.h" + +using namespace antlr4; +using namespace antlrcpp; + +BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource){ + InitializeInstanceFields(); +} + +TokenSource* BufferedTokenStream::getTokenSource() const { + return _tokenSource; +} + +size_t BufferedTokenStream::index() { + return _p; +} + +ssize_t BufferedTokenStream::mark() { + return 0; +} + +void BufferedTokenStream::release(ssize_t /*marker*/) { + // no resources to release +} + +void BufferedTokenStream::reset() { + seek(0); +} + +void BufferedTokenStream::seek(size_t index) { + lazyInit(); + _p = adjustSeekIndex(index); +} + +size_t BufferedTokenStream::size() { + return _tokens.size(); +} + +void BufferedTokenStream::consume() { + bool skipEofCheck = false; + if (!_needSetup) { + if (_fetchedEOF) { + // the last token in tokens is EOF. skip check if p indexes any + // fetched token except the last. + skipEofCheck = _p < _tokens.size() - 1; + } else { + // no EOF token in tokens. skip check if p indexes a fetched token. + skipEofCheck = _p < _tokens.size(); + } + } else { + // not yet initialized + skipEofCheck = false; + } + + if (!skipEofCheck && LA(1) == Token::EOF) { + throw IllegalStateException("cannot consume EOF"); + } + + if (sync(_p + 1)) { + _p = adjustSeekIndex(_p + 1); + } +} + +bool BufferedTokenStream::sync(size_t i) { + if (i + 1 < _tokens.size()) + return true; + size_t n = i - _tokens.size() + 1; // how many more elements we need? 
+ + if (n > 0) { + size_t fetched = fetch(n); + return fetched >= n; + } + + return true; +} + +size_t BufferedTokenStream::fetch(size_t n) { + if (_fetchedEOF) { + return 0; + } + + size_t i = 0; + while (i < n) { + std::unique_ptr<Token> t(_tokenSource->nextToken()); + + if (is<WritableToken *>(t.get())) { + (static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size()); + } + + _tokens.push_back(std::move(t)); + ++i; + + if (_tokens.back()->getType() == Token::EOF) { + _fetchedEOF = true; + break; + } + } + + return i; +} + +Token* BufferedTokenStream::get(size_t i) const { + if (i >= _tokens.size()) { + throw IndexOutOfBoundsException(std::string("token index ") + + std::to_string(i) + + std::string(" out of range 0..") + + std::to_string(_tokens.size() - 1)); + } + return _tokens[i].get(); +} + +std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) { + std::vector<Token *> subset; + + lazyInit(); + + if (_tokens.empty()) { + return subset; + } + + if (stop >= _tokens.size()) { + stop = _tokens.size() - 1; + } + for (size_t i = start; i <= stop; i++) { + Token *t = _tokens[i].get(); + if (t->getType() == Token::EOF) { + break; + } + subset.push_back(t); + } + return subset; +} + +size_t BufferedTokenStream::LA(ssize_t i) { + return LT(i)->getType(); +} + +Token* BufferedTokenStream::LB(size_t k) { + if (k > _p) { + return nullptr; + } + return _tokens[_p - k].get(); +} + +Token* BufferedTokenStream::LT(ssize_t k) { + lazyInit(); + if (k == 0) { + return nullptr; + } + if (k < 0) { + return LB(-k); + } + + size_t i = _p + k - 1; + sync(i); + if (i >= _tokens.size()) { // return EOF token + // EOF must be last token + return _tokens.back().get(); + } + + return _tokens[i].get(); +} + +ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) { + return i; +} + +void BufferedTokenStream::lazyInit() { + if (_needSetup) { + setup(); + } +} + +void BufferedTokenStream::setup() { + _needSetup = false; + sync(0); + _p = adjustSeekIndex(0); +} + 
+void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) { + _tokenSource = tokenSource; + _tokens.clear(); + _fetchedEOF = false; + _needSetup = true; +} + +std::vector<Token *> BufferedTokenStream::getTokens() { + std::vector<Token *> result; + for (auto &t : _tokens) + result.push_back(t.get()); + return result; +} + +std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) { + return getTokens(start, stop, std::vector<size_t>()); +} + +std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) { + lazyInit(); + if (stop >= _tokens.size() || start >= _tokens.size()) { + throw IndexOutOfBoundsException(std::string("start ") + + std::to_string(start) + + std::string(" or stop ") + + std::to_string(stop) + + std::string(" not in 0..") + + std::to_string(_tokens.size() - 1)); + } + + std::vector<Token *> filteredTokens; + + if (start > stop) { + return filteredTokens; + } + + for (size_t i = start; i <= stop; i++) { + Token *tok = _tokens[i].get(); + + if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) { + filteredTokens.push_back(tok); + } + } + return filteredTokens; +} + +std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) { + std::vector<size_t> s; + s.push_back(ttype); + return getTokens(start, stop, s); +} + +ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) { + sync(i); + if (i >= size()) { + return size() - 1; + } + + Token *token = _tokens[i].get(); + while (token->getChannel() != channel) { + if (token->getType() == Token::EOF) { + return i; + } + i++; + sync(i); + token = _tokens[i].get(); + } + return i; +} + +ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) { + sync(i); + if (i >= size()) { + // the EOF token is on every channel + return size() - 1; + } + + while (true) { + Token *token = _tokens[i].get(); + if (token->getType() == 
Token::EOF || token->getChannel() == channel) { + return i; + } + + if (i == 0) + return -1; + i--; + } + return i; +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) { + lazyInit(); + if (tokenIndex >= _tokens.size()) { + throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); + } + + ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL); + size_t to; + size_t from = tokenIndex + 1; + // if none onchannel to right, nextOnChannel=-1 so set to = last token + if (nextOnChannel == -1) { + to = static_cast<ssize_t>(size() - 1); + } else { + to = nextOnChannel; + } + + return filterForChannel(from, to, channel); +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) { + return getHiddenTokensToRight(tokenIndex, -1); +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) { + lazyInit(); + if (tokenIndex >= _tokens.size()) { + throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); + } + + if (tokenIndex == 0) { + // Obviously no tokens can appear before the first token. 
+ return { }; + } + + ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL); + if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) { + return { }; + } + // if none onchannel to left, prevOnChannel=-1 then from=0 + size_t from = static_cast<size_t>(prevOnChannel + 1); + size_t to = tokenIndex - 1; + + return filterForChannel(from, to, channel); +} + +std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) { + return getHiddenTokensToLeft(tokenIndex, -1); +} + +std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) { + std::vector<Token *> hidden; + for (size_t i = from; i <= to; i++) { + Token *t = _tokens[i].get(); + if (channel == -1) { + if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) { + hidden.push_back(t); + } + } else { + if (t->getChannel() == static_cast<size_t>(channel)) { + hidden.push_back(t); + } + } + } + + return hidden; +} + +bool BufferedTokenStream::isInitialized() const { + return !_needSetup; +} + +/** + * Get the text of all tokens in this buffer. 
+ */ +std::string BufferedTokenStream::getSourceName() const +{ + return _tokenSource->getSourceName(); +} + +std::string BufferedTokenStream::getText() { + fill(); + return getText(misc::Interval(0U, size() - 1)); +} + +std::string BufferedTokenStream::getText(const misc::Interval &interval) { + lazyInit(); + size_t start = interval.a; + size_t stop = interval.b; + if (start == INVALID_INDEX || stop == INVALID_INDEX) { + return ""; + } + sync(stop); + if (stop >= _tokens.size()) { + stop = _tokens.size() - 1; + } + + std::stringstream ss; + for (size_t i = start; i <= stop; i++) { + Token *t = _tokens[i].get(); + if (t->getType() == Token::EOF) { + break; + } + ss << t->getText(); + } + return ss.str(); +} + +std::string BufferedTokenStream::getText(RuleContext *ctx) { + return getText(ctx->getSourceInterval()); +} + +std::string BufferedTokenStream::getText(Token *start, Token *stop) { + if (start != nullptr && stop != nullptr) { + return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex())); + } + + return ""; +} + +void BufferedTokenStream::fill() { + lazyInit(); + const size_t blockSize = 1000; + while (true) { + size_t fetched = fetch(blockSize); + if (fetched < blockSize) { + return; + } + } +} + +void BufferedTokenStream::InitializeInstanceFields() { + _needSetup = true; + _fetchedEOF = false; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h new file mode 100644 index 0000000000..2161471241 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h @@ -0,0 +1,200 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "TokenStream.h" + +namespace antlr4 { + + /** + * This implementation of {@link TokenStream} loads tokens from a + * {@link TokenSource} on-demand, and places the tokens in a buffer to provide + * access to any previous token by index. + * + * <p> + * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream filter tokens to only those on a particular + * channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a + * {@link CommonTokenStream}.</p> + */ + class ANTLR4CPP_PUBLIC BufferedTokenStream : public TokenStream { + public: + BufferedTokenStream(TokenSource *tokenSource); + BufferedTokenStream(const BufferedTokenStream& other) = delete; + + BufferedTokenStream& operator = (const BufferedTokenStream& other) = delete; + + virtual TokenSource* getTokenSource() const override; + virtual size_t index() override; + virtual ssize_t mark() override; + + virtual void release(ssize_t marker) override; + virtual void reset(); + virtual void seek(size_t index) override; + + virtual size_t size() override; + virtual void consume() override; + + virtual Token* get(size_t i) const override; + + /// Get all tokens from start..stop inclusively. + virtual std::vector<Token *> get(size_t start, size_t stop); + + virtual size_t LA(ssize_t i) override; + virtual Token* LT(ssize_t k) override; + + /// Reset this token stream by setting its token source. + virtual void setTokenSource(TokenSource *tokenSource); + virtual std::vector<Token *> getTokens(); + virtual std::vector<Token *> getTokens(size_t start, size_t stop); + + /// <summary> + /// Given a start and stop index, return a List of all tokens in + /// the token type BitSet. Return null if no tokens were found. This + /// method looks at both on and off channel tokens. 
+ /// </summary> + virtual std::vector<Token *> getTokens(size_t start, size_t stop, const std::vector<size_t> &types); + virtual std::vector<Token *> getTokens(size_t start, size_t stop, size_t ttype); + + /// Collect all tokens on specified channel to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or + /// EOF. If channel is -1, find any non default channel token. + virtual std::vector<Token *> getHiddenTokensToRight(size_t tokenIndex, ssize_t channel); + + /// <summary> + /// Collect all hidden tokens (any off-default channel) to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL + /// or EOF. + /// </summary> + virtual std::vector<Token *> getHiddenTokensToRight(size_t tokenIndex); + + /// <summary> + /// Collect all tokens on specified channel to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// If channel is -1, find any non default channel token. + /// </summary> + virtual std::vector<Token *> getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel); + + /// <summary> + /// Collect all hidden tokens (any off-default channel) to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// </summary> + virtual std::vector<Token *> getHiddenTokensToLeft(size_t tokenIndex); + + virtual std::string getSourceName() const override; + virtual std::string getText() override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getText(RuleContext *ctx) override; + virtual std::string getText(Token *start, Token *stop) override; + + /// Get all tokens from lexer until EOF. + virtual void fill(); + + protected: + /** + * The {@link TokenSource} from which tokens for this stream are fetched. + */ + TokenSource *_tokenSource; + + /** + * A collection of all tokens fetched from the token source. 
The list is + * considered a complete view of the input once {@link #fetchedEOF} is set + * to {@code true}. + */ + std::vector<std::unique_ptr<Token>> _tokens; + + /** + * The index into {@link #tokens} of the current token (next token to + * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be + * {@link #LT LT(1)}. + * + * <p>This field is set to -1 when the stream is first constructed or when + * {@link #setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.</p> + */ + // ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead. + // Use bool isInitialized() to find out if this stream has started reading. + size_t _p; + + /** + * Indicates whether the {@link Token#EOF} token has been fetched from + * {@link #tokenSource} and added to {@link #tokens}. This field improves + * performance for the following cases: + * + * <ul> + * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent + * consuming the EOF symbol is optimized by checking the values of + * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li> + * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into + * {@link #tokens} is trivial with this field.</li> + * <ul> + */ + bool _fetchedEOF; + + /// <summary> + /// Make sure index {@code i} in tokens has a token. + /// </summary> + /// <returns> {@code true} if a token is located at index {@code i}, otherwise + /// {@code false}. </returns> + /// <seealso cref= #get(int i) </seealso> + virtual bool sync(size_t i); + + /// <summary> + /// Add {@code n} elements to buffer. + /// </summary> + /// <returns> The actual number of elements added to the buffer. 
</returns> + virtual size_t fetch(size_t n); + + virtual Token* LB(size_t k); + + /// Allowed derived classes to modify the behavior of operations which change + /// the current stream position by adjusting the target token index of a seek + /// operation. The default implementation simply returns {@code i}. If an + /// exception is thrown in this method, the current stream index should not be + /// changed. + /// <p/> + /// For example, <seealso cref="CommonTokenStream"/> overrides this method to ensure that + /// the seek target is always an on-channel token. + /// + /// <param name="i"> The target token index. </param> + /// <returns> The adjusted target token index. </returns> + virtual ssize_t adjustSeekIndex(size_t i); + void lazyInit(); + virtual void setup(); + + /** + * Given a starting index, return the index of the next token on channel. + * Return {@code i} if {@code tokens[i]} is on channel. Return the index of + * the EOF token if there are no tokens on channel between {@code i} and + * EOF. + */ + virtual ssize_t nextTokenOnChannel(size_t i, size_t channel); + + /** + * Given a starting index, return the index of the previous token on + * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1 + * if there are no tokens on channel between {@code i} and 0. + * + * <p> + * If {@code i} specifies an index at or after the EOF token, the EOF token + * index is returned. 
This is due to the fact that the EOF token is treated + * as though it were on every channel.</p> + */ + virtual ssize_t previousTokenOnChannel(size_t i, size_t channel); + + virtual std::vector<Token *> filterForChannel(size_t from, size_t to, ssize_t channel); + + bool isInitialized() const; + + private: + bool _needSetup; + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CharStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/CharStream.cpp new file mode 100644 index 0000000000..b05874c8bf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CharStream.cpp @@ -0,0 +1,11 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "CharStream.h" + +using namespace antlr4; + +CharStream::~CharStream() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CharStream.h b/contrib/libs/antlr4_cpp_runtime/src/CharStream.h new file mode 100644 index 0000000000..a9952dbbac --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CharStream.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" +#include "misc/Interval.h" + +namespace antlr4 { + + /// A source of characters for an ANTLR lexer. + class ANTLR4CPP_PUBLIC CharStream : public IntStream { + public: + virtual ~CharStream(); + + /// This method returns the text for a range of characters within this input + /// stream. This method is guaranteed to not throw an exception if the + /// specified interval lies entirely within a marked range. For more + /// information about marked ranges, see IntStream::mark. 
+ /// + /// <param name="interval"> an interval within the stream </param> + /// <returns> the text of the specified interval + /// </returns> + /// <exception cref="NullPointerException"> if {@code interval} is {@code null} </exception> + /// <exception cref="IllegalArgumentException"> if {@code interval.a < 0}, or if + /// {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or + /// past the end of the stream </exception> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// getting the text of the specified interval </exception> + virtual std::string getText(const misc::Interval &interval) = 0; + + virtual std::string toString() const = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.cpp new file mode 100644 index 0000000000..6e9f06a249 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.cpp @@ -0,0 +1,193 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "TokenSource.h" +#include "CharStream.h" +#include "Recognizer.h" +#include "Vocabulary.h" + +#include "misc/Interval.h" + +#include "support/CPPUtils.h" +#include "support/StringUtils.h" + +#include "CommonToken.h" + +using namespace antlr4; +using namespace antlr4::misc; + +using namespace antlrcpp; + +const std::pair<TokenSource*, CharStream*> CommonToken::EMPTY_SOURCE; + +CommonToken::CommonToken(size_t type) { + InitializeInstanceFields(); + _type = type; +} + +CommonToken::CommonToken(std::pair<TokenSource*, CharStream*> source, size_t type, size_t channel, size_t start, size_t stop) { + InitializeInstanceFields(); + _source = source; + _type = type; + _channel = channel; + _start = start; + _stop = stop; + if (_source.first != nullptr) { + _line = static_cast<int>(source.first->getLine()); + _charPositionInLine = source.first->getCharPositionInLine(); + } +} + +CommonToken::CommonToken(size_t type, const std::string &text) { + InitializeInstanceFields(); + _type = type; + _channel = DEFAULT_CHANNEL; + _text = text; + _source = EMPTY_SOURCE; +} + +CommonToken::CommonToken(Token *oldToken) { + InitializeInstanceFields(); + _type = oldToken->getType(); + _line = oldToken->getLine(); + _index = oldToken->getTokenIndex(); + _charPositionInLine = oldToken->getCharPositionInLine(); + _channel = oldToken->getChannel(); + _start = oldToken->getStartIndex(); + _stop = oldToken->getStopIndex(); + + if (is<CommonToken *>(oldToken)) { + _text = (static_cast<CommonToken *>(oldToken))->_text; + _source = (static_cast<CommonToken *>(oldToken))->_source; + } else { + _text = oldToken->getText(); + _source = { oldToken->getTokenSource(), oldToken->getInputStream() }; + } +} + +size_t CommonToken::getType() const { + return _type; +} + +void CommonToken::setLine(size_t line) { + _line = line; +} + +std::string CommonToken::getText() const { + if (!_text.empty()) { + return _text; + } + + CharStream *input = getInputStream(); + if (input == nullptr) { + return 
""; + } + size_t n = input->size(); + if (_start < n && _stop < n) { + return input->getText(misc::Interval(_start, _stop)); + } else { + return "<EOF>"; + } +} + +void CommonToken::setText(const std::string &text) { + _text = text; +} + +size_t CommonToken::getLine() const { + return _line; +} + +size_t CommonToken::getCharPositionInLine() const { + return _charPositionInLine; +} + +void CommonToken::setCharPositionInLine(size_t charPositionInLine) { + _charPositionInLine = charPositionInLine; +} + +size_t CommonToken::getChannel() const { + return _channel; +} + +void CommonToken::setChannel(size_t channel) { + _channel = channel; +} + +void CommonToken::setType(size_t type) { + _type = type; +} + +size_t CommonToken::getStartIndex() const { + return _start; +} + +void CommonToken::setStartIndex(size_t start) { + _start = start; +} + +size_t CommonToken::getStopIndex() const { + return _stop; +} + +void CommonToken::setStopIndex(size_t stop) { + _stop = stop; +} + +size_t CommonToken::getTokenIndex() const { + return _index; +} + +void CommonToken::setTokenIndex(size_t index) { + _index = index; +} + +antlr4::TokenSource *CommonToken::getTokenSource() const { + return _source.first; +} + +antlr4::CharStream *CommonToken::getInputStream() const { + return _source.second; +} + +std::string CommonToken::toString() const { + return toString(nullptr); +} + +std::string CommonToken::toString(Recognizer *r) const { + std::stringstream ss; + + std::string channelStr; + if (_channel > 0) { + channelStr = ",channel=" + std::to_string(_channel); + } + std::string txt = getText(); + if (!txt.empty()) { + txt = antlrcpp::escapeWhitespace(txt); + } else { + txt = "<no text>"; + } + + std::string typeString = std::to_string(symbolToNumeric(_type)); + if (r != nullptr) + typeString = r->getVocabulary().getDisplayName(_type); + + ss << "[@" << symbolToNumeric(getTokenIndex()) << "," << symbolToNumeric(_start) << ":" << symbolToNumeric(_stop) + << "='" << txt << "',<" << 
typeString << ">" << channelStr << "," << _line << ":" + << getCharPositionInLine() << "]"; + + return ss.str(); +} + +void CommonToken::InitializeInstanceFields() { + _type = 0; + _line = 0; + _charPositionInLine = INVALID_INDEX; + _channel = DEFAULT_CHANNEL; + _index = INVALID_INDEX; + _start = 0; + _stop = 0; + _source = EMPTY_SOURCE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonToken.h b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.h new file mode 100644 index 0000000000..3fbc2ae4f5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonToken.h @@ -0,0 +1,158 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "WritableToken.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC CommonToken : public WritableToken { + protected: + /** + * An empty {@link Pair} which is used as the default value of + * {@link #source} for tokens that do not have a source. + */ + static const std::pair<TokenSource *, CharStream *> EMPTY_SOURCE; + + /** + * This is the backing field for {@link #getType} and {@link #setType}. + */ + size_t _type; + + /** + * This is the backing field for {@link #getLine} and {@link #setLine}. + */ + size_t _line; + + /** + * This is the backing field for {@link #getCharPositionInLine} and + * {@link #setCharPositionInLine}. + */ + size_t _charPositionInLine; // set to invalid position + + /** + * This is the backing field for {@link #getChannel} and + * {@link #setChannel}. + */ + size_t _channel; + + /** + * This is the backing field for {@link #getTokenSource} and + * {@link #getInputStream}. + * + * <p> + * These properties share a field to reduce the memory footprint of + * {@link CommonToken}. 
Tokens created by a {@link CommonTokenFactory} from + * the same source and input stream share a reference to the same + * {@link Pair} containing these values.</p> + */ + + std::pair<TokenSource *, CharStream *> _source; // ml: pure references, usually from statically allocated classes. + + /** + * This is the backing field for {@link #getText} when the token text is + * explicitly set in the constructor or via {@link #setText}. + * + * @see #getText() + */ + std::string _text; + + /** + * This is the backing field for {@link #getTokenIndex} and + * {@link #setTokenIndex}. + */ + size_t _index; + + /** + * This is the backing field for {@link #getStartIndex} and + * {@link #setStartIndex}. + */ + size_t _start; + + /** + * This is the backing field for {@link #getStopIndex} and + * {@link #setStopIndex}. + */ + size_t _stop; + + public: + /** + * Constructs a new {@link CommonToken} with the specified token type. + * + * @param type The token type. + */ + CommonToken(size_t type); + CommonToken(std::pair<TokenSource*, CharStream*> source, size_t type, size_t channel, size_t start, size_t stop); + + /** + * Constructs a new {@link CommonToken} with the specified token type and + * text. + * + * @param type The token type. + * @param text The text of the token. + */ + CommonToken(size_t type, const std::string &text); + + /** + * Constructs a new {@link CommonToken} as a copy of another {@link Token}. + * + * <p> + * If {@code oldToken} is also a {@link CommonToken} instance, the newly + * constructed token will share a reference to the {@link #text} field and + * the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will + * be assigned the result of calling {@link #getText}, and {@link #source} + * will be constructed from the result of {@link Token#getTokenSource} and + * {@link Token#getInputStream}.</p> + * + * @param oldToken The token to copy. 
+ */ + CommonToken(Token *oldToken); + + virtual size_t getType() const override; + + /** + * Explicitly set the text for this token. If {code text} is not + * {@code null}, then {@link #getText} will return this value rather than + * extracting the text from the input. + * + * @param text The explicit text of the token, or {@code null} if the text + * should be obtained from the input along with the start and stop indexes + * of the token. + */ + virtual void setText(const std::string &text) override; + virtual std::string getText() const override; + + virtual void setLine(size_t line) override; + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() const override; + virtual void setCharPositionInLine(size_t charPositionInLine) override; + + virtual size_t getChannel() const override; + virtual void setChannel(size_t channel) override; + + virtual void setType(size_t type) override; + + virtual size_t getStartIndex() const override; + virtual void setStartIndex(size_t start); + + virtual size_t getStopIndex() const override; + virtual void setStopIndex(size_t stop); + + virtual size_t getTokenIndex() const override; + virtual void setTokenIndex(size_t index) override; + + virtual TokenSource *getTokenSource() const override; + virtual CharStream *getInputStream() const override; + + virtual std::string toString() const override; + + virtual std::string toString(Recognizer *r) const; + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.cpp b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.cpp new file mode 100644 index 0000000000..23d8f7003a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" +#include "CommonToken.h" +#include "CharStream.h" + +#include "CommonTokenFactory.h" + +using namespace antlr4; + +const std::unique_ptr<TokenFactory<CommonToken>> CommonTokenFactory::DEFAULT(new CommonTokenFactory); + +CommonTokenFactory::CommonTokenFactory(bool copyText_) : copyText(copyText_) { +} + +CommonTokenFactory::CommonTokenFactory() : CommonTokenFactory(false) { +} + +std::unique_ptr<CommonToken> CommonTokenFactory::create(std::pair<TokenSource*, CharStream*> source, size_t type, + const std::string &text, size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) { + + std::unique_ptr<CommonToken> t(new CommonToken(source, type, channel, start, stop)); + t->setLine(line); + t->setCharPositionInLine(charPositionInLine); + if (text != "") { + t->setText(text); + } else if (copyText && source.second != nullptr) { + t->setText(source.second->getText(misc::Interval(start, stop))); + } + + return t; +} + +std::unique_ptr<CommonToken> CommonTokenFactory::create(size_t type, const std::string &text) { + return std::unique_ptr<CommonToken>(new CommonToken(type, text)); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.h b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.h new file mode 100644 index 0000000000..0ae1a0353c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenFactory.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenFactory.h" + +namespace antlr4 { + + /** + * This default implementation of {@link TokenFactory} creates + * {@link CommonToken} objects. + */ + class ANTLR4CPP_PUBLIC CommonTokenFactory : public TokenFactory<CommonToken> { + public: + /** + * The default {@link CommonTokenFactory} instance. 
+ * + * <p> + * This token factory does not explicitly copy token text when constructing + * tokens.</p> + */ + static const std::unique_ptr<TokenFactory<CommonToken>> DEFAULT; + + protected: + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + * <p> + * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.</p> + */ + const bool copyText; + + public: + /** + * Constructs a {@link CommonTokenFactory} with the specified value for + * {@link #copyText}. + * + * <p> + * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance + * should be used instead of constructing a new instance.</p> + * + * @param copyText The value for {@link #copyText}. + */ + CommonTokenFactory(bool copyText); + + /** + * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to + * {@code false}. 
+ * + * <p> + * The {@link #DEFAULT} instance should be used instead of calling this + * directly.</p> + */ + CommonTokenFactory(); + + virtual std::unique_ptr<CommonToken> create(std::pair<TokenSource*, CharStream*> source, size_t type, + const std::string &text, size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) override; + + virtual std::unique_ptr<CommonToken> create(size_t type, const std::string &text) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.cpp new file mode 100644 index 0000000000..02a2e55af3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" + +#include "CommonTokenStream.h" + +using namespace antlr4; + +CommonTokenStream::CommonTokenStream(TokenSource *tokenSource) : CommonTokenStream(tokenSource, Token::DEFAULT_CHANNEL) { +} + +CommonTokenStream::CommonTokenStream(TokenSource *tokenSource, size_t channel_) +: BufferedTokenStream(tokenSource), channel(channel_) { +} + +ssize_t CommonTokenStream::adjustSeekIndex(size_t i) { + return nextTokenOnChannel(i, channel); +} + +Token* CommonTokenStream::LB(size_t k) { + if (k == 0 || k > _p) { + return nullptr; + } + + ssize_t i = static_cast<ssize_t>(_p); + size_t n = 1; + // find k good tokens looking backwards + while (n <= k) { + // skip off-channel tokens + i = previousTokenOnChannel(i - 1, channel); + n++; + } + if (i < 0) { + return nullptr; + } + + return _tokens[i].get(); +} + +Token* CommonTokenStream::LT(ssize_t k) { + lazyInit(); + if (k == 0) { + return nullptr; + } + if (k < 0) { + return LB(static_cast<size_t>(-k)); + } + size_t i = _p; + ssize_t n = 1; // we know tokens[p] is a 
good one + // find k good tokens + while (n < k) { + // skip off-channel tokens, but make sure to not look past EOF + if (sync(i + 1)) { + i = nextTokenOnChannel(i + 1, channel); + } + n++; + } + + return _tokens[i].get(); +} + +int CommonTokenStream::getNumberOfOnChannelTokens() { + int n = 0; + fill(); + for (size_t i = 0; i < _tokens.size(); i++) { + Token *t = _tokens[i].get(); + if (t->getChannel() == channel) { + n++; + } + if (t->getType() == Token::EOF) { + break; + } + } + return n; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.h new file mode 100644 index 0000000000..fde72c7386 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/CommonTokenStream.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BufferedTokenStream.h" + +namespace antlr4 { + + /** + * This class extends {@link BufferedTokenStream} with functionality to filter + * token streams to tokens on a particular channel (tokens where + * {@link Token#getChannel} returns a particular value). + * + * <p> + * This token stream provides access to all tokens by index or when calling + * methods like {@link #getText}. The channel filtering is only used for code + * accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and + * {@link #LB}.</p> + * + * <p> + * By default, tokens are placed on the default channel + * ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the + * {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to + * call {@link Lexer#setChannel}. 
+ * </p> + * + * <p> + * Note: lexer rules which use the {@code ->skip} lexer command or call + * {@link Lexer#skip} do not produce tokens at all, so input text matched by + * such a rule will not be available as part of the token stream, regardless of + * channel.</p> + */ + class ANTLR4CPP_PUBLIC CommonTokenStream : public BufferedTokenStream { + public: + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and the default token channel ({@link Token#DEFAULT_CHANNEL}). + * + * @param tokenSource The token source. + */ + CommonTokenStream(TokenSource *tokenSource); + + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and filtering tokens to the specified channel. Only tokens whose + * {@link Token#getChannel} matches {@code channel} or have the + * {@link Token#getType} equal to {@link Token#EOF} will be returned by the + * token stream lookahead methods. + * + * @param tokenSource The token source. + * @param channel The channel to use for filtering tokens. + */ + CommonTokenStream(TokenSource *tokenSource, size_t channel); + + virtual Token* LT(ssize_t k) override; + + /// Count EOF just once. + virtual int getNumberOfOnChannelTokens(); + + protected: + /** + * Specifies the channel to use for filtering tokens. + * + * <p> + * The default value is {@link Token#DEFAULT_CHANNEL}, which matches the + * default channel assigned to tokens created by the lexer.</p> + */ + size_t channel; + + virtual ssize_t adjustSeekIndex(size_t i) override; + + virtual Token* LB(size_t k) override; + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.cpp new file mode 100644 index 0000000000..c16f949cd2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ConsoleErrorListener.h" + +using namespace antlr4; + +ConsoleErrorListener ConsoleErrorListener::INSTANCE; + +void ConsoleErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, + size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) { + std::cerr << "line " << line << ":" << charPositionInLine << " " << msg << std::endl; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.h new file mode 100644 index 0000000000..f1d1188667 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ConsoleErrorListener.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC ConsoleErrorListener : public BaseErrorListener { + public: + /** + * Provides a default instance of {@link ConsoleErrorListener}. 
+ */ + static ConsoleErrorListener INSTANCE; + + /** + * {@inheritDoc} + * + * <p> + * This implementation prints messages to {@code std::cerr} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.</p> + * + * <pre> + * line <em>line</em>:<em>charPositionInLine</em> <em>msg</em> + * </pre> + */ + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.cpp b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.cpp new file mode 100644 index 0000000000..e5a7327859 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.cpp @@ -0,0 +1,336 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "NoViableAltException.h" +#include "misc/IntervalSet.h" +#include "atn/ParserATNSimulator.h" +#include "InputMismatchException.h" +#include "FailedPredicateException.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "support/StringUtils.h" +#include "support/Casts.h" +#include "Parser.h" +#include "CommonToken.h" +#include "Vocabulary.h" + +#include "DefaultErrorStrategy.h" + +using namespace antlr4; +using namespace antlr4::atn; + +using namespace antlrcpp; + +DefaultErrorStrategy::DefaultErrorStrategy() { + InitializeInstanceFields(); +} + +DefaultErrorStrategy::~DefaultErrorStrategy() { +} + +void DefaultErrorStrategy::reset(Parser *recognizer) { + _errorSymbols.clear(); + endErrorCondition(recognizer); +} + +void DefaultErrorStrategy::beginErrorCondition(Parser * /*recognizer*/) { + errorRecoveryMode = true; +} + +bool DefaultErrorStrategy::inErrorRecoveryMode(Parser * /*recognizer*/) { + return errorRecoveryMode; +} + +void DefaultErrorStrategy::endErrorCondition(Parser * /*recognizer*/) { + errorRecoveryMode = false; + lastErrorIndex = -1; +} + +void DefaultErrorStrategy::reportMatch(Parser *recognizer) { + endErrorCondition(recognizer); +} + +void DefaultErrorStrategy::reportError(Parser *recognizer, const RecognitionException &e) { + // If we've already reported an error and have not matched a token + // yet successfully, don't report any errors. 
+ if (inErrorRecoveryMode(recognizer)) { + return; // don't report spurious errors + } + + beginErrorCondition(recognizer); + if (is<const NoViableAltException *>(&e)) { + reportNoViableAlternative(recognizer, static_cast<const NoViableAltException &>(e)); + } else if (is<const InputMismatchException *>(&e)) { + reportInputMismatch(recognizer, static_cast<const InputMismatchException &>(e)); + } else if (is<const FailedPredicateException *>(&e)) { + reportFailedPredicate(recognizer, static_cast<const FailedPredicateException &>(e)); + } else if (is<const RecognitionException *>(&e)) { + recognizer->notifyErrorListeners(e.getOffendingToken(), e.what(), std::current_exception()); + } +} + +void DefaultErrorStrategy::recover(Parser *recognizer, std::exception_ptr /*e*/) { + if (lastErrorIndex == static_cast<int>(recognizer->getInputStream()->index()) && + lastErrorStates.contains(recognizer->getState())) { + + // uh oh, another error at same token index and previously-visited + // state in ATN; must be a case where LT(1) is in the recovery + // token set so nothing got consumed. Consume a single token + // at least to prevent an infinite loop; this is a failsafe. + recognizer->consume(); + } + lastErrorIndex = static_cast<int>(recognizer->getInputStream()->index()); + lastErrorStates.add(recognizer->getState()); + misc::IntervalSet followSet = getErrorRecoverySet(recognizer); + consumeUntil(recognizer, followSet); +} + +void DefaultErrorStrategy::sync(Parser *recognizer) { + atn::ATNState *s = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()]; + + // If already recovering, don't try to sync + if (inErrorRecoveryMode(recognizer)) { + return; + } + + TokenStream *tokens = recognizer->getTokenStream(); + size_t la = tokens->LA(1); + + // try cheaper subset first; might get lucky. 
seems to shave a wee bit off + auto nextTokens = recognizer->getATN().nextTokens(s); + if (nextTokens.contains(Token::EPSILON) || nextTokens.contains(la)) { + return; + } + + switch (s->getStateType()) { + case atn::ATNStateType::BLOCK_START: + case atn::ATNStateType::STAR_BLOCK_START: + case atn::ATNStateType::PLUS_BLOCK_START: + case atn::ATNStateType::STAR_LOOP_ENTRY: + // report error and recover if possible + if (singleTokenDeletion(recognizer) != nullptr) { + return; + } + + throw InputMismatchException(recognizer); + + case atn::ATNStateType::PLUS_LOOP_BACK: + case atn::ATNStateType::STAR_LOOP_BACK: { + reportUnwantedToken(recognizer); + misc::IntervalSet expecting = recognizer->getExpectedTokens(); + misc::IntervalSet whatFollowsLoopIterationOrRule = expecting.Or(getErrorRecoverySet(recognizer)); + consumeUntil(recognizer, whatFollowsLoopIterationOrRule); + } + break; + + default: + // do nothing if we can't identify the exact kind of ATN state + break; + } +} + +void DefaultErrorStrategy::reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e) { + TokenStream *tokens = recognizer->getTokenStream(); + std::string input; + if (tokens != nullptr) { + if (e.getStartToken()->getType() == Token::EOF) { + input = "<EOF>"; + } else { + input = tokens->getText(e.getStartToken(), e.getOffendingToken()); + } + } else { + input = "<unknown input>"; + } + std::string msg = "no viable alternative at input " + escapeWSAndQuote(input); + recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e)); +} + +void DefaultErrorStrategy::reportInputMismatch(Parser *recognizer, const InputMismatchException &e) { + std::string msg = "mismatched input " + getTokenErrorDisplay(e.getOffendingToken()) + + " expecting " + e.getExpectedTokens().toString(recognizer->getVocabulary()); + recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e)); +} + +void DefaultErrorStrategy::reportFailedPredicate(Parser 
*recognizer, const FailedPredicateException &e) { + const std::string& ruleName = recognizer->getRuleNames()[recognizer->getContext()->getRuleIndex()]; + std::string msg = "rule " + ruleName + " " + e.what(); + recognizer->notifyErrorListeners(e.getOffendingToken(), msg, std::make_exception_ptr(e)); +} + +void DefaultErrorStrategy::reportUnwantedToken(Parser *recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + Token *t = recognizer->getCurrentToken(); + std::string tokenName = getTokenErrorDisplay(t); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + + std::string msg = "extraneous input " + tokenName + " expecting " + expecting.toString(recognizer->getVocabulary()); + recognizer->notifyErrorListeners(t, msg, nullptr); +} + +void DefaultErrorStrategy::reportMissingToken(Parser *recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + Token *t = recognizer->getCurrentToken(); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + std::string expectedText = expecting.toString(recognizer->getVocabulary()); + std::string msg = "missing " + expectedText + " at " + getTokenErrorDisplay(t); + + recognizer->notifyErrorListeners(t, msg, nullptr); +} + +Token* DefaultErrorStrategy::recoverInline(Parser *recognizer) { + // Single token deletion. + Token *matchedSymbol = singleTokenDeletion(recognizer); + if (matchedSymbol) { + // We have deleted the extra token. + // Now, move past ttype token as if all were ok. + recognizer->consume(); + return matchedSymbol; + } + + // Single token insertion. + if (singleTokenInsertion(recognizer)) { + return getMissingSymbol(recognizer); + } + + // Even that didn't work; must throw the exception. 
+ throw InputMismatchException(recognizer); +} + +bool DefaultErrorStrategy::singleTokenInsertion(Parser *recognizer) { + ssize_t currentSymbolType = recognizer->getInputStream()->LA(1); + + // if current token is consistent with what could come after current + // ATN state, then we know we're missing a token; error recovery + // is free to conjure up and insert the missing token + atn::ATNState *currentState = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()]; + atn::ATNState *next = currentState->transitions[0]->target; + const atn::ATN &atn = recognizer->getInterpreter<atn::ATNSimulator>()->atn; + misc::IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer->getContext()); + if (expectingAtLL2.contains(currentSymbolType)) { + reportMissingToken(recognizer); + return true; + } + return false; +} + +Token* DefaultErrorStrategy::singleTokenDeletion(Parser *recognizer) { + size_t nextTokenType = recognizer->getInputStream()->LA(2); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + if (expecting.contains(nextTokenType)) { + reportUnwantedToken(recognizer); + recognizer->consume(); // simply delete extra token + // we want to return the token we're actually matching + Token *matchedSymbol = recognizer->getCurrentToken(); + reportMatch(recognizer); // we know current token is correct + return matchedSymbol; + } + return nullptr; +} + +Token* DefaultErrorStrategy::getMissingSymbol(Parser *recognizer) { + Token *currentSymbol = recognizer->getCurrentToken(); + misc::IntervalSet expecting = getExpectedTokens(recognizer); + size_t expectedTokenType = expecting.getMinElement(); // get any element + std::string tokenText; + if (expectedTokenType == Token::EOF) { + tokenText = "<missing EOF>"; + } else { + tokenText = "<missing " + recognizer->getVocabulary().getDisplayName(expectedTokenType) + ">"; + } + Token *current = currentSymbol; + Token *lookback = recognizer->getTokenStream()->LT(-1); + if (current->getType() == 
Token::EOF && lookback != nullptr) { + current = lookback; + } + + _errorSymbols.push_back(recognizer->getTokenFactory()->create( + { current->getTokenSource(), current->getTokenSource()->getInputStream() }, + expectedTokenType, tokenText, Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, + current->getLine(), current->getCharPositionInLine())); + + return _errorSymbols.back().get(); +} + +misc::IntervalSet DefaultErrorStrategy::getExpectedTokens(Parser *recognizer) { + return recognizer->getExpectedTokens(); +} + +std::string DefaultErrorStrategy::getTokenErrorDisplay(Token *t) { + if (t == nullptr) { + return "<no Token>"; + } + std::string s = getSymbolText(t); + if (s == "") { + if (getSymbolType(t) == Token::EOF) { + s = "<EOF>"; + } else { + s = "<" + std::to_string(getSymbolType(t)) + ">"; + } + } + return escapeWSAndQuote(s); +} + +std::string DefaultErrorStrategy::getSymbolText(Token *symbol) { + return symbol->getText(); +} + +size_t DefaultErrorStrategy::getSymbolType(Token *symbol) { + return symbol->getType(); +} + +std::string DefaultErrorStrategy::escapeWSAndQuote(const std::string &s) const { + std::string result; + result.reserve(s.size() + 2); + result.push_back('\''); + antlrcpp::escapeWhitespace(result, s); + result.push_back('\''); + result.shrink_to_fit(); + return result; +} + +misc::IntervalSet DefaultErrorStrategy::getErrorRecoverySet(Parser *recognizer) { + const atn::ATN &atn = recognizer->getInterpreter<atn::ATNSimulator>()->atn; + RuleContext *ctx = recognizer->getContext(); + misc::IntervalSet recoverSet; + while (ctx->invokingState != ATNState::INVALID_STATE_NUMBER) { + // compute what follows who invoked us + atn::ATNState *invokingState = atn.states[ctx->invokingState]; + const atn::RuleTransition *rt = downCast<const atn::RuleTransition*>(invokingState->transitions[0].get()); + misc::IntervalSet follow = atn.nextTokens(rt->followState); + recoverSet.addAll(follow); + + if (ctx->parent == nullptr) + break; + ctx = 
static_cast<RuleContext *>(ctx->parent); + } + recoverSet.remove(Token::EPSILON); + + return recoverSet; +} + +void DefaultErrorStrategy::consumeUntil(Parser *recognizer, const misc::IntervalSet &set) { + size_t ttype = recognizer->getInputStream()->LA(1); + while (ttype != Token::EOF && !set.contains(ttype)) { + recognizer->consume(); + ttype = recognizer->getInputStream()->LA(1); + } +} + +void DefaultErrorStrategy::InitializeInstanceFields() { + errorRecoveryMode = false; + lastErrorIndex = -1; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.h b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.h new file mode 100644 index 0000000000..7b914468cf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DefaultErrorStrategy.h @@ -0,0 +1,466 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorStrategy.h" +#include "misc/IntervalSet.h" + +namespace antlr4 { + + /** + * This is the default implementation of {@link ANTLRErrorStrategy} used for + * error reporting and recovery in ANTLR parsers. + */ + class ANTLR4CPP_PUBLIC DefaultErrorStrategy : public ANTLRErrorStrategy { + public: + DefaultErrorStrategy(); + DefaultErrorStrategy(DefaultErrorStrategy const& other) = delete; + virtual ~DefaultErrorStrategy(); + + DefaultErrorStrategy& operator = (DefaultErrorStrategy const& other) = delete; + + protected: + /** + * Indicates whether the error strategy is currently "recovering from an + * error". This is used to suppress reporting multiple error messages while + * attempting to recover from a detected syntax error. + * + * @see #inErrorRecoveryMode + */ + bool errorRecoveryMode; + + /** The index into the input stream where the last error occurred. 
+ * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseam. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + int lastErrorIndex; + + misc::IntervalSet lastErrorStates; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The default implementation simply calls <seealso cref="#endErrorCondition"/> to + /// ensure that the handler is not in error recovery mode. + /// </summary> + public: + virtual void reset(Parser *recognizer) override; + + /// <summary> + /// This method is called to enter error recovery mode when a recognition + /// exception is reported. + /// </summary> + /// <param name="recognizer"> the parser instance </param> + protected: + virtual void beginErrorCondition(Parser *recognizer); + + /// <summary> + /// {@inheritDoc} + /// </summary> + public: + virtual bool inErrorRecoveryMode(Parser *recognizer) override; + + /// <summary> + /// This method is called to leave error recovery mode after recovering from + /// a recognition exception. + /// </summary> + /// <param name="recognizer"> </param> + protected: + virtual void endErrorCondition(Parser *recognizer); + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The default implementation simply calls <seealso cref="#endErrorCondition"/>. + /// </summary> + public: + virtual void reportMatch(Parser *recognizer) override; + + /// {@inheritDoc} + /// <p/> + /// The default implementation returns immediately if the handler is already + /// in error recovery mode. Otherwise, it calls <seealso cref="#beginErrorCondition"/> + /// and dispatches the reporting task based on the runtime type of {@code e} + /// according to the following table. 
+ /// + /// <ul> + /// <li><seealso cref="NoViableAltException"/>: Dispatches the call to + /// <seealso cref="#reportNoViableAlternative"/></li> + /// <li><seealso cref="InputMismatchException"/>: Dispatches the call to + /// <seealso cref="#reportInputMismatch"/></li> + /// <li><seealso cref="FailedPredicateException"/>: Dispatches the call to + /// <seealso cref="#reportFailedPredicate"/></li> + /// <li>All other types: calls <seealso cref="Parser#notifyErrorListeners"/> to report + /// the exception</li> + /// </ul> + virtual void reportError(Parser *recognizer, const RecognitionException &e) override; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The default implementation resynchronizes the parser by consuming tokens + /// until we find one in the resynchronization set--loosely the set of tokens + /// that can follow the current rule. + /// </summary> + virtual void recover(Parser *recognizer, std::exception_ptr e) override; + + /** + * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure + * that the current lookahead symbol is consistent with what we're expecting + * at this point in the ATN. You can call this anytime but ANTLR only + * generates code to check before subrules/loops and each iteration. + * + * <p>Implements Jim Idle's magic sync mechanism in closures and optional + * subrules. E.g.,</p> + * + * <pre> + * a : sync ( stuff sync )* ; + * sync : {consume to what can follow sync} ; + * </pre> + * + * At the start of a sub rule upon error, {@link #sync} performs single + * token deletion, if possible. If it can't do that, it bails on the current + * rule and uses the default error recovery, which consumes until the + * resynchronization set of the current rule. 
+ * + * <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block + * with an empty alternative), then the expected set includes what follows + * the subrule.</p> + * + * <p>During loop iteration, it consumes until it sees a token that can start a + * sub rule or what follows the loop. Yes, that is pretty aggressive. We opt to + * stay in the loop as long as possible.</p> + * + * <p><strong>ORIGINS</strong></p> + * + * <p>Previous versions of ANTLR did a poor job of their recovery within loops. + * A single mismatched token or missing token would force the parser to bail + * out of the entire rule surrounding the loop. So, for rule</p> + * + * <pre> + * classDef : 'class' ID '{' member* '}' + * </pre> + * + * input with an extra token between members would force the parser to + * consume until it found the next class definition rather than the next + * member definition of the current class. + * + * <p>This functionality costs a little bit of effort because the parser has to + * compare token sets at the start of the loop and at each iteration. If for + * some reason speed is suffering for you, you can turn off this + * functionality by simply overriding this method as a blank { }.</p> + */ + virtual void sync(Parser *recognizer) override; + + /// <summary> + /// This is called by <seealso cref="#reportError"/> when the exception is a + /// <seealso cref="NoViableAltException"/>. + /// </summary> + /// <seealso cref= #reportError + /// </seealso> + /// <param name="recognizer"> the parser instance </param> + /// <param name="e"> the recognition exception </param> + protected: + virtual void reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e); + + /// <summary> + /// This is called by <seealso cref="#reportError"/> when the exception is an + /// <seealso cref="InputMismatchException"/>. 
+ /// </summary> + /// <seealso cref= #reportError + /// </seealso> + /// <param name="recognizer"> the parser instance </param> + /// <param name="e"> the recognition exception </param> + virtual void reportInputMismatch(Parser *recognizer, const InputMismatchException &e); + + /// <summary> + /// This is called by <seealso cref="#reportError"/> when the exception is a + /// <seealso cref="FailedPredicateException"/>. + /// </summary> + /// <seealso cref= #reportError + /// </seealso> + /// <param name="recognizer"> the parser instance </param> + /// <param name="e"> the recognition exception </param> + virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e); + + /** + * This method is called to report a syntax error which requires the removal + * of a token from the input stream. At the time this method is called, the + * erroneous symbol is current {@code LT(1)} symbol and has not yet been + * removed from the input stream. When this method returns, + * {@code recognizer} is in error recovery mode. + * + * <p>This method is called when {@link #singleTokenDeletion} identifies + * single-token deletion as a viable recovery strategy for a mismatched + * input error.</p> + * + * <p>The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}.</p> + * + * @param recognizer the parser instance + */ + virtual void reportUnwantedToken(Parser *recognizer); + + /** + * This method is called to report a syntax error which requires the + * insertion of a missing token into the input stream. At the time this + * method is called, the missing token has not yet been inserted. When this + * method returns, {@code recognizer} is in error recovery mode. 
+ * + * <p>This method is called when {@link #singleTokenInsertion} identifies + * single-token insertion as a viable recovery strategy for a mismatched + * input error.</p> + * + * <p>The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}.</p> + * + * @param recognizer the parser instance + */ + virtual void reportMissingToken(Parser *recognizer); + + public: + /** + * {@inheritDoc} + * + * <p>The default implementation attempts to recover from the mismatched input + * by using single token insertion and deletion as described below. If the + * recovery attempt fails, this method throws an + * {@link InputMismatchException}.</p> + * + * <p><strong>EXTRA TOKEN</strong> (single token deletion)</p> + * + * <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the + * right token, however, then assume {@code LA(1)} is some extra spurious + * token and delete it. Then consume and return the next token (which was + * the {@code LA(2)} token) as the successful result of the match operation.</p> + * + * <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p> + * + * <p><strong>MISSING TOKEN</strong> (single token insertion)</p> + * + * <p>If current token (at {@code LA(1)}) is consistent with what could come + * after the expected {@code LA(1)} token, then assume the token is missing + * and use the parser's {@link TokenFactory} to create it on the fly. The + * "insertion" is performed by returning the created token as the successful + * result of the match operation.</p> + * + * <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p> + * + * <p><strong>EXAMPLE</strong></p> + * + * <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. 
When + * the parser returns from the nested call to {@code expr}, it will have + * call chain:</p> + * + * <pre> + * stat → expr → atom + * </pre> + * + * and it will be trying to match the {@code ')'} at this point in the + * derivation: + * + * <pre> + * => ID '=' '(' INT ')' ('+' atom)* ';' + * ^ + * </pre> + * + * The attempt to match {@code ')'} will fail when it sees {@code ';'} and + * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + * is in the set of tokens that can follow the {@code ')'} token reference + * in rule {@code atom}. It can assume that you forgot the {@code ')'}. + */ + virtual Token* recoverInline(Parser *recognizer) override; + + /// <summary> + /// This method implements the single-token insertion inline error recovery + /// strategy. It is called by <seealso cref="#recoverInline"/> if the single-token + /// deletion strategy fails to recover from the mismatched input. If this + /// method returns {@code true}, {@code recognizer} will be in error recovery + /// mode. + /// <p/> + /// This method determines whether or not single-token insertion is viable by + /// checking if the {@code LA(1)} input symbol could be successfully matched + /// if it were instead the {@code LA(2)} symbol. If this method returns + /// {@code true}, the caller is responsible for creating and inserting a + /// token with the correct type to produce this behavior. + /// </summary> + /// <param name="recognizer"> the parser instance </param> + /// <returns> {@code true} if single-token insertion is a viable recovery + /// strategy for the current mismatched input, otherwise {@code false} </returns> + protected: + virtual bool singleTokenInsertion(Parser *recognizer); + + /// <summary> + /// This method implements the single-token deletion inline error recovery + /// strategy. It is called by <seealso cref="#recoverInline"/> to attempt to recover + /// from mismatched input. 
If this method returns null, the parser and error + /// handler state will not have changed. If this method returns non-null, + /// {@code recognizer} will <em>not</em> be in error recovery mode since the + /// returned token was a successful match. + /// <p/> + /// If the single-token deletion is successful, this method calls + /// <seealso cref="#reportUnwantedToken"/> to report the error, followed by + /// <seealso cref="Parser#consume"/> to actually "delete" the extraneous token. Then, + /// before returning <seealso cref="#reportMatch"/> is called to signal a successful + /// match. + /// </summary> + /// <param name="recognizer"> the parser instance </param> + /// <returns> the successfully matched <seealso cref="Token"/> instance if single-token + /// deletion successfully recovers from the mismatched input, otherwise + /// {@code null} </returns> + virtual Token* singleTokenDeletion(Parser *recognizer); + + /// <summary> + /// Conjure up a missing token during error recovery. + /// + /// The recognizer attempts to recover from single missing + /// symbols. But, actions might refer to that missing symbol. + /// For example, x=ID {f($x);}. The action clearly assumes + /// that there has been an identifier matched previously and that + /// $x points at that token. If that token is missing, but + /// the next token in the stream is what we want we assume that + /// this token is missing and we keep going. Because we + /// have to return some token to replace the missing token, + /// we have to conjure one up. This method gives the user control + /// over the tokens returned for missing tokens. Mostly, + /// you will want to create something special for identifier + /// tokens. For literals such as '{' and ',', the default + /// action in the parser or tree parser works. It simply creates + /// a CommonToken of the appropriate type. The text will be the token. 
+ /// If you change what tokens must be created by the lexer, + /// override this method to create the appropriate tokens. + /// </summary> + virtual Token* getMissingSymbol(Parser *recognizer); + + virtual misc::IntervalSet getExpectedTokens(Parser *recognizer); + + /// <summary> + /// How should a token be displayed in an error message? The default + /// is to display just the text, but during development you might + /// want to have a lot of information spit out. Override in that case + /// to use t.toString() (which, for CommonToken, dumps everything about + /// the token). This is better than forcing you to override a method in + /// your token objects because you don't have to go modify your lexer + /// so that it creates a new class. + /// </summary> + virtual std::string getTokenErrorDisplay(Token *t); + + virtual std::string getSymbolText(Token *symbol); + + virtual size_t getSymbolType(Token *symbol); + + virtual std::string escapeWSAndQuote(const std::string &s) const; + + /* Compute the error recovery set for the current rule. During + * rule invocation, the parser pushes the set of tokens that can + * follow that rule reference on the stack; this amounts to + * computing FIRST of what follows the rule reference in the + * enclosing rule. See LinearApproximator.FIRST(). + * This local follow set only includes tokens + * from within the rule; i.e., the FIRST computation done by + * ANTLR stops at the end of a rule. + * + * EXAMPLE + * + * When you find a "no viable alt exception", the input is not + * consistent with any of the alternatives for rule r. The best + * thing to do is to consume tokens until you see something that + * can legally follow a call to r *or* any rule that called r. + * You don't want the exact set of viable next tokens because the + * input might just be missing a token--you might consume the + * rest of the input looking for one of the missing tokens. 
+ * + * Consider grammar: + * + * a : '[' b ']' + * | '(' b ')' + * ; + * b : c '^' INT ; + * c : ID + * | INT + * ; + * + * At each rule invocation, the set of tokens that could follow + * that rule is pushed on a stack. Here are the various + * context-sensitive follow sets: + * + * FOLLOW(b1_in_a) = FIRST(']') = ']' + * FOLLOW(b2_in_a) = FIRST(')') = ')' + * FOLLOW(c_in_b) = FIRST('^') = '^' + * + * Upon erroneous input "[]", the call chain is + * + * a -> b -> c + * + * and, hence, the follow context stack is: + * + * depth follow set start of rule execution + * 0 <EOF> a (from main()) + * 1 ']' b + * 2 '^' c + * + * Notice that ')' is not included, because b would have to have + * been called from a different context in rule a for ')' to be + * included. + * + * For error recovery, we cannot consider FOLLOW(c) + * (context-sensitive or otherwise). We need the combined set of + * all context-sensitive FOLLOW sets--the set of all tokens that + * could follow any reference in the call chain. We need to + * resync to one of those tokens. Note that FOLLOW(c)='^' and if + * we resync'd to that token, we'd consume until EOF. We need to + * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + * In this case, for input "[]", LA(1) is ']' and in the set, so we would + * not consume anything. After printing an error, rule c would + * return normally. Rule b would not find the required '^' though. + * At this point, it gets a mismatched token error and throws an + * exception (since LA(1) is not in the viable following token + * set). The rule exception handler tries to recover, but finds + * the same recovery set and doesn't consume anything. Rule b + * exits normally returning to rule a. Now it finds the ']' (and + * with the successful match exits errorRecovery mode). + * + * So, you can see that the parser walks up the call chain looking + * for the token that was a member of the recovery set. + * + * Errors are not generated in errorRecovery mode. 
+ * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implement context-sensitive FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + virtual misc::IntervalSet getErrorRecoverySet(Parser *recognizer); + + /// <summary> + /// Consume tokens until one matches the given token set. </summary> + virtual void consumeUntil(Parser *recognizer, const misc::IntervalSet &set); + + private: + std::vector<std::unique_ptr<Token>> _errorSymbols; // Temporarily created token. + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.cpp new file mode 100644 index 0000000000..ef6f64372d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.cpp @@ -0,0 +1,84 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "atn/ATNConfigSet.h" +#include "Parser.h" +#include "misc/Interval.h" +#include "dfa/DFA.h" + +#include "DiagnosticErrorListener.h" + +using namespace antlr4; + +DiagnosticErrorListener::DiagnosticErrorListener() : DiagnosticErrorListener(true) { +} + +DiagnosticErrorListener::DiagnosticErrorListener(bool exactOnly_) : exactOnly(exactOnly_) { +} + +void DiagnosticErrorListener::reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) { + if (exactOnly && !exact) { + return; + } + + std::string decision = getDecisionDescription(recognizer, dfa); + antlrcpp::BitSet conflictingAlts = getConflictingAlts(ambigAlts, configs); + std::string text = recognizer->getTokenStream()->getText(misc::Interval(startIndex, stopIndex)); + std::string message = "reportAmbiguity d=" + decision + ": ambigAlts=" + conflictingAlts.toString() + + ", input='" + text + "'"; + + recognizer->notifyErrorListeners(message); +} + +void DiagnosticErrorListener::reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, + size_t stopIndex, const antlrcpp::BitSet &/*conflictingAlts*/, atn::ATNConfigSet * /*configs*/) { + std::string decision = getDecisionDescription(recognizer, dfa); + std::string text = recognizer->getTokenStream()->getText(misc::Interval(startIndex, stopIndex)); + std::string message = "reportAttemptingFullContext d=" + decision + ", input='" + text + "'"; + recognizer->notifyErrorListeners(message); +} + +void DiagnosticErrorListener::reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, + size_t stopIndex, size_t /*prediction*/, atn::ATNConfigSet * /*configs*/) { + std::string decision = getDecisionDescription(recognizer, dfa); + std::string text = recognizer->getTokenStream()->getText(misc::Interval(startIndex, stopIndex)); + 
std::string message = "reportContextSensitivity d=" + decision + ", input='" + text + "'"; + recognizer->notifyErrorListeners(message); +} + +std::string DiagnosticErrorListener::getDecisionDescription(Parser *recognizer, const dfa::DFA &dfa) { + size_t decision = dfa.decision; + size_t ruleIndex = (reinterpret_cast<atn::ATNState*>(dfa.atnStartState))->ruleIndex; + + const std::vector<std::string>& ruleNames = recognizer->getRuleNames(); + if (ruleIndex == INVALID_INDEX || ruleIndex >= ruleNames.size()) { + return std::to_string(decision); + } + + std::string ruleName = ruleNames[ruleIndex]; + if (ruleName == "" || ruleName.empty()) { + return std::to_string(decision); + } + + return std::to_string(decision) + " (" + ruleName + ")"; +} + +antlrcpp::BitSet DiagnosticErrorListener::getConflictingAlts(const antlrcpp::BitSet &reportedAlts, + atn::ATNConfigSet *configs) { + if (reportedAlts.count() > 0) { // Not exactly like the original Java code, but this listener is only used + // in the TestRig (where it never provides a good alt set), so this is probably acceptable. + return reportedAlts; + } + + antlrcpp::BitSet result; + for (auto &config : configs->configs) { + result.set(config->alt); + } + + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.h new file mode 100644 index 0000000000..ed6d749429 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/DiagnosticErrorListener.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + /// <summary> + /// This implementation of <seealso cref="ANTLRErrorListener"/> can be used to identify + /// certain potential correctness and performance problems in grammars.
"Reports" + /// are made by calling <seealso cref="Parser#notifyErrorListeners"/> with the appropriate + /// message. + /// + /// <ul> + /// <li><b>Ambiguities</b>: These are cases where more than one path through the + /// grammar can match the input.</li> + /// <li><b>Weak context sensitivity</b>: These are cases where full-context + /// prediction resolved an SLL conflict to a unique alternative which equaled the + /// minimum alternative of the SLL conflict.</li> + /// <li><b>Strong (forced) context sensitivity</b>: These are cases where the + /// full-context prediction resolved an SLL conflict to a unique alternative, + /// <em>and</em> the minimum alternative of the SLL conflict was found to not be + /// a truly viable alternative. Two-stage parsing cannot be used for inputs where + /// this situation occurs.</li> + /// </ul> + /// + /// @author Sam Harwell + /// </summary> + class ANTLR4CPP_PUBLIC DiagnosticErrorListener : public BaseErrorListener { + /// <summary> + /// When {@code true}, only exactly known ambiguities are reported. + /// </summary> + protected: + const bool exactOnly; + + /// <summary> + /// Initializes a new instance of <seealso cref="DiagnosticErrorListener"/> which only + /// reports exact ambiguities. + /// </summary> + public: + DiagnosticErrorListener(); + + /// <summary> + /// Initializes a new instance of <seealso cref="DiagnosticErrorListener"/>, specifying + /// whether all ambiguities or only exact ambiguities are reported. + /// </summary> + /// <param name="exactOnly"> {@code true} to report only exact ambiguities, otherwise + /// {@code false} to report all ambiguities. 
</param> + DiagnosticErrorListener(bool exactOnly); + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + + protected: + virtual std::string getDecisionDescription(Parser *recognizer, const dfa::DFA &dfa); + + /// <summary> + /// Computes the set of conflicting or ambiguous alternatives from a + /// configuration set, if that information was not already provided by the + /// parser. + /// </summary> + /// <param name="reportedAlts"> The set of conflicting or ambiguous alternatives, as + /// reported by the parser. </param> + /// <param name="configs"> The conflicting or ambiguous configuration set. </param> + /// <returns> Returns {@code reportedAlts} if it is not empty, otherwise + /// returns the set of alternatives represented in {@code configs}. </returns> + virtual antlrcpp::BitSet getConflictingAlts(const antlrcpp::BitSet &reportedAlts, atn::ATNConfigSet *configs); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Exceptions.cpp b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.cpp new file mode 100644 index 0000000000..24aea29b0c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "Exceptions.h" + +using namespace antlr4; + +RuntimeException::RuntimeException(const std::string &msg) : std::exception(), _message(msg) { +} + +const char* RuntimeException::what() const noexcept { + return _message.c_str(); +} + +//------------------ IOException --------------------------------------------------------------------------------------- + +IOException::IOException(const std::string &msg) : std::exception(), _message(msg) { +} + +const char* IOException::what() const noexcept { + return _message.c_str(); +} + +//------------------ IllegalStateException ----------------------------------------------------------------------------- + +IllegalStateException::~IllegalStateException() { +} + +//------------------ IllegalArgumentException -------------------------------------------------------------------------- + +IllegalArgumentException::~IllegalArgumentException() { +} + +//------------------ NullPointerException ------------------------------------------------------------------------------ + +NullPointerException::~NullPointerException() { +} + +//------------------ IndexOutOfBoundsException ------------------------------------------------------------------------- + +IndexOutOfBoundsException::~IndexOutOfBoundsException() { +} + +//------------------ UnsupportedOperationException --------------------------------------------------------------------- + +UnsupportedOperationException::~UnsupportedOperationException() { +} + +//------------------ EmptyStackException ------------------------------------------------------------------------------- + +EmptyStackException::~EmptyStackException() { +} + +//------------------ CancellationException ----------------------------------------------------------------------------- + +CancellationException::~CancellationException() { +} + +//------------------ ParseCancellationException ------------------------------------------------------------------------ + 
+ParseCancellationException::~ParseCancellationException() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Exceptions.h b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.h new file mode 100644 index 0000000000..35d72b52ee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Exceptions.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + // An exception hierarchy modelled loosely after java.lang.* exceptions. + class ANTLR4CPP_PUBLIC RuntimeException : public std::exception { + private: + std::string _message; + public: + RuntimeException(const std::string &msg = ""); + + virtual const char* what() const noexcept override; + }; + + class ANTLR4CPP_PUBLIC IllegalStateException : public RuntimeException { + public: + IllegalStateException(const std::string &msg = "") : RuntimeException(msg) {} + IllegalStateException(IllegalStateException const&) = default; + ~IllegalStateException(); + IllegalStateException& operator=(IllegalStateException const&) = default; + }; + + class ANTLR4CPP_PUBLIC IllegalArgumentException : public RuntimeException { + public: + IllegalArgumentException(IllegalArgumentException const&) = default; + IllegalArgumentException(const std::string &msg = "") : RuntimeException(msg) {} + ~IllegalArgumentException(); + IllegalArgumentException& operator=(IllegalArgumentException const&) = default; + }; + + class ANTLR4CPP_PUBLIC NullPointerException : public RuntimeException { + public: + NullPointerException(const std::string &msg = "") : RuntimeException(msg) {} + NullPointerException(NullPointerException const&) = default; + ~NullPointerException(); + NullPointerException& operator=(NullPointerException const&) = default; + }; + + class ANTLR4CPP_PUBLIC IndexOutOfBoundsException : public RuntimeException { 
+ public: + IndexOutOfBoundsException(const std::string &msg = "") : RuntimeException(msg) {} + IndexOutOfBoundsException(IndexOutOfBoundsException const&) = default; + ~IndexOutOfBoundsException(); + IndexOutOfBoundsException& operator=(IndexOutOfBoundsException const&) = default; + }; + + class ANTLR4CPP_PUBLIC UnsupportedOperationException : public RuntimeException { + public: + UnsupportedOperationException(const std::string &msg = "") : RuntimeException(msg) {} + UnsupportedOperationException(UnsupportedOperationException const&) = default; + ~UnsupportedOperationException(); + UnsupportedOperationException& operator=(UnsupportedOperationException const&) = default; + + }; + + class ANTLR4CPP_PUBLIC EmptyStackException : public RuntimeException { + public: + EmptyStackException(const std::string &msg = "") : RuntimeException(msg) {} + EmptyStackException(EmptyStackException const&) = default; + ~EmptyStackException(); + EmptyStackException& operator=(EmptyStackException const&) = default; + }; + + // IOException is not a runtime exception (in the java hierarchy). + // Hence we have to duplicate the RuntimeException implementation. 
+ class ANTLR4CPP_PUBLIC IOException : public std::exception { + private: + std::string _message; + + public: + IOException(const std::string &msg = ""); + + virtual const char* what() const noexcept override; + }; + + class ANTLR4CPP_PUBLIC CancellationException : public IllegalStateException { + public: + CancellationException(const std::string &msg = "") : IllegalStateException(msg) {} + CancellationException(CancellationException const&) = default; + ~CancellationException(); + CancellationException& operator=(CancellationException const&) = default; + }; + + class ANTLR4CPP_PUBLIC ParseCancellationException : public CancellationException { + public: + ParseCancellationException(const std::string &msg = "") : CancellationException(msg) {} + ParseCancellationException(ParseCancellationException const&) = default; + ~ParseCancellationException(); + ParseCancellationException& operator=(ParseCancellationException const&) = default; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.cpp b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.cpp new file mode 100644 index 0000000000..ca2537b300 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.cpp @@ -0,0 +1,52 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ParserATNSimulator.h" +#include "Parser.h" +#include "atn/PredicateTransition.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "support/Casts.h" +#include "support/CPPUtils.h" + +#include "FailedPredicateException.h" + +using namespace antlr4; +using namespace antlrcpp; + +FailedPredicateException::FailedPredicateException(Parser *recognizer) : FailedPredicateException(recognizer, "", "") { +} + +FailedPredicateException::FailedPredicateException(Parser *recognizer, const std::string &predicate): FailedPredicateException(recognizer, predicate, "") { +} + +FailedPredicateException::FailedPredicateException(Parser *recognizer, const std::string &predicate, const std::string &message) + : RecognitionException(!message.empty() ? message : "failed predicate: " + predicate + "?", recognizer, + recognizer->getInputStream(), recognizer->getContext(), recognizer->getCurrentToken()) { + + atn::ATNState *s = recognizer->getInterpreter<atn::ATNSimulator>()->atn.states[recognizer->getState()]; + const atn::Transition *transition = s->transitions[0].get(); + if (transition->getTransitionType() == atn::TransitionType::PREDICATE) { + _ruleIndex = downCast<const atn::PredicateTransition&>(*transition).getRuleIndex(); + _predicateIndex = downCast<const atn::PredicateTransition&>(*transition).getPredIndex(); + } else { + _ruleIndex = 0; + _predicateIndex = 0; + } + + _predicate = predicate; +} + +size_t FailedPredicateException::getRuleIndex() { + return _ruleIndex; +} + +size_t FailedPredicateException::getPredIndex() { + return _predicateIndex; +} + +std::string FailedPredicateException::getPredicate() { + return _predicate; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.h b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.h new file mode 100644 index 0000000000..89bec0fd0b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FailedPredicateException.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The 
ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlr4 { + + /// A semantic predicate failed during validation. Validation of predicates + /// occurs when normally parsing the alternative just like matching a token. + /// Disambiguating predicate evaluation occurs when we test a predicate during + /// prediction. + class ANTLR4CPP_PUBLIC FailedPredicateException : public RecognitionException { + public: + explicit FailedPredicateException(Parser *recognizer); + FailedPredicateException(Parser *recognizer, const std::string &predicate); + FailedPredicateException(Parser *recognizer, const std::string &predicate, const std::string &message); + + virtual size_t getRuleIndex(); + virtual size_t getPredIndex(); + virtual std::string getPredicate(); + + private: + size_t _ruleIndex; + size_t _predicateIndex; + std::string _predicate; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/FlatHashMap.h b/contrib/libs/antlr4_cpp_runtime/src/FlatHashMap.h new file mode 100644 index 0000000000..ad5ffa2432 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FlatHashMap.h @@ -0,0 +1,57 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. 
Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "antlr4-common.h" + +#if ANTLR4CPP_USING_ABSEIL +#error #include "absl/container/flat_hash_map.h" +#else +#include <unordered_map> +#endif + +// By default ANTLRv4 uses containers provided by the C++ standard library. In most deployments this +// is fine; however, in some cases using custom containers may be preferred. This header allows that by +// optionally supporting some alternative implementations and allowing for easier patching of +// other alternatives.
+ +namespace antlr4 { + +#if ANTLR4CPP_USING_ABSEIL + template <typename Key, typename Value, + typename Hash = typename absl::flat_hash_map<Key, Value>::hasher, + typename Equal = typename absl::flat_hash_map<Key, Value>::key_equal, + typename Allocator = typename absl::flat_hash_map<Key, Value>::allocator_type> + using FlatHashMap = absl::flat_hash_map<Key, Value, Hash, Equal, Allocator>; +#else + template <typename Key, typename Value, + typename Hash = typename std::unordered_map<Key, Value>::hasher, + typename Equal = typename std::unordered_map<Key, Value>::key_equal, + typename Allocator = typename std::unordered_map<Key, Value>::allocator_type> + using FlatHashMap = std::unordered_map<Key, Value, Hash, Equal, Allocator>; +#endif + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/FlatHashSet.h b/contrib/libs/antlr4_cpp_runtime/src/FlatHashSet.h new file mode 100644 index 0000000000..5396c2bd5d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/FlatHashSet.h @@ -0,0 +1,57 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "antlr4-common.h" + +#if ANTLR4CPP_USING_ABSEIL +#error #include "absl/container/flat_hash_set.h" +#else +#include <unordered_set> +#endif + +// By default ANTLRv4 uses containers provided by the C++ standard library. In most deployments this +// is fine; however, in some cases using custom containers may be preferred. This header allows that by +// optionally supporting some alternative implementations and allowing for easier patching of +// other alternatives.
+ +namespace antlr4 { + +#if ANTLR4CPP_USING_ABSEIL + template <typename Key, + typename Hash = typename absl::flat_hash_set<Key>::hasher, + typename Equal = typename absl::flat_hash_set<Key>::key_equal, + typename Allocator = typename absl::flat_hash_set<Key>::allocator_type> + using FlatHashSet = absl::flat_hash_set<Key, Hash, Equal, Allocator>; +#else + template <typename Key, + typename Hash = typename std::unordered_set<Key>::hasher, + typename Equal = typename std::unordered_set<Key>::key_equal, + typename Allocator = typename std::unordered_set<Key>::allocator_type> + using FlatHashSet = std::unordered_set<Key, Hash, Equal, Allocator>; +#endif + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.cpp b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.cpp new file mode 100644 index 0000000000..4f4947985d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.cpp @@ -0,0 +1,18 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Parser.h" + +#include "InputMismatchException.h" + +using namespace antlr4; + +InputMismatchException::InputMismatchException(Parser *recognizer) + : RecognitionException(recognizer, recognizer->getInputStream(), recognizer->getContext(), + recognizer->getCurrentToken()) { +} + +InputMismatchException::~InputMismatchException() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.h b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.h new file mode 100644 index 0000000000..8b75420968 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InputMismatchException.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlr4 { + + /// <summary> + /// This signifies any kind of mismatched input exceptions such as + /// when the current input does not match the expected token. + /// </summary> + class ANTLR4CPP_PUBLIC InputMismatchException : public RecognitionException { + public: + InputMismatchException(Parser *recognizer); + InputMismatchException(InputMismatchException const&) = default; + ~InputMismatchException(); + InputMismatchException& operator=(InputMismatchException const&) = default; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/IntStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/IntStream.cpp new file mode 100644 index 0000000000..37a90a7cd9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/IntStream.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "IntStream.h" + +using namespace antlr4; + +const std::string IntStream::UNKNOWN_SOURCE_NAME = "<unknown>"; + +IntStream::~IntStream() = default; diff --git a/contrib/libs/antlr4_cpp_runtime/src/IntStream.h b/contrib/libs/antlr4_cpp_runtime/src/IntStream.h new file mode 100644 index 0000000000..40a0f2a9e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/IntStream.h @@ -0,0 +1,218 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// <summary> + /// A simple stream of symbols whose values are represented as integers. 
This + /// interface provides <em>marked ranges</em> with support for a minimum level + /// of buffering necessary to implement arbitrary lookahead during prediction. + /// For more information on marked ranges, see <seealso cref="#mark"/>. + /// <p/> + /// <strong>Initializing Methods:</strong> Some methods in this interface have + /// unspecified behavior if no call to an initializing method has occurred after + /// the stream was constructed. The following is a list of initializing methods: + /// + /// <ul> + /// <li><seealso cref="#LA"/></li> + /// <li><seealso cref="#consume"/></li> + /// <li><seealso cref="#size"/></li> + /// </ul> + /// </summary> + class ANTLR4CPP_PUBLIC IntStream { + public: + static constexpr size_t EOF = std::numeric_limits<size_t>::max(); + + /// The value returned by <seealso cref="#LA LA()"/> when the end of the stream is + /// reached. + /// No explicit EOF definition. We got EOF on all platforms. + //static const size_t _EOF = std::ios::eofbit; + + /// <summary> + /// The value returned by <seealso cref="#getSourceName"/> when the actual name of the + /// underlying source is not known. + /// </summary> + static const std::string UNKNOWN_SOURCE_NAME; + + virtual ~IntStream(); + + /// <summary> + /// Consumes the current symbol in the stream. This method has the following + /// effects: + /// + /// <ul> + /// <li><strong>Forward movement:</strong> The value of <seealso cref="#index index()"/> + /// before calling this method is less than the value of {@code index()} + /// after calling this method.</li> + /// <li><strong>Ordered lookahead:</strong> The value of {@code LA(1)} before + /// calling this method becomes the value of {@code LA(-1)} after calling + /// this method.</li> + /// </ul> + /// + /// Note that calling this method does not guarantee that {@code index()} is + /// incremented by exactly 1, as that would preclude the ability to implement + /// filtering streams (e.g. 
<seealso cref="CommonTokenStream"/> which distinguishes + /// between "on-channel" and "off-channel" tokens). + /// </summary> + /// <exception cref="IllegalStateException"> if an attempt is made to consume the + /// end of the stream (i.e. if {@code LA(1)==}<seealso cref="#EOF EOF"/> before calling + /// {@code consume}). </exception> + virtual void consume() = 0; + + /// <summary> + /// Gets the value of the symbol at offset {@code i} from the current + /// position. When {@code i==1}, this method returns the value of the current + /// symbol in the stream (which is the next symbol to be consumed). When + /// {@code i==-1}, this method returns the value of the previously read + /// symbol in the stream. It is not valid to call this method with + /// {@code i==0}, but the specific behavior is unspecified because this + /// method is frequently called from performance-critical code. + /// <p/> + /// This method is guaranteed to succeed if any of the following are true: + /// + /// <ul> + /// <li>{@code i>0}</li> + /// <li>{@code i==-1} and <seealso cref="#index index()"/> returns a value greater + /// than the value of {@code index()} after the stream was constructed + /// and {@code LA(1)} was called in that order. Specifying the current + /// {@code index()} relative to the index after the stream was created + /// allows for filtering implementations that do not return every symbol + /// from the underlying source. Specifying the call to {@code LA(1)} + /// allows for lazily initialized streams.</li> + /// <li>{@code LA(i)} refers to a symbol consumed within a marked region + /// that has not yet been released.</li> + /// </ul> + /// + /// If {@code i} represents a position at or beyond the end of the stream, + /// this method returns <seealso cref="#EOF"/>. 
+ /// <p/> + /// The return value is unspecified if {@code i<0} and fewer than {@code -i} + /// calls to <seealso cref="#consume consume()"/> have occurred from the beginning of + /// the stream before calling this method. + /// </summary> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// retrieving the value of the specified symbol </exception> + virtual size_t LA(ssize_t i) = 0; + + /// <summary> + /// A mark provides a guarantee that <seealso cref="#seek seek()"/> operations will be + /// valid over a "marked range" extending from the index where {@code mark()} + /// was called to the current <seealso cref="#index index()"/>. This allows the use of + /// streaming input sources by specifying the minimum buffering requirements + /// to support arbitrary lookahead during prediction. + /// <p/> + /// The returned mark is an opaque handle (type {@code int}) which is passed + /// to <seealso cref="#release release()"/> when the guarantees provided by the marked + /// range are no longer necessary. When calls to + /// {@code mark()}/{@code release()} are nested, the marks must be released + /// in reverse order of which they were obtained. Since marked regions are + /// used during performance-critical sections of prediction, the specific + /// behavior of invalid usage is unspecified (i.e. a mark is not released, or + /// a mark is released twice, or marks are not released in reverse order from + /// which they were created). + /// <p/> + /// The behavior of this method is unspecified if no call to an + /// <seealso cref="IntStream initializing method"/> has occurred after this stream was + /// constructed. + /// <p/> + /// This method does not change the current position in the input stream. 
+ /// <p/> + /// The following example shows the use of <seealso cref="#mark mark()"/>, + /// <seealso cref="#release release(mark)"/>, <seealso cref="#index index()"/>, and + /// <seealso cref="#seek seek(index)"/> as part of an operation to safely work within a + /// marked region, then restore the stream position to its original value and + /// release the mark. + /// <pre> + /// IntStream stream = ...; + /// int index = -1; + /// int mark = stream.mark(); + /// try { + /// index = stream.index(); + /// // perform work here... + /// } finally { + /// if (index != -1) { + /// stream.seek(index); + /// } + /// stream.release(mark); + /// } + /// </pre> + /// </summary> + /// <returns> An opaque marker which should be passed to + /// <seealso cref="#release release()"/> when the marked range is no longer required. </returns> + virtual ssize_t mark() = 0; + + /// <summary> + /// This method releases a marked range created by a call to + /// <seealso cref="#mark mark()"/>. Calls to {@code release()} must appear in the + /// reverse order of the corresponding calls to {@code mark()}. If a mark is + /// released twice, or if marks are not released in reverse order of the + /// corresponding calls to {@code mark()}, the behavior is unspecified. + /// <p/> + /// For more information and an example, see <seealso cref="#mark"/>. + /// </summary> + /// <param name="marker"> A marker returned by a call to {@code mark()}. </param> + /// <seealso cref= #mark </seealso> + virtual void release(ssize_t marker) = 0; + + /// <summary> + /// Return the index into the stream of the input symbol referred to by + /// {@code LA(1)}. + /// <p/> + /// The behavior of this method is unspecified if no call to an + /// <seealso cref="IntStream initializing method"/> has occurred after this stream was + /// constructed. + /// </summary> + virtual size_t index() = 0; + + /// <summary> + /// Set the input cursor to the position indicated by {@code index}. 
If the + /// specified index lies past the end of the stream, the operation behaves as + /// though {@code index} was the index of the EOF symbol. After this method + /// returns without throwing an exception, then at least one of the following + /// will be true. + /// + /// <ul> + /// <li><seealso cref="#index index()"/> will return the index of the first symbol + /// appearing at or after the specified {@code index}. Specifically, + /// implementations which filter their sources should automatically + /// adjust {@code index} forward the minimum amount required for the + /// operation to target a non-ignored symbol.</li> + /// <li>{@code LA(1)} returns <seealso cref="#EOF"/></li> + /// </ul> + /// + /// This operation is guaranteed to not throw an exception if {@code index} + /// lies within a marked region. For more information on marked regions, see + /// <seealso cref="#mark"/>. The behavior of this method is unspecified if no call to + /// an <seealso cref="IntStream initializing method"/> has occurred after this stream + /// was constructed. + /// </summary> + /// <param name="index"> The absolute index to seek to. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code index} is less than 0 </exception> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// seeking to the specified index </exception> + virtual void seek(size_t index) = 0; + + /// <summary> + /// Returns the total number of symbols in the stream, including a single EOF + /// symbol. + /// </summary> + /// <exception cref="UnsupportedOperationException"> if the size of the stream is + /// unknown. </exception> + virtual size_t size() = 0; + + /// <summary> + /// Gets the name of the underlying symbol source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns <seealso cref="#UNKNOWN_SOURCE_NAME"/>. 
+ /// </summary> + virtual std::string getSourceName() const = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.cpp new file mode 100644 index 0000000000..f2812ba910 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "InterpreterRuleContext.h" + +using namespace antlr4; + +InterpreterRuleContext::InterpreterRuleContext() : ParserRuleContext() { +} + +InterpreterRuleContext::InterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex) + : ParserRuleContext(parent, invokingStateNumber), _ruleIndex(ruleIndex) { +} + +size_t InterpreterRuleContext::getRuleIndex() const { + return _ruleIndex; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.h b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.h new file mode 100644 index 0000000000..a34d06b1f1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/InterpreterRuleContext.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ParserRuleContext.h" + +namespace antlr4 { + + /** + * This class extends {@link ParserRuleContext} by allowing the value of + * {@link #getRuleIndex} to be explicitly set for the context. + * + * <p> + * {@link ParserRuleContext} does not include field storage for the rule index + * since the context classes created by the code generator override the + * {@link #getRuleIndex} method to return the correct value for that context. 
+ * Since the parser interpreter does not use the context classes generated for a + * parser, this class (with slightly more memory overhead per node) is used to + * provide equivalent functionality.</p> + */ + class ANTLR4CPP_PUBLIC InterpreterRuleContext : public ParserRuleContext { + public: + InterpreterRuleContext(); + + /** + * Constructs a new {@link InterpreterRuleContext} with the specified + * parent, invoking state, and rule index. + * + * @param parent The parent context. + * @param invokingStateNumber The invoking state number. + * @param ruleIndex The rule index for the current context. + */ + InterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual size_t getRuleIndex() const override; + + protected: + /** This is the backing field for {@link #getRuleIndex}. */ + const size_t _ruleIndex = INVALID_INDEX; +}; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp b/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp new file mode 100644 index 0000000000..b0385c56ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp @@ -0,0 +1,294 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LexerATNSimulator.h" +#include "Exceptions.h" +#include "misc/Interval.h" +#include "CommonTokenFactory.h" +#include "LexerNoViableAltException.h" +#include "ANTLRErrorListener.h" +#include "support/CPPUtils.h" +#include "CommonToken.h" + +#include "Lexer.h" + +#define DEBUG_LEXER 0 + +using namespace antlrcpp; +using namespace antlr4; + +Lexer::Lexer() : Recognizer() { + InitializeInstanceFields(); + _input = nullptr; +} + +Lexer::Lexer(CharStream *input) : Recognizer(), _input(input) { + InitializeInstanceFields(); +} + +void Lexer::reset() { + // wack Lexer state variables + _input->seek(0); // rewind the input + + _syntaxErrors = 0; + token.reset(); + type = Token::INVALID_TYPE; + channel = Token::DEFAULT_CHANNEL; + tokenStartCharIndex = INVALID_INDEX; + tokenStartCharPositionInLine = 0; + tokenStartLine = 0; + type = 0; + _text = ""; + + hitEOF = false; + mode = Lexer::DEFAULT_MODE; + modeStack.clear(); + + getInterpreter<atn::LexerATNSimulator>()->reset(); +} + +std::unique_ptr<Token> Lexer::nextToken() { + // Mark start location in char stream so unbuffered streams are + // guaranteed at least have text of current token + ssize_t tokenStartMarker = _input->mark(); + + auto onExit = finally([this, tokenStartMarker]{ + // make sure we release marker after match or + // unbuffered char stream will keep buffering + _input->release(tokenStartMarker); + }); + + while (true) { + outerContinue: + if (hitEOF) { + emitEOF(); + return std::move(token); + } + + token.reset(); + channel = Token::DEFAULT_CHANNEL; + tokenStartCharIndex = _input->index(); + tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine(); + tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine(); + _text = ""; + do { + type = Token::INVALID_TYPE; + size_t ttype; + try { + ttype = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode); + } catch (LexerNoViableAltException &e) { + notifyListeners(e); // report error + 
recover(e); + ttype = SKIP; + } + if (_input->LA(1) == EOF) { + hitEOF = true; + } + if (type == Token::INVALID_TYPE) { + type = ttype; + } + if (type == SKIP) { + goto outerContinue; + } + } while (type == MORE); + if (token == nullptr) { + emit(); + } + return std::move(token); + } +} + +void Lexer::skip() { + type = SKIP; +} + +void Lexer::more() { + type = MORE; +} + +void Lexer::setMode(size_t m) { + mode = m; +} + +void Lexer::pushMode(size_t m) { +#if DEBUG_LEXER == 1 + std::cout << "pushMode " << m << std::endl; +#endif + + modeStack.push_back(mode); + setMode(m); +} + +size_t Lexer::popMode() { + if (modeStack.empty()) { + throw EmptyStackException(); + } +#if DEBUG_LEXER == 1 + std::cout << std::string("popMode back to ") << modeStack.back() << std::endl; +#endif + + setMode(modeStack.back()); + modeStack.pop_back(); + return mode; +} + + +TokenFactory<CommonToken>* Lexer::getTokenFactory() { + return _factory; +} + +void Lexer::setInputStream(IntStream *input) { + reset(); + _input = dynamic_cast<CharStream*>(input); +} + +std::string Lexer::getSourceName() { + return _input->getSourceName(); +} + +CharStream* Lexer::getInputStream() { + return _input; +} + +void Lexer::emit(std::unique_ptr<Token> newToken) { + token = std::move(newToken); +} + +Token* Lexer::emit() { + emit(_factory->create({ this, _input }, type, _text, channel, + tokenStartCharIndex, getCharIndex() - 1, tokenStartLine, tokenStartCharPositionInLine)); + return token.get(); +} + +Token* Lexer::emitEOF() { + size_t cpos = getCharPositionInLine(); + size_t line = getLine(); + emit(_factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, _input->index(), _input->index() - 1, line, cpos)); + return token.get(); +} + +size_t Lexer::getLine() const { + return getInterpreter<atn::LexerATNSimulator>()->getLine(); +} + +size_t Lexer::getCharPositionInLine() { + return getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine(); +} + +void Lexer::setLine(size_t line) { + 
getInterpreter<atn::LexerATNSimulator>()->setLine(line); +} + +void Lexer::setCharPositionInLine(size_t charPositionInLine) { + getInterpreter<atn::LexerATNSimulator>()->setCharPositionInLine(charPositionInLine); +} + +size_t Lexer::getCharIndex() { + return _input->index(); +} + +std::string Lexer::getText() { + if (!_text.empty()) { + return _text; + } + return getInterpreter<atn::LexerATNSimulator>()->getText(_input); +} + +void Lexer::setText(const std::string &text) { + _text = text; +} + +std::unique_ptr<Token> Lexer::getToken() { + return std::move(token); +} + +void Lexer::setToken(std::unique_ptr<Token> newToken) { + token = std::move(newToken); +} + +void Lexer::setType(size_t ttype) { + type = ttype; +} + +size_t Lexer::getType() { + return type; +} + +void Lexer::setChannel(size_t newChannel) { + channel = newChannel; +} + +size_t Lexer::getChannel() { + return channel; +} + +std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() { + std::vector<std::unique_ptr<Token>> tokens; + std::unique_ptr<Token> t = nextToken(); + while (t->getType() != EOF) { + tokens.push_back(std::move(t)); + t = nextToken(); + } + return tokens; +} + +void Lexer::recover(const LexerNoViableAltException &/*e*/) { + if (_input->LA(1) != EOF) { + // skip a char and try again + getInterpreter<atn::LexerATNSimulator>()->consume(_input); + } +} + +void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) { + ++_syntaxErrors; + std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index())); + std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'"); + + ProxyErrorListener &listener = getErrorListenerDispatch(); + listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception()); +} + +std::string Lexer::getErrorDisplay(const std::string &s) { + std::stringstream ss; + for (auto c : s) { + switch (c) { + case '\n': + ss << "\\n"; + break; + case '\t': 
+ ss << "\\t"; + break; + case '\r': + ss << "\\r"; + break; + default: + ss << c; + break; + } + } + return ss.str(); +} + +void Lexer::recover(RecognitionException * /*re*/) { + // TODO: Do we lose character or line position information? + _input->consume(); +} + +size_t Lexer::getNumberOfSyntaxErrors() { + return _syntaxErrors; +} + +void Lexer::InitializeInstanceFields() { + _syntaxErrors = 0; + token = nullptr; + _factory = CommonTokenFactory::DEFAULT.get(); + tokenStartCharIndex = INVALID_INDEX; + tokenStartLine = 0; + tokenStartCharPositionInLine = 0; + hitEOF = false; + channel = 0; + type = 0; + mode = Lexer::DEFAULT_MODE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Lexer.h b/contrib/libs/antlr4_cpp_runtime/src/Lexer.h new file mode 100644 index 0000000000..77033ad9e6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Lexer.h @@ -0,0 +1,196 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "TokenSource.h" +#include "CharStream.h" +#include "Token.h" + +namespace antlr4 { + + /// A lexer is a recognizer that draws input symbols from a character stream. + /// Lexer grammars result in a subclass of this object. A Lexer object + /// uses simplified match() and error recovery mechanisms in the interest + /// of speed. 
+ class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource { + public: + static constexpr size_t DEFAULT_MODE = 0; + static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1; + static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2; + + static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL; + static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL; + static constexpr size_t MIN_CHAR_VALUE = 0; + static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF; + + CharStream *_input; // Pure reference, usually from statically allocated instance. + + protected: + /// How to create token objects. + TokenFactory<CommonToken> *_factory; + + public: + /// The goal of all lexer rules/methods is to create a token object. + /// This is an instance variable as multiple rules may collaborate to + /// create a single token. nextToken will return this object after + /// matching lexer rule(s). If you subclass to allow multiple token + /// emissions, then set this to the last token to be matched or + /// something nonnull so that the auto token emit mechanism will not + /// emit another token. + + // Life cycle of a token is this: + // Created by emit() (via the token factory) or by action code, holding ownership of it. + // Ownership is handed over to the token stream when calling nextToken(). + std::unique_ptr<Token> token; + + /// <summary> + /// What character index in the stream did the current token start at? + /// Needed, for example, to get the text for current token. Set at + /// the start of nextToken. + /// </summary> + size_t tokenStartCharIndex; + + /// <summary> + /// The line on which the first character of the token resides </summary> + size_t tokenStartLine; + + /// The character position of first character within the line. + size_t tokenStartCharPositionInLine; + + /// Once we see EOF on char stream, next token will be EOF. + /// If you have DONE : EOF ; then you see DONE EOF. 
+ bool hitEOF; + + /// The channel number for the current token. + size_t channel; + + /// The token type for the current token. + size_t type; + + // Use the vector as a stack. + std::vector<size_t> modeStack; + size_t mode; + + Lexer(); + Lexer(CharStream *input); + virtual ~Lexer() {} + + virtual void reset(); + + /// Return a token from this source; i.e., match a token on the char stream. + virtual std::unique_ptr<Token> nextToken() override; + + /// Instruct the lexer to skip creating a token for current lexer rule + /// and look for another token. nextToken() knows to keep looking when + /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that + /// if token == null at end of any token rule, it creates one for you + /// and emits it. + virtual void skip(); + virtual void more(); + virtual void setMode(size_t m); + virtual void pushMode(size_t m); + virtual size_t popMode(); + + template<typename T1> + void setTokenFactory(TokenFactory<T1> *factory) { + this->_factory = factory; + } + + virtual TokenFactory<CommonToken>* getTokenFactory() override; + + /// Set the char stream and reset the lexer + virtual void setInputStream(IntStream *input) override; + + virtual std::string getSourceName() override; + + virtual CharStream* getInputStream() override; + + /// By default does not support multiple emits per nextToken invocation + /// for efficiency reasons. Subclasses can override this method, nextToken, + /// and getToken (to push tokens into a list and pull from that list + /// rather than a single variable as this implementation does). + virtual void emit(std::unique_ptr<Token> newToken); + + /// The standard method called to automatically emit a token at the + /// outermost lexical rule. The token object should point into the + /// char buffer start..stop. If there is a text override in 'text', + /// use that to set the token's text. Override this method to emit + /// custom Token objects or provide a new factory. 
+ virtual Token* emit(); + + virtual Token* emitEOF(); + + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() override; + + virtual void setLine(size_t line); + + virtual void setCharPositionInLine(size_t charPositionInLine); + + /// What is the index of the current character of lookahead? + virtual size_t getCharIndex(); + + /// Return the text matched so far for the current token or any + /// text override. + virtual std::string getText(); + + /// Set the complete text of this token; it wipes any previous + /// changes to the text. + virtual void setText(const std::string &text); + + /// Override if emitting multiple tokens. + virtual std::unique_ptr<Token> getToken(); + + virtual void setToken(std::unique_ptr<Token> newToken); + + virtual void setType(size_t ttype); + + virtual size_t getType(); + + virtual void setChannel(size_t newChannel); + + virtual size_t getChannel(); + + virtual const std::vector<std::string>& getChannelNames() const = 0; + + virtual const std::vector<std::string>& getModeNames() const = 0; + + /// Return a list of all Token objects in input char stream. + /// Forces load of all tokens. Does not include EOF token. + virtual std::vector<std::unique_ptr<Token>> getAllTokens(); + + virtual void recover(const LexerNoViableAltException &e); + + virtual void notifyListeners(const LexerNoViableAltException &e); + + virtual std::string getErrorDisplay(const std::string &s); + + /// Lexers can normally match any char in its vocabulary after matching + /// a token, so do the easy thing and just kill a character and hope + /// it all works out. You can instead use the rule invocation stack + /// to do sophisticated error recovery if you are in a fragment rule. + virtual void recover(RecognitionException *re); + + /// <summary> + /// Gets the number of syntax errors reported during parsing. This value is + /// incremented each time <seealso cref="#notifyListeners"/> is called. 
+ /// </summary> + /// <seealso cref= #notifyListeners </seealso> + virtual size_t getNumberOfSyntaxErrors(); + + protected: + /// You can set the text for the current token to override what is in + /// the input char buffer (via setText()). + std::string _text; + + private: + size_t _syntaxErrors; + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.cpp b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.cpp new file mode 100644 index 0000000000..38acd09ddd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNType.h" +#include "atn/LexerATNSimulator.h" +#include "dfa/DFA.h" +#include "Exceptions.h" +#include "Vocabulary.h" + +#include "LexerInterpreter.h" + +using namespace antlr4; + +LexerInterpreter::LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames, const std::vector<std::string> &modeNames, + const atn::ATN &atn, CharStream *input) + : Lexer(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), + _channelNames(channelNames), _modeNames(modeNames), + _vocabulary(vocabulary) { + + if (_atn.grammarType != atn::ATNType::LEXER) { + throw IllegalArgumentException("The ATN must be a lexer ATN."); + } + + for (size_t i = 0; i < atn.getNumberOfDecisions(); ++i) { + _decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState(i), i)); + } + _interpreter = new atn::LexerATNSimulator(this, _atn, _decisionToDFA, _sharedContextCache); /* mem-check: deleted in d-tor */ +} + +LexerInterpreter::~LexerInterpreter() +{ + delete _interpreter; +} + +const atn::ATN& LexerInterpreter::getATN() const { + 
return _atn; +} + +std::string LexerInterpreter::getGrammarFileName() const { + return _grammarFileName; +} + +const std::vector<std::string>& LexerInterpreter::getRuleNames() const { + return _ruleNames; +} + +const std::vector<std::string>& LexerInterpreter::getChannelNames() const { + return _channelNames; +} + +const std::vector<std::string>& LexerInterpreter::getModeNames() const { + return _modeNames; +} + +const dfa::Vocabulary& LexerInterpreter::getVocabulary() const { + return _vocabulary; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.h b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.h new file mode 100644 index 0000000000..3787c1d0d5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerInterpreter.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Lexer.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "Vocabulary.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC LexerInterpreter : public Lexer { + public: + LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames, + const std::vector<std::string> &modeNames, const atn::ATN &atn, CharStream *input); + + ~LexerInterpreter(); + + virtual const atn::ATN& getATN() const override; + virtual std::string getGrammarFileName() const override; + virtual const std::vector<std::string>& getRuleNames() const override; + virtual const std::vector<std::string>& getChannelNames() const override; + virtual const std::vector<std::string>& getModeNames() const override; + + virtual const dfa::Vocabulary& getVocabulary() const override; + + protected: + const std::string _grammarFileName; + const atn::ATN &_atn; + + const 
std::vector<std::string> &_ruleNames; + const std::vector<std::string> &_channelNames; + const std::vector<std::string> &_modeNames; + std::vector<dfa::DFA> _decisionToDFA; + + atn::PredictionContextCache _sharedContextCache; + + private: + dfa::Vocabulary _vocabulary; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.cpp b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.cpp new file mode 100644 index 0000000000..3304b82b40 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" +#include "support/CPPUtils.h" +#include "CharStream.h" +#include "Lexer.h" + +#include "LexerNoViableAltException.h" + +using namespace antlr4; + +LexerNoViableAltException::LexerNoViableAltException(Lexer *lexer, CharStream *input, size_t startIndex, + atn::ATNConfigSet *deadEndConfigs) + : RecognitionException(lexer, input, nullptr, nullptr), _startIndex(startIndex), _deadEndConfigs(deadEndConfigs) { +} + +size_t LexerNoViableAltException::getStartIndex() { + return _startIndex; +} + +atn::ATNConfigSet* LexerNoViableAltException::getDeadEndConfigs() { + return _deadEndConfigs; +} + +std::string LexerNoViableAltException::toString() { + std::string symbol; + if (_startIndex < getInputStream()->size()) { + symbol = static_cast<CharStream *>(getInputStream())->getText(misc::Interval(_startIndex, _startIndex)); + symbol = antlrcpp::escapeWhitespace(symbol, false); + } + std::string format = "LexerNoViableAltException('" + symbol + "')"; + return format; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.h b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.h new file mode 100644 index 0000000000..52eada7cfa --- 
/dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/LexerNoViableAltException.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC LexerNoViableAltException : public RecognitionException { + public: + LexerNoViableAltException(Lexer *lexer, CharStream *input, size_t startIndex, + atn::ATNConfigSet *deadEndConfigs); + + virtual size_t getStartIndex(); + virtual atn::ATNConfigSet* getDeadEndConfigs(); + virtual std::string toString(); + + private: + /// Matching attempted at what input index? + const size_t _startIndex; + + /// Which configurations did we try at input.index() that couldn't match input.LA(1)? + atn::ATNConfigSet *_deadEndConfigs; + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.cpp b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.cpp new file mode 100644 index 0000000000..45372808e5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.cpp @@ -0,0 +1,92 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Token.h" +#include "CommonToken.h" +#include "CharStream.h" + +#include "ListTokenSource.h" + +using namespace antlr4; + +ListTokenSource::ListTokenSource(std::vector<std::unique_ptr<Token>> tokens_) : ListTokenSource(std::move(tokens_), "") { +} + +ListTokenSource::ListTokenSource(std::vector<std::unique_ptr<Token>> tokens_, const std::string &sourceName_) + : tokens(std::move(tokens_)), sourceName(sourceName_) { + InitializeInstanceFields(); + if (tokens.empty()) { + throw "tokens cannot be null"; + } + + // Check if there is an eof token and create one if not. + if (tokens.back()->getType() != Token::EOF) { + Token *lastToken = tokens.back().get(); + size_t start = INVALID_INDEX; + size_t previousStop = lastToken->getStopIndex(); + if (previousStop != INVALID_INDEX) { + start = previousStop + 1; + } + + size_t stop = std::max(INVALID_INDEX, start - 1); + tokens.emplace_back((_factory->create({ this, getInputStream() }, Token::EOF, "EOF", + Token::DEFAULT_CHANNEL, start, stop, static_cast<int>(lastToken->getLine()), lastToken->getCharPositionInLine()))); + } +} + +size_t ListTokenSource::getCharPositionInLine() { + if (i < tokens.size()) { + return tokens[i]->getCharPositionInLine(); + } + return 0; +} + +std::unique_ptr<Token> ListTokenSource::nextToken() { + if (i < tokens.size()) { + return std::move(tokens[i++]); + } + return nullptr; +} + +size_t ListTokenSource::getLine() const { + if (i < tokens.size()) { + return tokens[i]->getLine(); + } + + return 1; +} + +CharStream *ListTokenSource::getInputStream() { + if (i < tokens.size()) { + return tokens[i]->getInputStream(); + } else if (!tokens.empty()) { + return tokens.back()->getInputStream(); + } + + // no input stream information is available + return nullptr; +} + +std::string ListTokenSource::getSourceName() { + if (sourceName != "") { + return sourceName; + } + + CharStream *inputStream = getInputStream(); + if (inputStream != nullptr) { + return inputStream->getSourceName(); + } + + 
return "List"; +} + +TokenFactory<CommonToken>* ListTokenSource::getTokenFactory() { + return _factory; +} + +void ListTokenSource::InitializeInstanceFields() { + i = 0; + _factory = CommonTokenFactory::DEFAULT.get(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.h b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.h new file mode 100644 index 0000000000..542b05cb5a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ListTokenSource.h @@ -0,0 +1,88 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenSource.h" +#include "CommonTokenFactory.h" + +namespace antlr4 { + + /// Provides an implementation of <seealso cref="TokenSource"/> as a wrapper around a list + /// of <seealso cref="Token"/> objects. + /// + /// If the final token in the list is an <seealso cref="Token#EOF"/> token, it will be used + /// as the EOF token for every call to <seealso cref="#nextToken"/> after the end of the + /// list is reached. Otherwise, an EOF token will be created. + class ANTLR4CPP_PUBLIC ListTokenSource : public TokenSource { + protected: + // This list will be emptied token by token as we call nextToken(). + // Token streams can be used to buffer tokens for a while. + std::vector<std::unique_ptr<Token>> tokens; + + private: + /// <summary> + /// The name of the input source. If this value is {@code null}, a call to + /// <seealso cref="#getSourceName"/> should return the source name used to create the + /// the next token in <seealso cref="#tokens"/> (or the previous token if the end of + /// the input has been reached). + /// </summary> + const std::string sourceName; + + protected: + /// The index into <seealso cref="#tokens"/> of token to return by the next call to + /// <seealso cref="#nextToken"/>. 
The end of the input is indicated by this value + /// being greater than or equal to the number of items in <seealso cref="#tokens"/>. + size_t i; + + private: + /// This is the backing field for <seealso cref="#getTokenFactory"/> and + /// <seealso cref="setTokenFactory"/>. + TokenFactory<CommonToken> *_factory = CommonTokenFactory::DEFAULT.get(); + + public: + /// Constructs a new <seealso cref="ListTokenSource"/> instance from the specified + /// collection of <seealso cref="Token"/> objects. + /// + /// <param name="tokens"> The collection of <seealso cref="Token"/> objects to provide as a + /// <seealso cref="TokenSource"/>. </param> + /// <exception cref="NullPointerException"> if {@code tokens} is {@code null} </exception> + ListTokenSource(std::vector<std::unique_ptr<Token>> tokens); + ListTokenSource(const ListTokenSource& other) = delete; + + ListTokenSource& operator = (const ListTokenSource& other) = delete; + + /// <summary> + /// Constructs a new <seealso cref="ListTokenSource"/> instance from the specified + /// collection of <seealso cref="Token"/> objects and source name. + /// </summary> + /// <param name="tokens"> The collection of <seealso cref="Token"/> objects to provide as a + /// <seealso cref="TokenSource"/>. </param> + /// <param name="sourceName"> The name of the <seealso cref="TokenSource"/>. If this value is + /// {@code null}, <seealso cref="#getSourceName"/> will attempt to infer the name from + /// the next <seealso cref="Token"/> (or the previous token if the end of the input has + /// been reached). 
+ /// </param> + /// <exception cref="NullPointerException"> if {@code tokens} is {@code null} </exception> + ListTokenSource(std::vector<std::unique_ptr<Token>> tokens_, const std::string &sourceName_); + + virtual size_t getCharPositionInLine() override; + virtual std::unique_ptr<Token> nextToken() override; + virtual size_t getLine() const override; + virtual CharStream* getInputStream() override; + virtual std::string getSourceName() override; + + template<typename T1> + void setTokenFactory(TokenFactory<T1> *factory) { + this->_factory = factory; + } + + virtual TokenFactory<CommonToken>* getTokenFactory() override; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.cpp b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.cpp new file mode 100644 index 0000000000..273c208c74 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.cpp @@ -0,0 +1,46 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Parser.h" + +#include "NoViableAltException.h" + +using namespace antlr4; + +namespace { + +// Create a normal shared pointer if the configurations are to be deleted. If not, then +// the shared pointer is created with a deleter that does nothing. 
+Ref<atn::ATNConfigSet> buildConfigsRef(atn::ATNConfigSet *configs, bool deleteConfigs) { + if (deleteConfigs) { + return Ref<atn::ATNConfigSet>(configs); + } else { + return Ref<atn::ATNConfigSet>(configs, [](atn::ATNConfigSet *){}); + } +} + +} + +NoViableAltException::NoViableAltException(Parser *recognizer) + : NoViableAltException(recognizer, recognizer->getTokenStream(), recognizer->getCurrentToken(), + recognizer->getCurrentToken(), nullptr, recognizer->getContext(), false) { +} + +NoViableAltException::NoViableAltException(Parser *recognizer, TokenStream *input,Token *startToken, + Token *offendingToken, atn::ATNConfigSet *deadEndConfigs, ParserRuleContext *ctx, bool deleteConfigs) + : RecognitionException("No viable alternative", recognizer, input, ctx, offendingToken), + _deadEndConfigs(buildConfigsRef(deadEndConfigs, deleteConfigs)), _startToken(startToken) { +} + +NoViableAltException::~NoViableAltException() { +} + +Token* NoViableAltException::getStartToken() const { + return _startToken; +} + +atn::ATNConfigSet* NoViableAltException::getDeadEndConfigs() const { + return _deadEndConfigs.get(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.h b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.h new file mode 100644 index 0000000000..b15039d0cb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/NoViableAltException.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" +#include "Token.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { + + /// Indicates that the parser could not decide which of two or more paths + /// to take based upon the remaining input. It tracks the starting token + /// of the offending input and also knows where the parser was + /// in the various paths when the error. 
Reported by reportNoViableAlternative() + class ANTLR4CPP_PUBLIC NoViableAltException : public RecognitionException { + public: + NoViableAltException(Parser *recognizer); // LL(1) error + NoViableAltException(Parser *recognizer, TokenStream *input,Token *startToken, + Token *offendingToken, atn::ATNConfigSet *deadEndConfigs, ParserRuleContext *ctx, bool deleteConfigs); + ~NoViableAltException(); + + virtual Token* getStartToken() const; + virtual atn::ATNConfigSet* getDeadEndConfigs() const; + + private: + /// Which configurations did we try at input.index() that couldn't match input.LT(1)? + /// Shared pointer that conditionally deletes the configurations (based on flag + /// passed during construction) + Ref<atn::ATNConfigSet> _deadEndConfigs; + + /// The token object at the start index; the input stream might + /// not be buffering tokens so get a reference to it. (At the + /// time the error occurred, of course the stream needs to keep a + /// buffer all of the tokens but later we might not have access to those.) + Token *_startToken; + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Parser.cpp b/contrib/libs/antlr4_cpp_runtime/src/Parser.cpp new file mode 100644 index 0000000000..337bcba17a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Parser.cpp @@ -0,0 +1,670 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNDeserializationOptions.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "dfa/DFA.h" +#include "ParserRuleContext.h" +#include "tree/TerminalNode.h" +#include "tree/ErrorNodeImpl.h" +#include "Lexer.h" +#include "atn/ParserATNSimulator.h" +#include "misc/IntervalSet.h" +#include "atn/RuleStartState.h" +#include "DefaultErrorStrategy.h" +#include "atn/ATNDeserializer.h" +#include "atn/RuleTransition.h" +#include "atn/ATN.h" +#include "Exceptions.h" +#include "ANTLRErrorListener.h" +#include "tree/pattern/ParseTreePattern.h" +#include "internal/Synchronization.h" + +#include "atn/ProfilingATNSimulator.h" +#include "atn/ParseInfo.h" + +#include "Parser.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +namespace { + +struct BypassAltsAtnCache final { + std::shared_mutex mutex; + /// This field maps from the serialized ATN string to the deserialized <seealso cref="ATN"/> with + /// bypass alternatives. 
+ /// + /// <seealso cref= ATNDeserializationOptions#isGenerateRuleBypassTransitions() </seealso> + std::map<std::vector<int32_t>, std::unique_ptr<const atn::ATN>, std::less<>> map; +}; + +BypassAltsAtnCache* getBypassAltsAtnCache() { + static BypassAltsAtnCache* const instance = new BypassAltsAtnCache(); + return instance; +} + +} + +Parser::TraceListener::TraceListener(Parser *outerInstance_) : outerInstance(outerInstance_) { +} + +Parser::TraceListener::~TraceListener() { +} + +void Parser::TraceListener::enterEveryRule(ParserRuleContext *ctx) { + std::cout << "enter " << outerInstance->getRuleNames()[ctx->getRuleIndex()] + << ", LT(1)=" << outerInstance->_input->LT(1)->getText() << std::endl; +} + +void Parser::TraceListener::visitTerminal(tree::TerminalNode *node) { + std::cout << "consume " << node->getSymbol() << " rule " + << outerInstance->getRuleNames()[outerInstance->getContext()->getRuleIndex()] << std::endl; +} + +void Parser::TraceListener::visitErrorNode(tree::ErrorNode * /*node*/) { +} + +void Parser::TraceListener::exitEveryRule(ParserRuleContext *ctx) { + std::cout << "exit " << outerInstance->getRuleNames()[ctx->getRuleIndex()] + << ", LT(1)=" << outerInstance->_input->LT(1)->getText() << std::endl; +} + +Parser::TrimToSizeListener Parser::TrimToSizeListener::INSTANCE; + +Parser::TrimToSizeListener::~TrimToSizeListener() { +} + +void Parser::TrimToSizeListener::enterEveryRule(ParserRuleContext * /*ctx*/) { +} + +void Parser::TrimToSizeListener::visitTerminal(tree::TerminalNode * /*node*/) { +} + +void Parser::TrimToSizeListener::visitErrorNode(tree::ErrorNode * /*node*/) { +} + +void Parser::TrimToSizeListener::exitEveryRule(ParserRuleContext * ctx) { + ctx->children.shrink_to_fit(); +} + +Parser::Parser(TokenStream *input) { + InitializeInstanceFields(); + setInputStream(input); +} + +Parser::~Parser() { + _tracker.reset(); + delete _tracer; +} + +void Parser::reset() { + if (getInputStream() != nullptr) { + getInputStream()->seek(0); + } + 
_errHandler->reset(this); // Watch out, this is not shared_ptr.reset(). + + _matchedEOF = false; + _syntaxErrors = 0; + setTrace(false); + _precedenceStack.clear(); + _precedenceStack.push_back(0); + _ctx = nullptr; + _tracker.reset(); + + atn::ATNSimulator *interpreter = getInterpreter<atn::ParserATNSimulator>(); + if (interpreter != nullptr) { + interpreter->reset(); + } +} + +Token* Parser::match(size_t ttype) { + Token *t = getCurrentToken(); + if (t->getType() == ttype) { + if (ttype == EOF) { + _matchedEOF = true; + } + _errHandler->reportMatch(this); + consume(); + } else { + t = _errHandler->recoverInline(this); + if (_buildParseTrees && t->getTokenIndex() == INVALID_INDEX) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx->addChild(createErrorNode(t)); + } + } + return t; +} + +Token* Parser::matchWildcard() { + Token *t = getCurrentToken(); + if (t->getType() > 0) { + _errHandler->reportMatch(this); + consume(); + } else { + t = _errHandler->recoverInline(this); + if (_buildParseTrees && t->getTokenIndex() == INVALID_INDEX) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx->addChild(createErrorNode(t)); + } + } + + return t; +} + +void Parser::setBuildParseTree(bool buildParseTrees) { + this->_buildParseTrees = buildParseTrees; +} + +bool Parser::getBuildParseTree() { + return _buildParseTrees; +} + +void Parser::setTrimParseTree(bool trimParseTrees) { + if (trimParseTrees) { + if (getTrimParseTree()) { + return; + } + addParseListener(&TrimToSizeListener::INSTANCE); + } else { + removeParseListener(&TrimToSizeListener::INSTANCE); + } +} + +bool Parser::getTrimParseTree() { + return std::find(getParseListeners().begin(), getParseListeners().end(), &TrimToSizeListener::INSTANCE) != getParseListeners().end(); +} + +std::vector<tree::ParseTreeListener *> Parser::getParseListeners() { + return _parseListeners; +} + +void 
Parser::addParseListener(tree::ParseTreeListener *listener) { + if (!listener) { + throw NullPointerException("listener"); + } + + this->_parseListeners.push_back(listener); +} + +void Parser::removeParseListener(tree::ParseTreeListener *listener) { + if (!_parseListeners.empty()) { + auto it = std::find(_parseListeners.begin(), _parseListeners.end(), listener); + if (it != _parseListeners.end()) { + _parseListeners.erase(it); + } + } +} + +void Parser::removeParseListeners() { + _parseListeners.clear(); +} + +void Parser::triggerEnterRuleEvent() { + for (auto *listener : _parseListeners) { + listener->enterEveryRule(_ctx); + _ctx->enterRule(listener); + } +} + +void Parser::triggerExitRuleEvent() { + // reverse order walk of listeners + for (auto it = _parseListeners.rbegin(); it != _parseListeners.rend(); ++it) { + _ctx->exitRule(*it); + (*it)->exitEveryRule(_ctx); + } +} + +size_t Parser::getNumberOfSyntaxErrors() { + return _syntaxErrors; +} + +TokenFactory<CommonToken>* Parser::getTokenFactory() { + return _input->getTokenSource()->getTokenFactory(); +} + +const atn::ATN& Parser::getATNWithBypassAlts() { + auto serializedAtn = getSerializedATN(); + if (serializedAtn.empty()) { + throw UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives."); + } + // XXX: using the entire serialized ATN as key into the map is a big resource waste. + // How large can that thing become? 
+ auto *cache = getBypassAltsAtnCache(); + { + std::shared_lock<std::shared_mutex> lock(cache->mutex); + auto existing = cache->map.find(serializedAtn); + if (existing != cache->map.end()) { + return *existing->second; + } + } + + std::unique_lock<std::shared_mutex> lock(cache->mutex); + auto existing = cache->map.find(serializedAtn); + if (existing != cache->map.end()) { + return *existing->second; + } + atn::ATNDeserializationOptions deserializationOptions; + deserializationOptions.setGenerateRuleBypassTransitions(true); + atn::ATNDeserializer deserializer(deserializationOptions); + auto atn = deserializer.deserialize(serializedAtn); + return *cache->map.insert(std::make_pair(std::vector<int32_t>(serializedAtn.begin(), serializedAtn.end()), std::move(atn))).first->second; +} + +tree::pattern::ParseTreePattern Parser::compileParseTreePattern(const std::string &pattern, int patternRuleIndex) { + if (getTokenStream() != nullptr) { + TokenSource *tokenSource = getTokenStream()->getTokenSource(); + if (is<Lexer*>(tokenSource)) { + Lexer *lexer = dynamic_cast<Lexer *>(tokenSource); + return compileParseTreePattern(pattern, patternRuleIndex, lexer); + } + } + throw UnsupportedOperationException("Parser can't discover a lexer to use"); +} + +tree::pattern::ParseTreePattern Parser::compileParseTreePattern(const std::string &pattern, int patternRuleIndex, + Lexer *lexer) { + tree::pattern::ParseTreePatternMatcher m(lexer, this); + return m.compile(pattern, patternRuleIndex); +} + +Ref<ANTLRErrorStrategy> Parser::getErrorHandler() { + return _errHandler; +} + +void Parser::setErrorHandler(Ref<ANTLRErrorStrategy> const& handler) { + _errHandler = handler; +} + +IntStream* Parser::getInputStream() { + return getTokenStream(); +} + +void Parser::setInputStream(IntStream *input) { + setTokenStream(static_cast<TokenStream*>(input)); +} + +TokenStream* Parser::getTokenStream() { + return _input; +} + +void Parser::setTokenStream(TokenStream *input) { + _input = nullptr; // Just a 
reference we don't own. + reset(); + _input = input; +} + +Token* Parser::getCurrentToken() { + return _input->LT(1); +} + +void Parser::notifyErrorListeners(const std::string &msg) { + notifyErrorListeners(getCurrentToken(), msg, nullptr); +} + +void Parser::notifyErrorListeners(Token *offendingToken, const std::string &msg, std::exception_ptr e) { + _syntaxErrors++; + size_t line = offendingToken->getLine(); + size_t charPositionInLine = offendingToken->getCharPositionInLine(); + + ProxyErrorListener &listener = getErrorListenerDispatch(); + listener.syntaxError(this, offendingToken, line, charPositionInLine, msg, e); +} + +Token* Parser::consume() { + Token *o = getCurrentToken(); + if (o->getType() != EOF) { + getInputStream()->consume(); + } + + bool hasListener = _parseListeners.size() > 0 && !_parseListeners.empty(); + if (_buildParseTrees || hasListener) { + if (_errHandler->inErrorRecoveryMode(this)) { + tree::ErrorNode *node = createErrorNode(o); + _ctx->addChild(node); + if (_parseListeners.size() > 0) { + for (auto *listener : _parseListeners) { + listener->visitErrorNode(node); + } + } + } else { + tree::TerminalNode *node = _ctx->addChild(createTerminalNode(o)); + if (_parseListeners.size() > 0) { + for (auto *listener : _parseListeners) { + listener->visitTerminal(node); + } + } + } + } + return o; +} + +void Parser::addContextToParseTree() { + // Add current context to parent if we have a parent. 
+ if (_ctx->parent == nullptr) + return; + + downCast<ParserRuleContext*>(_ctx->parent)->addChild(_ctx); +} + +void Parser::enterRule(ParserRuleContext *localctx, size_t state, size_t /*ruleIndex*/) { + setState(state); + _ctx = localctx; + _ctx->start = _input->LT(1); + if (_buildParseTrees) { + addContextToParseTree(); + } + if (_parseListeners.size() > 0) { + triggerEnterRuleEvent(); + } +} + +void Parser::exitRule() { + if (_matchedEOF) { + // if we have matched EOF, it cannot consume past EOF so we use LT(1) here + _ctx->stop = _input->LT(1); // LT(1) will be end of file + } else { + _ctx->stop = _input->LT(-1); // stop node is what we just matched + } + + // trigger event on ctx, before it reverts to parent + if (_parseListeners.size() > 0) { + triggerExitRuleEvent(); + } + setState(_ctx->invokingState); + _ctx = downCast<ParserRuleContext*>(_ctx->parent); +} + +void Parser::enterOuterAlt(ParserRuleContext *localctx, size_t altNum) { + localctx->setAltNumber(altNum); + + // if we have new localctx, make sure we replace existing ctx + // that is previous child of parse tree + if (_buildParseTrees && _ctx != localctx) { + if (_ctx->parent != nullptr) { + ParserRuleContext *parent = downCast<ParserRuleContext*>(_ctx->parent); + parent->removeLastChild(); + parent->addChild(localctx); + } + } + _ctx = localctx; +} + +int Parser::getPrecedence() const { + if (_precedenceStack.empty()) { + return -1; + } + + return _precedenceStack.back(); +} + +void Parser::enterRecursionRule(ParserRuleContext *localctx, size_t ruleIndex) { + enterRecursionRule(localctx, getATN().ruleToStartState[ruleIndex]->stateNumber, ruleIndex, 0); +} + +void Parser::enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t /*ruleIndex*/, int precedence) { + setState(state); + _precedenceStack.push_back(precedence); + _ctx = localctx; + _ctx->start = _input->LT(1); + if (!_parseListeners.empty()) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } +} + 
+void Parser::pushNewRecursionContext(ParserRuleContext *localctx, size_t state, size_t /*ruleIndex*/) { + ParserRuleContext *previous = _ctx; + previous->parent = localctx; + previous->invokingState = state; + previous->stop = _input->LT(-1); + + _ctx = localctx; + _ctx->start = previous->start; + if (_buildParseTrees) { + _ctx->addChild(previous); + } + + if (_parseListeners.size() > 0) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } +} + +void Parser::unrollRecursionContexts(ParserRuleContext *parentctx) { + _precedenceStack.pop_back(); + _ctx->stop = _input->LT(-1); + ParserRuleContext *retctx = _ctx; // save current ctx (return value) + + // unroll so ctx is as it was before call to recursive method + if (_parseListeners.size() > 0) { + while (_ctx != parentctx) { + triggerExitRuleEvent(); + _ctx = downCast<ParserRuleContext*>(_ctx->parent); + } + } else { + _ctx = parentctx; + } + + // hook into tree + retctx->parent = parentctx; + + if (_buildParseTrees && parentctx != nullptr) { + // add return ctx into invoking rule's tree + parentctx->addChild(retctx); + } +} + +ParserRuleContext* Parser::getInvokingContext(size_t ruleIndex) { + ParserRuleContext *p = _ctx; + while (p) { + if (p->getRuleIndex() == ruleIndex) { + return p; + } + if (p->parent == nullptr) + break; + p = downCast<ParserRuleContext*>(p->parent); + } + return nullptr; +} + +ParserRuleContext* Parser::getContext() { + return _ctx; +} + +void Parser::setContext(ParserRuleContext *ctx) { + _ctx = ctx; +} + +bool Parser::precpred(RuleContext * /*localctx*/, int precedence) { + return precedence >= _precedenceStack.back(); +} + +bool Parser::inContext(const std::string &/*context*/) { + // TODO: useful in parser? 
+ return false; +} + +bool Parser::isExpectedToken(size_t symbol) { + const atn::ATN &atn = getInterpreter<atn::ParserATNSimulator>()->atn; + ParserRuleContext *ctx = _ctx; + atn::ATNState *s = atn.states[getState()]; + misc::IntervalSet following = atn.nextTokens(s); + + if (following.contains(symbol)) { + return true; + } + + if (!following.contains(Token::EPSILON)) { + return false; + } + + while (ctx && ctx->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON)) { + atn::ATNState *invokingState = atn.states[ctx->invokingState]; + const atn::RuleTransition *rt = static_cast<const atn::RuleTransition*>(invokingState->transitions[0].get()); + following = atn.nextTokens(rt->followState); + if (following.contains(symbol)) { + return true; + } + + ctx = downCast<ParserRuleContext*>(ctx->parent); + } + + if (following.contains(Token::EPSILON) && symbol == EOF) { + return true; + } + + return false; +} + +bool Parser::isMatchedEOF() const { + return _matchedEOF; +} + +misc::IntervalSet Parser::getExpectedTokens() { + return getATN().getExpectedTokens(getState(), getContext()); +} + +misc::IntervalSet Parser::getExpectedTokensWithinCurrentRule() { + const atn::ATN &atn = getInterpreter<atn::ParserATNSimulator>()->atn; + atn::ATNState *s = atn.states[getState()]; + return atn.nextTokens(s); +} + +size_t Parser::getRuleIndex(const std::string &ruleName) { + const std::map<std::string, size_t> &m = getRuleIndexMap(); + auto iterator = m.find(ruleName); + if (iterator == m.end()) { + return INVALID_INDEX; + } + return iterator->second; +} + +ParserRuleContext* Parser::getRuleContext() { + return _ctx; +} + +std::vector<std::string> Parser::getRuleInvocationStack() { + return getRuleInvocationStack(_ctx); +} + +std::vector<std::string> Parser::getRuleInvocationStack(RuleContext *p) { + std::vector<std::string> const& ruleNames = getRuleNames(); + std::vector<std::string> stack; + RuleContext *run = p; + while (run != nullptr) { + // compute 
what follows who invoked us + size_t ruleIndex = run->getRuleIndex(); + if (ruleIndex == INVALID_INDEX ) { + stack.push_back("n/a"); + } else { + stack.push_back(ruleNames[ruleIndex]); + } + if (!RuleContext::is(run->parent)) { + break; + } + run = downCast<RuleContext*>(run->parent); + } + return stack; +} + +std::vector<std::string> Parser::getDFAStrings() { + atn::ParserATNSimulator *simulator = getInterpreter<atn::ParserATNSimulator>(); + if (!simulator->decisionToDFA.empty()) { + UniqueLock<Mutex> lck(_mutex); + + std::vector<std::string> s; + for (size_t d = 0; d < simulator->decisionToDFA.size(); d++) { + dfa::DFA &dfa = simulator->decisionToDFA[d]; + s.push_back(dfa.toString(getVocabulary())); + } + return s; + } + return std::vector<std::string>(); +} + +void Parser::dumpDFA() { + atn::ParserATNSimulator *simulator = getInterpreter<atn::ParserATNSimulator>(); + if (!simulator->decisionToDFA.empty()) { + UniqueLock<Mutex> lck(_mutex); + bool seenOne = false; + for (size_t d = 0; d < simulator->decisionToDFA.size(); d++) { + dfa::DFA &dfa = simulator->decisionToDFA[d]; + if (!dfa.states.empty()) { + if (seenOne) { + std::cout << std::endl; + } + std::cout << "Decision " << dfa.decision << ":" << std::endl; + std::cout << dfa.toString(getVocabulary()); + seenOne = true; + } + } + } +} + +std::string Parser::getSourceName() { + return _input->getSourceName(); +} + +atn::ParseInfo Parser::getParseInfo() const { + atn::ParserATNSimulator *simulator = getInterpreter<atn::ParserATNSimulator>(); + return atn::ParseInfo(dynamic_cast<atn::ProfilingATNSimulator*>(simulator)); +} + +void Parser::setProfile(bool profile) { + atn::ParserATNSimulator *interp = getInterpreter<atn::ParserATNSimulator>(); + atn::PredictionMode saveMode = interp != nullptr ? 
interp->getPredictionMode() : atn::PredictionMode::LL; + if (profile) { + if (!is<atn::ProfilingATNSimulator *>(interp)) { + setInterpreter(new atn::ProfilingATNSimulator(this)); /* mem-check: replacing existing interpreter which gets deleted. */ + } + } else if (is<atn::ProfilingATNSimulator *>(interp)) { + /* mem-check: replacing existing interpreter which gets deleted. */ + atn::ParserATNSimulator *sim = new atn::ParserATNSimulator(this, getATN(), interp->decisionToDFA, interp->getSharedContextCache()); + setInterpreter(sim); + } + getInterpreter<atn::ParserATNSimulator>()->setPredictionMode(saveMode); +} + +void Parser::setTrace(bool trace) { + if (!trace) { + if (_tracer) + removeParseListener(_tracer); + delete _tracer; + _tracer = nullptr; + } else { + if (_tracer) + removeParseListener(_tracer); // Just in case this is triggered multiple times. + _tracer = new TraceListener(this); + addParseListener(_tracer); + } +} + +bool Parser::isTrace() const { + return _tracer != nullptr; +} + +tree::TerminalNode *Parser::createTerminalNode(Token *t) { + return _tracker.createInstance<tree::TerminalNodeImpl>(t); +} + +tree::ErrorNode *Parser::createErrorNode(Token *t) { + return _tracker.createInstance<tree::ErrorNodeImpl>(t); +} + +void Parser::InitializeInstanceFields() { + _errHandler = std::make_shared<DefaultErrorStrategy>(); + _precedenceStack.clear(); + _precedenceStack.push_back(0); + _buildParseTrees = true; + _syntaxErrors = 0; + _matchedEOF = false; + _input = nullptr; + _tracer = nullptr; + _ctx = nullptr; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/Parser.h b/contrib/libs/antlr4_cpp_runtime/src/Parser.h new file mode 100644 index 0000000000..f490b00c38 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Parser.h @@ -0,0 +1,461 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "Recognizer.h" +#include "tree/ParseTreeListener.h" +#include "tree/ParseTree.h" +#include "TokenStream.h" +#include "TokenSource.h" +#include "misc/Interval.h" + +namespace antlr4 { + + /// This is all the parsing support code essentially; most of it is error recovery stuff. + class ANTLR4CPP_PUBLIC Parser : public Recognizer { + public: + + class TraceListener : public tree::ParseTreeListener { + public: + TraceListener(Parser *outerInstance); + virtual ~TraceListener(); + + virtual void enterEveryRule(ParserRuleContext *ctx) override; + virtual void visitTerminal(tree::TerminalNode *node) override; + virtual void visitErrorNode(tree::ErrorNode *node) override; + virtual void exitEveryRule(ParserRuleContext *ctx) override; + + private: + Parser *const outerInstance; + }; + + class TrimToSizeListener : public tree::ParseTreeListener { + public: + static TrimToSizeListener INSTANCE; + + virtual ~TrimToSizeListener(); + + virtual void enterEveryRule(ParserRuleContext *ctx) override; + virtual void visitTerminal(tree::TerminalNode *node) override; + virtual void visitErrorNode(tree::ErrorNode *node) override; + virtual void exitEveryRule(ParserRuleContext *ctx) override; + }; + + Parser(TokenStream *input); + virtual ~Parser(); + + /// reset the parser's state + virtual void reset(); + + /// <summary> + /// Match current input symbol against {@code ttype}. If the symbol type + /// matches, <seealso cref="ANTLRErrorStrategy#reportMatch"/> and <seealso cref="#consume"/> are + /// called to complete the match process. + /// + /// If the symbol type does not match, + /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is called on the current error + /// strategy to attempt recovery. 
If <seealso cref="#getBuildParseTree"/> is + /// {@code true} and the token index of the symbol returned by + /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is -1, the symbol is added to + /// the parse tree by calling {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)}. + /// </summary> + /// <param name="ttype"> the token type to match </param> + /// <returns> the matched symbol </returns> + /// <exception cref="RecognitionException"> if the current input symbol did not match + /// {@code ttype} and the error strategy could not recover from the + /// mismatched symbol </exception> + virtual Token* match(size_t ttype); + + /// <summary> + /// Match current input symbol as a wildcard. If the symbol type matches + /// (i.e. has a value greater than 0), <seealso cref="ANTLRErrorStrategy#reportMatch"/> + /// and <seealso cref="#consume"/> are called to complete the match process. + /// <p/> + /// If the symbol type does not match, + /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is called on the current error + /// strategy to attempt recovery. If <seealso cref="#getBuildParseTree"/> is + /// {@code true} and the token index of the symbol returned by + /// <seealso cref="ANTLRErrorStrategy#recoverInline"/> is -1, the symbol is added to + /// the parse tree by calling <seealso cref="ParserRuleContext#addErrorNode"/>. + /// </summary> + /// <returns> the matched symbol </returns> + /// <exception cref="RecognitionException"> if the current input symbol did not match + /// a wildcard and the error strategy could not recover from the mismatched + /// symbol </exception> + virtual Token* matchWildcard(); + + /// <summary> + /// Track the <seealso cref="ParserRuleContext"/> objects during the parse and hook + /// them up using the <seealso cref="ParserRuleContext#children"/> list so that it + /// forms a parse tree. 
The <seealso cref="ParserRuleContext"/> returned from the start + /// rule represents the root of the parse tree. + /// <p/> + /// Note that if we are not building parse trees, rule contexts only point + /// upwards. When a rule exits, it returns the context but that gets garbage + /// collected if nobody holds a reference. It points upwards but nobody + /// points at it. + /// <p/> + /// When we build parse trees, we are adding all of these contexts to + /// <seealso cref="ParserRuleContext#children"/> list. Contexts are then not candidates + /// for garbage collection. + /// </summary> + virtual void setBuildParseTree(bool buildParseTrees); + + /// <summary> + /// Gets whether or not a complete parse tree will be constructed while + /// parsing. This property is {@code true} for a newly constructed parser. + /// </summary> + /// <returns> {@code true} if a complete parse tree will be constructed while + /// parsing, otherwise {@code false} </returns> + virtual bool getBuildParseTree(); + + /// <summary> + /// Trim the internal lists of the parse tree during parsing to conserve memory. + /// This property is set to {@code false} by default for a newly constructed parser. + /// </summary> + /// <param name="trimParseTrees"> {@code true} to trim the capacity of the <seealso cref="ParserRuleContext#children"/> + /// list to its size after a rule is parsed. </param> + virtual void setTrimParseTree(bool trimParseTrees); + + /// <returns> {@code true} if the <seealso cref="ParserRuleContext#children"/> list is trimmed + /// using the default <seealso cref="Parser.TrimToSizeListener"/> during the parse process. </returns> + virtual bool getTrimParseTree(); + + virtual std::vector<tree::ParseTreeListener *> getParseListeners(); + + /// <summary> + /// Registers {@code listener} to receive events during the parsing process. 
+ /// <p/> + /// To support output-preserving grammar transformations (including but not + /// limited to left-recursion removal, automated left-factoring, and + /// optimized code generation), calls to listener methods during the parse + /// may differ substantially from calls made by + /// <seealso cref="ParseTreeWalker#DEFAULT"/> used after the parse is complete. In + /// particular, rule entry and exit events may occur in a different order + /// during the parse than after the parser. In addition, calls to certain + /// rule entry methods may be omitted. + /// <p/> + /// With the following specific exceptions, calls to listener events are + /// <em>deterministic</em>, i.e. for identical input the calls to listener + /// methods will be the same. + /// + /// <ul> + /// <li>Alterations to the grammar used to generate code may change the + /// behavior of the listener calls.</li> + /// <li>Alterations to the command line options passed to ANTLR 4 when + /// generating the parser may change the behavior of the listener calls.</li> + /// <li>Changing the version of the ANTLR Tool used to generate the parser + /// may change the behavior of the listener calls.</li> + /// </ul> + /// </summary> + /// <param name="listener"> the listener to add + /// </param> + /// <exception cref="NullPointerException"> if {@code} listener is {@code null} </exception> + virtual void addParseListener(tree::ParseTreeListener *listener); + + /// <summary> + /// Remove {@code listener} from the list of parse listeners. + /// <p/> + /// If {@code listener} is {@code null} or has not been added as a parse + /// listener, this method does nothing. + /// </summary> + /// <seealso cref= #addParseListener + /// </seealso> + /// <param name="listener"> the listener to remove </param> + virtual void removeParseListener(tree::ParseTreeListener *listener); + + /// <summary> + /// Remove all parse listeners. 
+ /// </summary> + /// <seealso cref= #addParseListener </seealso> + virtual void removeParseListeners(); + + /// <summary> + /// Notify any parse listeners of an enter rule event. + /// </summary> + /// <seealso cref= #addParseListener </seealso> + virtual void triggerEnterRuleEvent(); + + /// <summary> + /// Notify any parse listeners of an exit rule event. + /// </summary> + /// <seealso cref= #addParseListener </seealso> + virtual void triggerExitRuleEvent(); + + /// <summary> + /// Gets the number of syntax errors reported during parsing. This value is + /// incremented each time <seealso cref="#notifyErrorListeners"/> is called. + /// </summary> + /// <seealso cref= #notifyErrorListeners </seealso> + virtual size_t getNumberOfSyntaxErrors(); + + virtual TokenFactory<CommonToken>* getTokenFactory() override; + + /// <summary> + /// Tell our token source and error strategy about a new way to create tokens. </summary> + template<typename T1> + void setTokenFactory(TokenFactory<T1> *factory) { + _input->getTokenSource()->setTokenFactory(factory); + } + + /// The ATN with bypass alternatives is expensive to create so we create it + /// lazily. The ATN is owned by us. + virtual const atn::ATN& getATNWithBypassAlts(); + + /// <summary> + /// The preferred method of getting a tree pattern. For example, here's a + /// sample use: + /// + /// <pre> + /// ParseTree t = parser.expr(); + /// ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr); + /// ParseTreeMatch m = p.match(t); + /// String id = m.get("ID"); + /// </pre> + /// </summary> + virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// The same as <seealso cref="#compileParseTreePattern(String, int)"/> but specify a + /// <seealso cref="Lexer"/> rather than trying to deduce it from this parser. 
+ /// </summary> + virtual tree::pattern::ParseTreePattern compileParseTreePattern(const std::string &pattern, int patternRuleIndex, + Lexer *lexer); + + virtual Ref<ANTLRErrorStrategy> getErrorHandler(); + virtual void setErrorHandler(Ref<ANTLRErrorStrategy> const& handler); + + virtual IntStream* getInputStream() override; + void setInputStream(IntStream *input) override; + + virtual TokenStream* getTokenStream(); + + /// Set the token stream and reset the parser. + virtual void setTokenStream(TokenStream *input); + + /// <summary> + /// Match needs to return the current input symbol, which gets put + /// into the label for the associated token ref; e.g., x=ID. + /// </summary> + virtual Token* getCurrentToken(); + + void notifyErrorListeners(const std::string &msg); + + virtual void notifyErrorListeners(Token *offendingToken, const std::string &msg, std::exception_ptr e); + + /// Consume and return the <seealso cref="#getCurrentToken current symbol"/>. + /// <p/> + /// E.g., given the following input with {@code A} being the current + /// lookahead symbol, this function moves the cursor to {@code B} and returns + /// {@code A}. + /// + /// <pre> + /// A B + /// ^ + /// </pre> + /// + /// If the parser is not in error recovery mode, the consumed symbol is added + /// to the parse tree using <seealso cref="ParserRuleContext#addChild(TerminalNode)"/>, and + /// <seealso cref="ParseTreeListener#visitTerminal"/> is called on any parse listeners. + /// If the parser <em>is</em> in error recovery mode, the consumed symbol is + /// added to the parse tree using {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} and + /// <seealso cref="ParseTreeListener#visitErrorNode"/> is called on any parse + /// listeners. + virtual Token* consume(); + + /// Always called by generated parsers upon entry to a rule. Access field + /// <seealso cref="#_ctx"/> to get the current context.
+ virtual void enterRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex); + + void exitRule(); + + virtual void enterOuterAlt(ParserRuleContext *localctx, size_t altNum); + + /** + * Get the precedence level for the top-most precedence rule. + * + * @return The precedence level for the top-most precedence rule, or -1 if + * the parser context is not nested within a precedence rule. + */ + int getPrecedence() const; + + /// @deprecated Use + /// <seealso cref="#enterRecursionRule(ParserRuleContext, int, int, int)"/> instead. + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t ruleIndex); + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence); + + /** Like {@link #enterRule} but for recursive rules. + * Make the current context the child of the incoming localctx. + */ + virtual void pushNewRecursionContext(ParserRuleContext *localctx, size_t state, size_t ruleIndex); + virtual void unrollRecursionContexts(ParserRuleContext *parentctx); + virtual ParserRuleContext* getInvokingContext(size_t ruleIndex); + virtual ParserRuleContext* getContext(); + virtual void setContext(ParserRuleContext *ctx); + virtual bool precpred(RuleContext *localctx, int precedence) override; + virtual bool inContext(const std::string &context); + + /// <summary> + /// Checks whether or not {@code symbol} can follow the current state in the + /// ATN. The behavior of this method is equivalent to the following, but is + /// implemented such that the complete context-sensitive follow set does not + /// need to be explicitly constructed. + /// + /// <pre> + /// return getExpectedTokens().contains(symbol); + /// </pre> + /// </summary> + /// <param name="symbol"> the symbol type to check </param> + /// <returns> {@code true} if {@code symbol} can follow the current state in + /// the ATN, otherwise {@code false}. 
</returns> + virtual bool isExpectedToken(size_t symbol); + + bool isMatchedEOF() const; + + /// <summary> + /// Computes the set of input symbols which could follow the current parser + /// state and context, as given by <seealso cref="#getState"/> and <seealso cref="#getContext"/>, + /// respectively. + /// </summary> + /// <seealso cref= ATN#getExpectedTokens(int, RuleContext) </seealso> + virtual misc::IntervalSet getExpectedTokens(); + + virtual misc::IntervalSet getExpectedTokensWithinCurrentRule(); + + /// Get a rule's index (i.e., {@code RULE_ruleName} field) or INVALID_INDEX if not found. + virtual size_t getRuleIndex(const std::string &ruleName); + + virtual ParserRuleContext* getRuleContext(); + + /// <summary> + /// Return List<String> of the rule names in your parser instance + /// leading up to a call to the current rule. You could override if + /// you want more details such as the file/line info of where + /// in the ATN a rule is invoked. + /// + /// This is very useful for error messages. + /// </summary> + virtual std::vector<std::string> getRuleInvocationStack(); + + virtual std::vector<std::string> getRuleInvocationStack(RuleContext *p); + + /// <summary> + /// For debugging and other purposes. </summary> + virtual std::vector<std::string> getDFAStrings(); + + /// <summary> + /// For debugging and other purposes. </summary> + virtual void dumpDFA(); + + virtual std::string getSourceName(); + + atn::ParseInfo getParseInfo() const; + + /** + * @since 4.3 + */ + void setProfile(bool profile); + + /// <summary> + /// During a parse it is sometimes useful to listen in on the rule entry and exit + /// events as well as token matches. This is for quick and dirty debugging. + /// </summary> + virtual void setTrace(bool trace); + + /** + * Gets whether a {@link TraceListener} is registered as a parse listener + * for the parser.
+ * + * @see #setTrace(boolean) + */ + bool isTrace() const; + + tree::ParseTreeTracker& getTreeTracker() { return _tracker; } + + /** How to create a token leaf node associated with a parent. + * Typically, the terminal node to create is not a function of the parent + * but this method must still set the parent pointer of the terminal node + * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)} + * set the parent pointer, but the parent pointer is implementation dependent + * and currently there is no setParent() in {@link TerminalNode} (and can't + * add method in Java 1.7 without breaking backward compatibility). + * + * @since 4.7 + */ + tree::TerminalNode *createTerminalNode(Token *t); + + /** How to create an error node, given a token, associated with a parent. + * Typically, the error node to create is not a function of the parent + * but this method must still set the parent pointer of the terminal node + * returned. I would prefer having {@link ParserRuleContext#addAnyChild(ParseTree)} + * set the parent pointer, but the parent pointer is implementation dependent + * and currently there is no setParent() in {@link ErrorNode} (and can't + * add method in Java 1.7 without breaking backward compatibility). + * + * @since 4.7 + */ + tree::ErrorNode *createErrorNode(Token *t); + + protected: + /// The ParserRuleContext object for the currently executing rule. + /// This is always non-null during the parsing process. + // ml: this is one of the contexts tracked in _allocatedContexts. + ParserRuleContext *_ctx; + + /// The error handling strategy for the parser. The default is DefaultErrorStrategy. + /// See also getErrorHandler. + Ref<ANTLRErrorStrategy> _errHandler; + + /// <summary> + /// The input stream. 
+ /// </summary> + /// <seealso cref= #getInputStream </seealso> + /// <seealso cref= #setInputStream </seealso> + TokenStream *_input; + + std::vector<int> _precedenceStack; + + /// <summary> + /// Specifies whether or not the parser should construct a parse tree during + /// the parsing process. The default value is {@code true}. + /// </summary> + /// <seealso cref= #getBuildParseTree </seealso> + /// <seealso cref= #setBuildParseTree </seealso> + bool _buildParseTrees; + + /// The list of <seealso cref="ParseTreeListener"/> listeners registered to receive + /// events during the parse. + /// <seealso cref= #addParseListener </seealso> + std::vector<tree::ParseTreeListener *> _parseListeners; + + /// <summary> + /// The number of syntax errors reported during parsing. This value is + /// incremented each time <seealso cref="#notifyErrorListeners"/> is called. + /// </summary> + size_t _syntaxErrors; + + /** Indicates parser has match()ed EOF token. See {@link #exitRule()}. */ + bool _matchedEOF; + + virtual void addContextToParseTree(); + + // All rule contexts created during a parse run. This is cleared when calling reset(). + tree::ParseTreeTracker _tracker; + + private: + /// When setTrace(true) is called, a reference to the + /// TraceListener is stored here so it can be easily removed in a + /// later call to setTrace(false). The listener itself is + /// implemented as a parser listener so this field is not directly used by + /// other parser methods. + TraceListener *_tracer; + + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.cpp b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.cpp new file mode 100644 index 0000000000..e1c54a0eb1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.cpp @@ -0,0 +1,294 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "atn/RuleStartState.h" +#include "InterpreterRuleContext.h" +#include "atn/ParserATNSimulator.h" +#include "ANTLRErrorStrategy.h" +#include "atn/LoopEndState.h" +#include "FailedPredicateException.h" +#include "atn/StarLoopEntryState.h" +#include "atn/AtomTransition.h" +#include "atn/RuleTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/ATN.h" +#include "atn/RuleStopState.h" +#include "Lexer.h" +#include "Token.h" +#include "Vocabulary.h" +#include "InputMismatchException.h" +#include "CommonToken.h" +#include "tree/ErrorNode.h" + +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "ParserInterpreter.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +using namespace antlrcpp; + +ParserInterpreter::ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input) + : Parser(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _vocabulary(vocabulary) { + + // init decision DFA + for (size_t i = 0; i < atn.getNumberOfDecisions(); ++i) { + atn::DecisionState *decisionState = atn.getDecisionState(i); + _decisionToDFA.push_back(dfa::DFA(decisionState, i)); + } + + // get atn simulator that knows how to do predictions + _interpreter = new atn::ParserATNSimulator(this, atn, _decisionToDFA, _sharedContextCache); /* mem-check: deleted in d-tor */ +} + +ParserInterpreter::~ParserInterpreter() { + delete _interpreter; +} + +void ParserInterpreter::reset() { + Parser::reset(); + _overrideDecisionReached = false; + _overrideDecisionRoot = nullptr; +} + +const atn::ATN& ParserInterpreter::getATN() const { + return _atn; +} + 
+const dfa::Vocabulary& ParserInterpreter::getVocabulary() const { + return _vocabulary; +} + +const std::vector<std::string>& ParserInterpreter::getRuleNames() const { + return _ruleNames; +} + +std::string ParserInterpreter::getGrammarFileName() const { + return _grammarFileName; +} + +ParserRuleContext* ParserInterpreter::parse(size_t startRuleIndex) { + atn::RuleStartState *startRuleStartState = _atn.ruleToStartState[startRuleIndex]; + + _rootContext = createInterpreterRuleContext(nullptr, atn::ATNState::INVALID_STATE_NUMBER, startRuleIndex); + + if (startRuleStartState->isLeftRecursiveRule) { + enterRecursionRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex, 0); + } else { + enterRule(_rootContext, startRuleStartState->stateNumber, startRuleIndex); + } + + while (true) { + atn::ATNState *p = getATNState(); + switch (p->getStateType()) { + case atn::ATNStateType::RULE_STOP : + // pop; return from rule + if (_ctx->isEmpty()) { + if (startRuleStartState->isLeftRecursiveRule) { + ParserRuleContext *result = _ctx; + auto parentContext = _parentContextStack.top(); + _parentContextStack.pop(); + unrollRecursionContexts(parentContext.first); + return result; + } else { + exitRule(); + return _rootContext; + } + } + + visitRuleStopState(p); + break; + + default : + try { + visitState(p); + } + catch (RecognitionException &e) { + setState(_atn.ruleToStopState[p->ruleIndex]->stateNumber); + getErrorHandler()->reportError(this, e); + getContext()->exception = std::current_exception(); + recover(e); + } + + break; + } + } +} + +void ParserInterpreter::enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) { + _parentContextStack.push({ _ctx, localctx->invokingState }); + Parser::enterRecursionRule(localctx, state, ruleIndex, precedence); +} + +void ParserInterpreter::addDecisionOverride(int decision, int tokenIndex, int forcedAlt) { + _overrideDecision = decision; + _overrideDecisionInputIndex = tokenIndex; + 
_overrideDecisionAlt = forcedAlt; +} + +Ref<InterpreterRuleContext> ParserInterpreter::getOverrideDecisionRoot() const { + return _overrideDecisionRoot; +} + +InterpreterRuleContext* ParserInterpreter::getRootContext() { + return _rootContext; +} + +atn::ATNState* ParserInterpreter::getATNState() { + return _atn.states[getState()]; +} + +void ParserInterpreter::visitState(atn::ATNState *p) { + size_t predictedAlt = 1; + if (DecisionState::is(p)) { + predictedAlt = visitDecisionState(downCast<DecisionState*>(p)); + } + + const atn::Transition *transition = p->transitions[predictedAlt - 1].get(); + switch (transition->getTransitionType()) { + case atn::TransitionType::EPSILON: + if (p->getStateType() == ATNStateType::STAR_LOOP_ENTRY && + (downCast<StarLoopEntryState *>(p))->isPrecedenceDecision && + !LoopEndState::is(transition->target)) { + // We are at the start of a left recursive rule's (...)* loop + // and we're not taking the exit branch of loop. + InterpreterRuleContext *localctx = createInterpreterRuleContext(_parentContextStack.top().first, + _parentContextStack.top().second, static_cast<int>(_ctx->getRuleIndex())); + pushNewRecursionContext(localctx, _atn.ruleToStartState[p->ruleIndex]->stateNumber, static_cast<int>(_ctx->getRuleIndex())); + } + break; + + case atn::TransitionType::ATOM: + match(static_cast<int>(static_cast<const atn::AtomTransition*>(transition)->_label)); + break; + + case atn::TransitionType::RANGE: + case atn::TransitionType::SET: + case atn::TransitionType::NOT_SET: + if (!transition->matches(static_cast<int>(_input->LA(1)), Token::MIN_USER_TOKEN_TYPE, Lexer::MAX_CHAR_VALUE)) { + recoverInline(); + } + matchWildcard(); + break; + + case atn::TransitionType::WILDCARD: + matchWildcard(); + break; + + case atn::TransitionType::RULE: + { + atn::RuleStartState *ruleStartState = static_cast<atn::RuleStartState*>(transition->target); + size_t ruleIndex = ruleStartState->ruleIndex; + InterpreterRuleContext *newctx = 
createInterpreterRuleContext(_ctx, p->stateNumber, ruleIndex); + if (ruleStartState->isLeftRecursiveRule) { + enterRecursionRule(newctx, ruleStartState->stateNumber, ruleIndex, static_cast<const atn::RuleTransition*>(transition)->precedence); + } else { + enterRule(newctx, transition->target->stateNumber, ruleIndex); + } + } + break; + + case atn::TransitionType::PREDICATE: + { + const atn::PredicateTransition *predicateTransition = static_cast<const atn::PredicateTransition*>(transition); + if (!sempred(_ctx, predicateTransition->getRuleIndex(), predicateTransition->getPredIndex())) { + throw FailedPredicateException(this); + } + } + break; + + case atn::TransitionType::ACTION: + { + const atn::ActionTransition *actionTransition = static_cast<const atn::ActionTransition*>(transition); + action(_ctx, actionTransition->ruleIndex, actionTransition->actionIndex); + } + break; + + case atn::TransitionType::PRECEDENCE: + { + if (!precpred(_ctx, static_cast<const atn::PrecedencePredicateTransition*>(transition)->getPrecedence())) { + throw FailedPredicateException(this, "precpred(_ctx, " + std::to_string(static_cast<const atn::PrecedencePredicateTransition*>(transition)->getPrecedence()) + ")"); + } + } + break; + + default: + throw UnsupportedOperationException("Unrecognized ATN transition type."); + } + + setState(transition->target->stateNumber); +} + +size_t ParserInterpreter::visitDecisionState(DecisionState *p) { + size_t predictedAlt = 1; + if (p->transitions.size() > 1) { + getErrorHandler()->sync(this); + int decision = p->decision; + if (decision == _overrideDecision && _input->index() == _overrideDecisionInputIndex && !_overrideDecisionReached) { + predictedAlt = _overrideDecisionAlt; + _overrideDecisionReached = true; + } else { + predictedAlt = getInterpreter<ParserATNSimulator>()->adaptivePredict(_input, decision, _ctx); + } + } + return predictedAlt; +} + +InterpreterRuleContext* ParserInterpreter::createInterpreterRuleContext(ParserRuleContext *parent, + 
size_t invokingStateNumber, size_t ruleIndex) { + return _tracker.createInstance<InterpreterRuleContext>(parent, invokingStateNumber, ruleIndex); +} + +void ParserInterpreter::visitRuleStopState(atn::ATNState *p) { + atn::RuleStartState *ruleStartState = _atn.ruleToStartState[p->ruleIndex]; + if (ruleStartState->isLeftRecursiveRule) { + std::pair<ParserRuleContext *, size_t> parentContext = _parentContextStack.top(); + _parentContextStack.pop(); + + unrollRecursionContexts(parentContext.first); + setState(parentContext.second); + } else { + exitRule(); + } + + const atn::RuleTransition *ruleTransition = static_cast<const atn::RuleTransition*>(_atn.states[getState()]->transitions[0].get()); + setState(ruleTransition->followState->stateNumber); +} + +void ParserInterpreter::recover(RecognitionException &e) { + size_t i = _input->index(); + getErrorHandler()->recover(this, std::make_exception_ptr(e)); + + if (_input->index() == i) { + // no input consumed, better add an error node + if (is<InputMismatchException *>(&e)) { + InputMismatchException &ime = static_cast<InputMismatchException&>(e); + Token *tok = e.getOffendingToken(); + size_t expectedTokenType = ime.getExpectedTokens().getMinElement(); // get any element + _errorToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() }, + expectedTokenType, tok->getText(), Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, // invalid start/stop + tok->getLine(), tok->getCharPositionInLine()); + _ctx->addChild(createErrorNode(_errorToken.get())); + } + else { // NoViableAlt + Token *tok = e.getOffendingToken(); + _errorToken = getTokenFactory()->create({ tok->getTokenSource(), tok->getTokenSource()->getInputStream() }, + Token::INVALID_TYPE, tok->getText(), Token::DEFAULT_CHANNEL, INVALID_INDEX, INVALID_INDEX, // invalid start/stop + tok->getLine(), tok->getCharPositionInLine()); + _ctx->addChild(createErrorNode(_errorToken.get())); + } + } +} + +Token* 
ParserInterpreter::recoverInline() { + return _errHandler->recoverInline(this); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.h b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.h new file mode 100644 index 0000000000..6d4a679e5b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserInterpreter.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Parser.h" +#include "atn/ATN.h" +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "Vocabulary.h" + +namespace antlr4 { + + /// <summary> + /// A parser simulator that mimics what ANTLR's generated + /// parser code does. A ParserATNSimulator is used to make + /// predictions via adaptivePredict but this class moves a pointer through the + /// ATN to simulate parsing. ParserATNSimulator just + /// makes us efficient rather than having to backtrack, for example. + /// + /// This properly creates parse trees even for left recursive rules. + /// + /// We rely on the left recursive rule invocation and special predicate + /// transitions to make left recursive rules work. + /// + /// See TestParserInterpreter for examples. 
+ /// </summary> + class ANTLR4CPP_PUBLIC ParserInterpreter : public Parser { + public: + ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary, + const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input); + ~ParserInterpreter(); + + virtual void reset() override; + + virtual const atn::ATN& getATN() const override; + + virtual const dfa::Vocabulary& getVocabulary() const override; + + virtual const std::vector<std::string>& getRuleNames() const override; + virtual std::string getGrammarFileName() const override; + + /// Begin parsing at startRuleIndex + virtual ParserRuleContext* parse(size_t startRuleIndex); + + virtual void enterRecursionRule(ParserRuleContext *localctx, size_t state, size_t ruleIndex, int precedence) override; + + + /** Override this parser interpreter's normal decision-making process + * at a particular decision and input token index. Instead of + * allowing the adaptive prediction mechanism to choose the + * first alternative within a block that leads to a successful parse, + * force it to take the alternative, 1..n for n alternatives. + * + * As an implementation limitation right now, you can only specify one + * override. This is sufficient to allow construction of different + * parse trees for ambiguous input. It means re-parsing the entire input + * in general because you're never sure where an ambiguous sequence would + * live in the various parse trees. For example, in one interpretation, + * an ambiguous input sequence would be matched completely in expression + * but in another it could match all the way back to the root. + * + * s : e '!'? ; + * e : ID + * | ID '!' + * ; + * + * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first + * case, the ambiguous sequence is fully contained only by the root. + * In the second case, the ambiguous sequence is fully contained within just + * e, as in: (e ID !).
+ * + * Rather than trying to optimize this and make + * some intelligent decisions for optimization purposes, I settled on + * just re-parsing the whole input and then using + * {@link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal + * subtree that contains the ambiguous sequence. I originally tried to + * record the call stack at the point the parser detected an ambiguity but + * left recursive rules create a parse tree stack that does not reflect + * the actual call stack. That impedance mismatch was enough to make + * it challenging to restart the parser at a deeply nested rule + * invocation. + * + * Only parser interpreters can override decisions so as to avoid inserting + * override checking code in the critical ALL(*) prediction execution path. + * + * @since 4.5.1 + */ + void addDecisionOverride(int decision, int tokenIndex, int forcedAlt); + + Ref<InterpreterRuleContext> getOverrideDecisionRoot() const; + + /** Return the root of the parse, which can be useful if the parser + * bails out. You still can access the top node. Note that, + * because of the way left recursive rules add children, it's possible + * that the root will not have any children if the start rule immediately + * called a left recursive rule that fails. + * + * @since 4.5.1 + */ + InterpreterRuleContext* getRootContext(); + + protected: + const std::string _grammarFileName; + const atn::ATN &_atn; + + std::vector<std::string> _ruleNames; + + std::vector<dfa::DFA> _decisionToDFA; // not shared like it is for generated parsers + atn::PredictionContextCache _sharedContextCache; + + /** This stack corresponds to the _parentctx, _parentState pair of locals + * that would exist on call stack frames with a recursive descent parser; + * in the generated function for a left-recursive rule you'd see: + * + * private EContext e(int _p) throws RecognitionException { + * ParserRuleContext _parentctx = _ctx; // Pair.a + * int _parentState = getState(); // Pair.b + * ...
+ * } + * + * Those values are used to create new recursive rule invocation contexts + * associated with left operand of an alt like "expr '*' expr". + */ + std::stack<std::pair<ParserRuleContext *, size_t>> _parentContextStack; + + /** We need a map from (decision,inputIndex)->forced alt for computing ambiguous + * parse trees. For now, we allow exactly one override. + */ + int _overrideDecision = -1; + size_t _overrideDecisionInputIndex = INVALID_INDEX; + size_t _overrideDecisionAlt = INVALID_INDEX; + bool _overrideDecisionReached = false; // latch and only override once; error might trigger infinite loop + + /** What is the current context when we override a decision? This tells + * us what the root of the parse tree is when using override + * for an ambiguity/lookahead check. + */ + Ref<InterpreterRuleContext> _overrideDecisionRoot; + InterpreterRuleContext* _rootContext; + + virtual atn::ATNState *getATNState(); + virtual void visitState(atn::ATNState *p); + + /** Method visitDecisionState() is called when the interpreter reaches + * a decision state (instance of DecisionState). It gives an opportunity + * for subclasses to track interesting things. + */ + size_t visitDecisionState(atn::DecisionState *p); + + /** Provide simple "factory" for InterpreterRuleContext's. + * @since 4.5.1 + */ + InterpreterRuleContext* createInterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual void visitRuleStopState(atn::ATNState *p); + + /** Rely on the error handler for this parser but, if no tokens are consumed + * to recover, add an error node. Otherwise, nothing is seen in the parse + * tree. 
+ */ + void recover(RecognitionException &e); + Token* recoverInline(); + + private: + const dfa::Vocabulary &_vocabulary; + std::unique_ptr<Token> _errorToken; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.cpp new file mode 100644 index 0000000000..7eb3e6577f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.cpp @@ -0,0 +1,138 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/TerminalNode.h" +#include "tree/ErrorNode.h" +#include "misc/Interval.h" +#include "Parser.h" +#include "Token.h" + +#include "support/Casts.h" +#include "support/CPPUtils.h" + +#include "ParserRuleContext.h" + +using namespace antlr4; +using namespace antlr4::tree; + +using namespace antlrcpp; + +ParserRuleContext ParserRuleContext::EMPTY; + +ParserRuleContext::ParserRuleContext() + : start(nullptr), stop(nullptr) { +} + +ParserRuleContext::ParserRuleContext(ParserRuleContext *parent, size_t invokingStateNumber) +: RuleContext(parent, invokingStateNumber), start(nullptr), stop(nullptr) { +} + +void ParserRuleContext::copyFrom(ParserRuleContext *ctx) { + // from RuleContext + this->parent = ctx->parent; + this->invokingState = ctx->invokingState; + + this->start = ctx->start; + this->stop = ctx->stop; + + // copy any error nodes to alt label node + if (!ctx->children.empty()) { + for (auto *child : ctx->children) { + if (ErrorNode::is(child)) { + downCast<ErrorNode*>(child)->setParent(this); + children.push_back(child); + } + } + + // Remove the just reparented error nodes from the source context. 
+ ctx->children.erase(std::remove_if(ctx->children.begin(), ctx->children.end(), [this](tree::ParseTree *e) -> bool { + return std::find(children.begin(), children.end(), e) != children.end(); + }), ctx->children.end()); + } +} + +void ParserRuleContext::enterRule(tree::ParseTreeListener * /*listener*/) { +} + +void ParserRuleContext::exitRule(tree::ParseTreeListener * /*listener*/) { +} + +tree::TerminalNode* ParserRuleContext::addChild(tree::TerminalNode *t) { + t->setParent(this); + children.push_back(t); + return t; +} + +RuleContext* ParserRuleContext::addChild(RuleContext *ruleInvocation) { + children.push_back(ruleInvocation); + return ruleInvocation; +} + +void ParserRuleContext::removeLastChild() { + if (!children.empty()) { + children.pop_back(); + } +} + +tree::TerminalNode* ParserRuleContext::getToken(size_t ttype, size_t i) const { + if (i >= children.size()) { + return nullptr; + } + size_t j = 0; // what token with ttype have we found? + for (auto *child : children) { + if (TerminalNode::is(child)) { + tree::TerminalNode *typedChild = downCast<tree::TerminalNode*>(child); + Token *symbol = typedChild->getSymbol(); + if (symbol->getType() == ttype) { + if (j++ == i) { + return typedChild; + } + } + } + } + return nullptr; +} + +std::vector<tree::TerminalNode *> ParserRuleContext::getTokens(size_t ttype) const { + std::vector<tree::TerminalNode*> tokens; + for (auto *child : children) { + if (TerminalNode::is(child)) { + tree::TerminalNode *typedChild = downCast<tree::TerminalNode*>(child); + Token *symbol = typedChild->getSymbol(); + if (symbol->getType() == ttype) { + tokens.push_back(typedChild); + } + } + } + return tokens; +} + +misc::Interval ParserRuleContext::getSourceInterval() { + if (start == nullptr) { + return misc::Interval::INVALID; + } + + if (stop == nullptr || stop->getTokenIndex() < start->getTokenIndex()) { + return misc::Interval(start->getTokenIndex(), start->getTokenIndex() - 1); // empty + } + return 
misc::Interval(start->getTokenIndex(), stop->getTokenIndex()); +} + +Token* ParserRuleContext::getStart() const { + return start; +} + +Token* ParserRuleContext::getStop() const { + return stop; +} + +std::string ParserRuleContext::toInfoString(Parser *recognizer) { + std::vector<std::string> rules = recognizer->getRuleInvocationStack(this); + std::reverse(rules.begin(), rules.end()); + std::string rulesStr = antlrcpp::arrayToString(rules); + return "ParserRuleContext" + rulesStr + "{start=" + std::to_string(start->getTokenIndex()) + ", stop=" + + std::to_string(stop->getTokenIndex()) + '}'; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.h b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.h new file mode 100644 index 0000000000..63a8466e59 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ParserRuleContext.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "support/CPPUtils.h" + +namespace antlr4 { + + /// <summary> + /// A rule invocation record for parsing. + /// + /// Contains all of the information about the current rule not stored in the + /// RuleContext. It handles parse tree children list, Any ATN state + /// tracing, and the default values available for rule invocatons: + /// start, stop, rule index, current alt number. + /// + /// Subclasses made for each rule and grammar track the parameters, + /// return values, locals, and labels specific to that rule. These + /// are the objects that are returned from rules. + /// + /// Note text is not an actual field of a rule return value; it is computed + /// from start and stop using the input stream's toString() method. I + /// could add a ctor to this so that we can pass in and store the input + /// stream, but I'm not sure we want to do that. 
It would seem to be undefined + /// to get the .text property anyway if the rule matches tokens from multiple + /// input streams. + /// + /// I do not use getters for fields of objects that are used simply to + /// group values such as this aggregate. The getters/setters are there to + /// satisfy the superclass interface. + /// </summary> + class ANTLR4CPP_PUBLIC ParserRuleContext : public RuleContext { + public: + static ParserRuleContext EMPTY; + + /// <summary> + /// For debugging/tracing purposes, we want to track all of the nodes in + /// the ATN traversed by the parser for a particular rule. + /// This list indicates the sequence of ATN nodes used to match + /// the elements of the children list. This list does not include + /// ATN nodes and other rules used to match rule invocations. It + /// traces the rule invocation node itself but nothing inside that + /// other rule's ATN submachine. + /// + /// There is NOT a one-to-one correspondence between the children and + /// states list. There are typically many nodes in the ATN traversed + /// for each element in the children list. For example, for a rule + /// invocation there is the invoking state and the following state. + /// + /// The parser setState() method updates field s and adds it to this list + /// if we are debugging/tracing. + /// + /// This does not trace states visited during prediction. + /// </summary> + // public List<Integer> states; + + Token *start; + Token *stop; + + /// The exception that forced this rule to return. If the rule successfully + /// completed, this is "null exception pointer". + std::exception_ptr exception; + + ParserRuleContext(); + ParserRuleContext(ParserRuleContext *parent, size_t invokingStateNumber); + + /** COPY a ctx (I'm deliberately not using copy constructor) to avoid + * confusion with creating node with parent. Does not copy children + * (except error leaves). 
+ */ + virtual void copyFrom(ParserRuleContext *ctx); + + + // Double dispatch methods for listeners + + virtual void enterRule(tree::ParseTreeListener *listener); + virtual void exitRule(tree::ParseTreeListener *listener); + + /** Add a token leaf node child and force its parent to be this node. */ + tree::TerminalNode* addChild(tree::TerminalNode *t); + RuleContext* addChild(RuleContext *ruleInvocation); + + /// Used by enterOuterAlt to toss out a RuleContext previously added as + /// we entered a rule. If we have # label, we will need to remove + /// generic ruleContext object. + void removeLastChild(); + + tree::TerminalNode* getToken(size_t ttype, std::size_t i) const; + + std::vector<tree::TerminalNode*> getTokens(size_t ttype) const; + + template<typename T> + T* getRuleContext(size_t i) const { + static_assert(std::is_base_of_v<RuleContext, T>, "T must be derived from RuleContext"); + size_t j = 0; // what element have we found with ctxType? + for (auto *child : children) { + if (RuleContext::is(child)) { + if (auto *typedChild = dynamic_cast<T*>(child); typedChild != nullptr) { + if (j++ == i) { + return typedChild; + } + } + } + } + return nullptr; + } + + template<typename T> + std::vector<T*> getRuleContexts() const { + static_assert(std::is_base_of_v<RuleContext, T>, "T must be derived from RuleContext"); + std::vector<T*> contexts; + for (auto *child : children) { + if (RuleContext::is(child)) { + if (auto *typedChild = dynamic_cast<T*>(child); typedChild != nullptr) { + contexts.push_back(typedChild); + } + } + } + return contexts; + } + + virtual misc::Interval getSourceInterval() override; + + /** + * Get the initial token in this context. + * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may exceed stop. + */ + Token* getStart() const; + + /** + * Get the final token in this context. 
+ * Note that the range from start to stop is inclusive, so for rules that do not consume anything + * (for example, zero length or error productions) this token may precede start. + */ + Token* getStop() const; + + /// <summary> + /// Used for rule context info debugging during parse-time, not so much for ATN debugging </summary> + virtual std::string toInfoString(Parser *recognizer); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.cpp new file mode 100644 index 0000000000..34bfd73e26 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.cpp @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ProxyErrorListener.h" + +using namespace antlr4; + +void ProxyErrorListener::addErrorListener(ANTLRErrorListener *listener) { + if (listener == nullptr) { + throw "listener cannot be null."; + } + + _delegates.insert(listener); +} + +void ProxyErrorListener::removeErrorListener(ANTLRErrorListener *listener) { + _delegates.erase(listener); +} + +void ProxyErrorListener::removeErrorListeners() { + _delegates.clear(); +} + +void ProxyErrorListener::syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) { + + for (auto *listener : _delegates) { + listener->syntaxError(recognizer, offendingSymbol, line, charPositionInLine, msg, e); + } +} + +void ProxyErrorListener::reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) { + for (auto *listener : _delegates) { + listener->reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +void 
ProxyErrorListener::reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, + size_t stopIndex, const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) { + for (auto *listener : _delegates) { + listener->reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs); + } +} + +void ProxyErrorListener::reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) { + for (auto *listener : _delegates) { + listener->reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs); + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.h new file mode 100644 index 0000000000..04630ce12c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ProxyErrorListener.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorListener.h" +#include "Exceptions.h" + +namespace antlr4 { + + /// This implementation of ANTLRErrorListener dispatches all calls to a + /// collection of delegate listeners. This reduces the effort required to support multiple + /// listeners. + class ANTLR4CPP_PUBLIC ProxyErrorListener : public ANTLRErrorListener { + private: + std::set<ANTLRErrorListener *> _delegates; // Not owned. 
+ + public: + void addErrorListener(ANTLRErrorListener *listener); + void removeErrorListener(ANTLRErrorListener *listener); + void removeErrorListeners(); + + void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.cpp b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.cpp new file mode 100644 index 0000000000..5b37f9d2f0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.cpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATN.h" +#include "Recognizer.h" +#include "ParserRuleContext.h" +#include "misc/IntervalSet.h" + +#include "RecognitionException.h" + +using namespace antlr4; + +RecognitionException::RecognitionException(Recognizer *recognizer, IntStream *input, ParserRuleContext *ctx, + Token *offendingToken) + : RecognitionException("", recognizer, input, ctx, offendingToken) { +} + +RecognitionException::RecognitionException(const std::string &message, Recognizer *recognizer, IntStream *input, + ParserRuleContext *ctx, Token *offendingToken) + : RuntimeException(message), _recognizer(recognizer), _input(input), _ctx(ctx), _offendingToken(offendingToken) { + InitializeInstanceFields(); + if (recognizer != nullptr) { + _offendingState = recognizer->getState(); + } +} + +RecognitionException::~RecognitionException() { +} + +size_t RecognitionException::getOffendingState() const { + return _offendingState; +} + +void RecognitionException::setOffendingState(size_t offendingState) { + _offendingState = offendingState; +} + +misc::IntervalSet RecognitionException::getExpectedTokens() const { + if (_recognizer) { + return _recognizer->getATN().getExpectedTokens(_offendingState, _ctx); + } + return misc::IntervalSet::EMPTY_SET; +} + +RuleContext* RecognitionException::getCtx() const { + return _ctx; +} + +IntStream* RecognitionException::getInputStream() const { + return _input; +} + +Token* RecognitionException::getOffendingToken() const { + return _offendingToken; +} + +Recognizer* RecognitionException::getRecognizer() const { + return _recognizer; +} + +void RecognitionException::InitializeInstanceFields() { + _offendingState = INVALID_INDEX; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.h b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.h new file mode 100644 index 0000000000..9397ab20c8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RecognitionException.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2012-2017 The ANTLR 
Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Exceptions.h" + +namespace antlr4 { + + /// The root of the ANTLR exception hierarchy. In general, ANTLR tracks just + /// 3 kinds of errors: prediction errors, failed predicate errors, and + /// mismatched input errors. In each case, the parser knows where it is + /// in the input, where it is in the ATN, the rule invocation stack, + /// and what kind of problem occurred. + class ANTLR4CPP_PUBLIC RecognitionException : public RuntimeException { + private: + /// The Recognizer where this exception originated. + Recognizer *_recognizer; + IntStream *_input; + ParserRuleContext *_ctx; + + /// The current Token when an error occurred. Since not all streams + /// support accessing symbols by index, we have to track the Token + /// instance itself. + Token *_offendingToken; + + size_t _offendingState; + + public: + RecognitionException(Recognizer *recognizer, IntStream *input, ParserRuleContext *ctx, + Token *offendingToken = nullptr); + RecognitionException(const std::string &message, Recognizer *recognizer, IntStream *input, + ParserRuleContext *ctx, Token *offendingToken = nullptr); + RecognitionException(RecognitionException const&) = default; + ~RecognitionException(); + RecognitionException& operator=(RecognitionException const&) = default; + + /// Get the ATN state number the parser was in at the time the error + /// occurred. For NoViableAltException and + /// LexerNoViableAltException exceptions, this is the + /// DecisionState number. For others, it is the state whose outgoing + /// edge we couldn't match. + /// + /// If the state number is not known, this method returns -1. 
+ virtual size_t getOffendingState() const; + + protected: + void setOffendingState(size_t offendingState); + + /// Gets the set of input symbols which could potentially follow the + /// previously matched symbol at the time this exception was thrown. + /// + /// If the set of expected tokens is not known and could not be computed, + /// this method returns an empty set. + /// + /// @returns The set of token types that could potentially follow the current + /// state in the ATN, or an empty set if the information is not available. + public: + virtual misc::IntervalSet getExpectedTokens() const; + + /// <summary> + /// Gets the <seealso cref="RuleContext"/> at the time this exception was thrown. + /// <p/> + /// If the context is not available, this method returns {@code null}. + /// </summary> + /// <returns> The <seealso cref="RuleContext"/> at the time this exception was thrown. + /// If the context is not available, this method returns {@code null}. </returns> + virtual RuleContext* getCtx() const; + + /// <summary> + /// Gets the input stream which is the symbol source for the recognizer where + /// this exception was thrown. + /// <p/> + /// If the input stream is not available, this method returns {@code null}. + /// </summary> + /// <returns> The input stream which is the symbol source for the recognizer + /// where this exception was thrown, or {@code null} if the stream is not + /// available. </returns> + virtual IntStream* getInputStream() const; + + virtual Token* getOffendingToken() const; + + /// <summary> + /// Gets the <seealso cref="Recognizer"/> where this exception occurred. + /// <p/> + /// If the recognizer is not available, this method returns {@code null}. + /// </summary> + /// <returns> The recognizer where this exception occurred, or {@code null} if + /// the recognizer is not available. 
</returns> + virtual Recognizer* getRecognizer() const; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Recognizer.cpp b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.cpp new file mode 100644 index 0000000000..c8a183324c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.cpp @@ -0,0 +1,163 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ConsoleErrorListener.h" +#include "RecognitionException.h" +#include "support/CPPUtils.h" +#include "Token.h" +#include "atn/ATN.h" +#include "atn/ATNSimulator.h" +#include "support/CPPUtils.h" +#include "support/StringUtils.h" + +#include "Vocabulary.h" + +#include "Recognizer.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; + +std::map<const dfa::Vocabulary*, std::map<std::string_view, size_t>> Recognizer::_tokenTypeMapCache; +std::map<std::vector<std::string>, std::map<std::string, size_t>> Recognizer::_ruleIndexMapCache; + +namespace { + // The two caches above are static, i.e. shared by every Recognizer instance, so they must be guarded by one + // process-wide mutex; the per-instance _mutex cannot serialize accesses made through different recognizers. + Mutex cacheMutex; +} + +Recognizer::Recognizer() { + InitializeInstanceFields(); + _proxListener.addErrorListener(&ConsoleErrorListener::INSTANCE); +} + +Recognizer::~Recognizer() { +} + +std::map<std::string_view, size_t> Recognizer::getTokenTypeMap() { + const dfa::Vocabulary& vocabulary = getVocabulary(); + + UniqueLock<Mutex> lck(cacheMutex); // shared lock: _tokenTypeMapCache is static + std::map<std::string_view, size_t> result; + auto iterator = _tokenTypeMapCache.find(&vocabulary); + if (iterator != _tokenTypeMapCache.end()) { + result = iterator->second; + } else { + for (size_t i = 0; i <= getATN().maxTokenType; ++i) { + std::string_view literalName = vocabulary.getLiteralName(i); + if (!literalName.empty()) { + result[literalName] = i; + } + + std::string_view symbolicName = vocabulary.getSymbolicName(i); + if (!symbolicName.empty()) { + result[symbolicName] = i; +
} + } + result["EOF"] = EOF; + _tokenTypeMapCache[&vocabulary] = result; + } + + return result; +} + +std::map<std::string, size_t> Recognizer::getRuleIndexMap() { + const std::vector<std::string>& ruleNames = getRuleNames(); + if (ruleNames.empty()) { + throw "The current recognizer does not provide a list of rule names."; + } + + UniqueLock<Mutex> lck(cacheMutex); // shared lock: _ruleIndexMapCache is static + std::map<std::string, size_t> result; + auto iterator = _ruleIndexMapCache.find(ruleNames); + if (iterator != _ruleIndexMapCache.end()) { + result = iterator->second; + } else { + result = antlrcpp::toMap(ruleNames); + _ruleIndexMapCache[ruleNames] = result; + } + return result; +} + +size_t Recognizer::getTokenType(std::string_view tokenName) { + const std::map<std::string_view, size_t> &map = getTokenTypeMap(); + auto iterator = map.find(tokenName); + if (iterator == map.end()) + return Token::INVALID_TYPE; + + return iterator->second; +} + +void Recognizer::setInterpreter(atn::ATNSimulator *interpreter) { + // Usually the interpreter is set by the descendant (lexer or parser (simulator), but can also be exchanged + // by the profiling ATN simulator. + delete _interpreter; + _interpreter = interpreter; +} + +std::string Recognizer::getErrorHeader(RecognitionException *e) { + // We're having issues with cross header dependencies, these two classes will need to be + // rewritten to remove that.
+ size_t line = e->getOffendingToken()->getLine(); + size_t charPositionInLine = e->getOffendingToken()->getCharPositionInLine(); + return std::string("line ") + std::to_string(line) + ":" + std::to_string(charPositionInLine); + +} + +std::string Recognizer::getTokenErrorDisplay(Token *t) { + if (t == nullptr) { + return "<no Token>"; + } + std::string s = t->getText(); + if (s == "") { + if (t->getType() == EOF) { + s = "<EOF>"; + } else { + s = std::string("<") + std::to_string(t->getType()) + std::string(">"); + } + } + + std::string result; + result.reserve(s.size() + 2); + result.push_back('\''); + antlrcpp::escapeWhitespace(result, s); + result.push_back('\''); + result.shrink_to_fit(); + return result; +} + +void Recognizer::addErrorListener(ANTLRErrorListener *listener) { + _proxListener.addErrorListener(listener); +} + +void Recognizer::removeErrorListener(ANTLRErrorListener *listener) { + _proxListener.removeErrorListener(listener); +} + +void Recognizer::removeErrorListeners() { + _proxListener.removeErrorListeners(); +} + +ProxyErrorListener& Recognizer::getErrorListenerDispatch() { + return _proxListener; +} + +bool Recognizer::sempred(RuleContext * /*localctx*/, size_t /*ruleIndex*/, size_t /*actionIndex*/) { + return true; +} + +bool Recognizer::precpred(RuleContext * /*localctx*/, int /*precedence*/) { + return true; +} + +void Recognizer::action(RuleContext * /*localctx*/, size_t /*ruleIndex*/, size_t /*actionIndex*/) { +} + +void Recognizer::InitializeInstanceFields() { + _stateNumber = ATNState::INVALID_STATE_NUMBER; + _interpreter = nullptr; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/Recognizer.h b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.h new file mode 100644 index 0000000000..0226a612e1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Recognizer.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ProxyErrorListener.h" +#include "support/Casts.h" +#include "atn/SerializedATNView.h" +#include "internal/Synchronization.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC Recognizer { + public: + static constexpr size_t EOF = std::numeric_limits<size_t>::max(); + + Recognizer(); + Recognizer(Recognizer const&) = delete; + virtual ~Recognizer(); + + Recognizer& operator=(Recognizer const&) = delete; + + virtual std::vector<std::string> const& getRuleNames() const = 0; + + /** + * Get the vocabulary used by the recognizer. + * + * @return A {@link Vocabulary} instance providing information about the + * vocabulary used by the grammar. + */ + virtual dfa::Vocabulary const& getVocabulary() const = 0; + + /// <summary> + /// Get a map from token names to token types. + /// <p/> + /// Used for XPath and tree pattern compilation. + /// </summary> + virtual std::map<std::string_view, size_t> getTokenTypeMap(); + + /// <summary> + /// Get a map from rule names to rule indexes. + /// <p/> + /// Used for XPath and tree pattern compilation. + /// </summary> + virtual std::map<std::string, size_t> getRuleIndexMap(); + + virtual size_t getTokenType(std::string_view tokenName); + + /// <summary> + /// If this recognizer was generated, it will have a serialized ATN + /// representation of the grammar. + /// <p/> + /// For interpreters, we don't know their serialized ATN despite having + /// created the interpreter from it. + /// </summary> + virtual atn::SerializedATNView getSerializedATN() const { + throw "there is no serialized ATN"; + } + + /// <summary> + /// For debugging and other purposes, might want the grammar name. + /// Have ANTLR generate an implementation for this method. 
+ /// </summary> + virtual std::string getGrammarFileName() const = 0; + + /// Get the ATN interpreter (in fact one of it's descendants) used by the recognizer for prediction. + /// @returns The ATN interpreter used by the recognizer for prediction. + template <class T> + T* getInterpreter() const { + return antlrcpp::downCast<T *>(_interpreter); + } + + /** + * Set the ATN interpreter used by the recognizer for prediction. + * + * @param interpreter The ATN interpreter used by the recognizer for + * prediction. + */ + void setInterpreter(atn::ATNSimulator *interpreter); + + /// What is the error header, normally line/character position information? + virtual std::string getErrorHeader(RecognitionException *e); + + /** How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.toString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. + * + * @deprecated This method is not called by the ANTLR 4 Runtime. Specific + * implementations of {@link ANTLRErrorStrategy} may provide a similar + * feature when necessary. For example, see + * {@link DefaultErrorStrategy#getTokenErrorDisplay}. + */ + virtual std::string getTokenErrorDisplay(Token *t); + + /// <exception cref="NullPointerException"> if {@code listener} is {@code null}. 
</exception> + virtual void addErrorListener(ANTLRErrorListener *listener); + + virtual void removeErrorListener(ANTLRErrorListener *listener); + + virtual void removeErrorListeners(); + + virtual ProxyErrorListener& getErrorListenerDispatch(); + + // subclass needs to override these if there are sempreds or actions + // that the ATN interp needs to execute + virtual bool sempred(RuleContext *localctx, size_t ruleIndex, size_t actionIndex); + + virtual bool precpred(RuleContext *localctx, int precedence); + + virtual void action(RuleContext *localctx, size_t ruleIndex, size_t actionIndex); + + size_t getState() const { return _stateNumber; } + + // Get the ATN used by the recognizer for prediction. + virtual const atn::ATN& getATN() const = 0; + + /// <summary> + /// Indicate that the recognizer has changed internal state that is + /// consistent with the ATN state passed in. This way we always know + /// where we are in the ATN as the parser goes along. The rule + /// context objects form a stack that lets us see the stack of + /// invoking rules. Combine this and we have complete ATN + /// configuration information. + /// </summary> + void setState(size_t atnState) { _stateNumber = atnState; } + + virtual IntStream* getInputStream() = 0; + + virtual void setInputStream(IntStream *input) = 0; + + virtual TokenFactory<CommonToken>* getTokenFactory() = 0; + + template<typename T1> + void setTokenFactory(TokenFactory<T1> *input); + + protected: + atn::ATNSimulator *_interpreter; // Set and deleted in descendants (or the profiler). + + // Mutex to manage synchronized access for multithreading. + internal::Mutex _mutex; + + private: + static std::map<const dfa::Vocabulary*, std::map<std::string_view, size_t>> _tokenTypeMapCache; + static std::map<std::vector<std::string>, std::map<std::string, size_t>> _ruleIndexMapCache; + + ProxyErrorListener _proxListener; // Manages a collection of listeners. 
+ + size_t _stateNumber; + + void InitializeInstanceFields(); + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.cpp new file mode 100644 index 0000000000..6d67f9a29a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.cpp @@ -0,0 +1,144 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/Trees.h" +#include "misc/Interval.h" +#include "Parser.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "tree/ParseTreeVisitor.h" + +#include "RuleContext.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::tree; + +RuleContext::RuleContext() : ParseTree(ParseTreeType::RULE) { + InitializeInstanceFields(); +} + +RuleContext::RuleContext(RuleContext *parent_, size_t invokingState_) : ParseTree(ParseTreeType::RULE) { + InitializeInstanceFields(); + this->parent = parent_; + this->invokingState = invokingState_; +} + +int RuleContext::depth() { + int n = 1; + RuleContext *p = this; + while (true) { + if (p->parent == nullptr) + break; + p = static_cast<RuleContext *>(p->parent); + n++; + } + return n; +} + +bool RuleContext::isEmpty() { + return invokingState == ATNState::INVALID_STATE_NUMBER; +} + +misc::Interval RuleContext::getSourceInterval() { + return misc::Interval::INVALID; +} + +std::string RuleContext::getText() { + if (children.empty()) { + return ""; + } + + std::stringstream ss; + for (size_t i = 0; i < children.size(); i++) { + ParseTree *tree = children[i]; + if (tree != nullptr) + ss << tree->getText(); + } + + return ss.str(); +} + +size_t RuleContext::getRuleIndex() const { + return INVALID_INDEX; +} + +size_t RuleContext::getAltNumber() const { + return atn::ATN::INVALID_ALT_NUMBER; +} + +void RuleContext::setAltNumber(size_t /*altNumber*/) { 
+} + +std::any RuleContext::accept(tree::ParseTreeVisitor *visitor) { + return visitor->visitChildren(this); +} + +std::string RuleContext::toStringTree(Parser *recog, bool pretty) { + return tree::Trees::toStringTree(this, recog, pretty); +} + +std::string RuleContext::toStringTree(std::vector<std::string> &ruleNames, bool pretty) { + return tree::Trees::toStringTree(this, ruleNames, pretty); +} + +std::string RuleContext::toStringTree(bool pretty) { + return toStringTree(nullptr, pretty); +} + + +std::string RuleContext::toString(const std::vector<std::string> &ruleNames) { + return toString(ruleNames, nullptr); +} + + +std::string RuleContext::toString(const std::vector<std::string> &ruleNames, RuleContext *stop) { + std::stringstream ss; + + RuleContext *currentParent = this; + ss << "["; + while (currentParent != stop) { + if (ruleNames.empty()) { + if (!currentParent->isEmpty()) { + ss << currentParent->invokingState; + } + } else { + size_t ruleIndex = currentParent->getRuleIndex(); + + std::string ruleName = (ruleIndex < ruleNames.size()) ? ruleNames[ruleIndex] : std::to_string(ruleIndex); + ss << ruleName; + } + + if (currentParent->parent == nullptr) // No parent anymore. + break; + currentParent = static_cast<RuleContext *>(currentParent->parent); + if (!ruleNames.empty() || !currentParent->isEmpty()) { + ss << " "; + } + } + + ss << "]"; + + return ss.str(); +} + +std::string RuleContext::toString() { + return toString(nullptr); +} + +std::string RuleContext::toString(Recognizer *recog) { + return toString(recog, &ParserRuleContext::EMPTY); +} + +std::string RuleContext::toString(Recognizer *recog, RuleContext *stop) { + if (recog == nullptr) + return toString(std::vector<std::string>(), stop); // Don't use an initializer {} here or we end up calling ourselves recursively. 
+ return toString(recog->getRuleNames(), stop); +} + +void RuleContext::InitializeInstanceFields() { + invokingState = INVALID_INDEX; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContext.h b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.h new file mode 100644 index 0000000000..a0effa2a02 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContext.h @@ -0,0 +1,141 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTree.h" + +namespace antlr4 { + + /** A rule context is a record of a single rule invocation. + * + * We form a stack of these context objects using the parent + * pointer. A parent pointer of null indicates that the current + * context is the bottom of the stack. The ParserRuleContext subclass + * has a children list so that we can turn this data structure into a + * tree. + * + * The root node always has a null pointer and invokingState of -1. + * + * Upon entry to parsing, the first invoked rule function creates a + * context object (a subclass specialized for that rule such as + * SContext) and makes it the root of a parse tree, recorded by field + * Parser._ctx. + * + * public final SContext s() throws RecognitionException { + * SContext _localctx = new SContext(_ctx, getState()); <-- create new node + * enterRule(_localctx, 0, RULE_s); <-- push it + * ... + * exitRule(); <-- pop back to _localctx + * return _localctx; + * } + * + * A subsequent rule invocation of r from the start rule s pushes a + * new context object for r whose parent points at s and whose invoking + * state is the state with r emanating as edge label. + * + * The invokingState fields from a context object to the root + * together form a stack of rule invocation states where the root + * (bottom of the stack) has a -1 sentinel value. 
If we invoke start + * symbol s then call r1, which calls r2, the stack would look like + * this: + * + * SContext[-1] <- root node (bottom of the stack) + * R1Context[p] <- p in rule s called r1 + * R2Context[q] <- q in rule r1 called r2 + * + * So the top of the stack, _ctx, represents a call to the current + * rule and it holds the return address from another rule that invoked + * this rule. To invoke a rule, we must always have a current context. + * + * The parent contexts are useful for computing lookahead sets and + * getting error information. + * + * These objects are used during parsing and prediction. + * For the special case of parsers, we use the subclass + * ParserRuleContext. + * + * @see ParserRuleContext + */ + class ANTLR4CPP_PUBLIC RuleContext : public tree::ParseTree { + public: + static bool is(const tree::ParseTree &parseTree) { return parseTree.getTreeType() == tree::ParseTreeType::RULE; } + + static bool is(const tree::ParseTree *parseTree) { return parseTree != nullptr && is(*parseTree); } + + /// What state invoked the rule associated with this context? + /// The "return address" is the followState of invokingState + /// If parent is null, this should be -1 and this context object represents the start rule. + size_t invokingState; + + RuleContext(); + RuleContext(RuleContext *parent, size_t invokingState); + + virtual int depth(); + + /// A context is empty if there is no invoking state; meaning nobody called current context. + virtual bool isEmpty(); + + // satisfy the ParseTree / SyntaxTree interface + + virtual misc::Interval getSourceInterval() override; + + virtual std::string getText() override; + + virtual size_t getRuleIndex() const; + + /** For rule associated with this parse tree internal node, return + * the outer alternative number used to match the input. Default + * implementation does not compute nor store this alt num. Create + * a subclass of ParserRuleContext with backing field and set + * option contextSuperClass. 
+ * to set it. + * + * @since 4.5.3 + */ + virtual size_t getAltNumber() const; + + /** Set the outer alternative number for this context node. Default + * implementation does nothing to avoid backing field overhead for + * trees that don't need it. Create + * a subclass of ParserRuleContext with backing field and set + * option contextSuperClass. + * + * @since 4.5.3 + */ + virtual void setAltNumber(size_t altNumber); + + virtual std::any accept(tree::ParseTreeVisitor *visitor) override; + + /// <summary> + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// We have to know the recognizer so we can get rule names. + /// </summary> + virtual std::string toStringTree(Parser *recog, bool pretty = false) override; + + /// <summary> + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// </summary> + virtual std::string toStringTree(std::vector<std::string> &ruleNames, bool pretty = false); + + virtual std::string toStringTree(bool pretty = false) override; + virtual std::string toString() override; + std::string toString(Recognizer *recog); + std::string toString(const std::vector<std::string> &ruleNames); + + // recog null unless ParserRuleContext, in which case we use subclass toString(...) + std::string toString(Recognizer *recog, RuleContext *stop); + + virtual std::string toString(const std::vector<std::string> &ruleNames, RuleContext *stop); + + bool operator == (const RuleContext &other) { return this == &other; } // Simple address comparison. 
+ + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.cpp b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.cpp new file mode 100644 index 0000000000..250859fdc0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.cpp @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" + +#include "RuleContextWithAltNum.h" + +using namespace antlr4; +using namespace antlr4::atn; + +RuleContextWithAltNum::RuleContextWithAltNum() : ParserRuleContext() { + altNum = ATN::INVALID_ALT_NUMBER; +} + +RuleContextWithAltNum::RuleContextWithAltNum(ParserRuleContext *parent, int invokingStateNumber) + : ParserRuleContext(parent, invokingStateNumber) { +} + +size_t RuleContextWithAltNum::getAltNumber() const { + return altNum; +} + +void RuleContextWithAltNum::setAltNumber(size_t number) { + altNum = number; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.h b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.h new file mode 100644 index 0000000000..995d9aa7b1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuleContextWithAltNum.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ParserRuleContext.h" + +namespace antlr4 { + + /// A handy class for use with + /// + /// options {contextSuperClass=org.antlr.v4.runtime.RuleContextWithAltNum;} + /// + /// that provides a backing field / impl for the outer alternative number + /// matched for an internal parse tree node. 
+ /// + /// I'm only putting into Java runtime as I'm certain I'm the only one that + /// will really ever use this. + class ANTLR4CPP_PUBLIC RuleContextWithAltNum : public ParserRuleContext { + public: + size_t altNum = 0; + + RuleContextWithAltNum(); + RuleContextWithAltNum(ParserRuleContext *parent, int invokingStateNumber); + + virtual size_t getAltNumber() const override; + virtual void setAltNumber(size_t altNum) override; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.cpp b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.cpp new file mode 100644 index 0000000000..cf30d68587 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.cpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "RuntimeMetaData.h" +#include "Version.h" + +using namespace antlr4; + +const std::string RuntimeMetaData::VERSION = ANTLRCPP_VERSION_STRING; + +std::string RuntimeMetaData::getRuntimeVersion() { + return VERSION; +} + +void RuntimeMetaData::checkVersion(const std::string &generatingToolVersion, const std::string &compileTimeVersion) { + std::string runtimeVersion = VERSION; + bool runtimeConflictsWithGeneratingTool = false; + bool runtimeConflictsWithCompileTimeTool = false; + + if (generatingToolVersion != "") { + runtimeConflictsWithGeneratingTool = runtimeVersion != generatingToolVersion + && getMajorMinorVersion(runtimeVersion) != getMajorMinorVersion(generatingToolVersion); + } + + runtimeConflictsWithCompileTimeTool = runtimeVersion != compileTimeVersion + && getMajorMinorVersion(runtimeVersion) != getMajorMinorVersion(compileTimeVersion); + + if (runtimeConflictsWithGeneratingTool) { + std::cerr << "ANTLR Tool version " << generatingToolVersion << " used for code generation does not match " + "the current runtime version " << 
runtimeVersion << std::endl; + } + if (runtimeConflictsWithCompileTimeTool) { + std::cerr << "ANTLR Runtime version " << compileTimeVersion << " used for parser compilation does not match " + "the current runtime version " << runtimeVersion << std::endl; + } +} + +std::string RuntimeMetaData::getMajorMinorVersion(const std::string &version) { + size_t firstDot = version.find('.'); + size_t secondDot = firstDot != std::string::npos ? version.find('.', firstDot + 1) : std::string::npos; + size_t firstDash = version.find('-'); + size_t referenceLength = version.size(); + if (secondDot != std::string::npos) { + referenceLength = std::min(referenceLength, secondDot); + } + + if (firstDash != std::string::npos) { + referenceLength = std::min(referenceLength, firstDash); + } + + return version.substr(0, referenceLength); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.h b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.h new file mode 100644 index 0000000000..f178cfe9e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/RuntimeMetaData.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// <summary> + /// This class provides access to the current version of the ANTLR 4 runtime + /// library as compile-time and runtime constants, along with methods for + /// checking for matching version numbers and notifying listeners in the case + /// where a version mismatch is detected. + /// + /// <para> + /// The runtime version information is provided by <seealso cref="#VERSION"/> and + /// <seealso cref="#getRuntimeVersion()"/>. 
Detailed information about these values is + /// provided in the documentation for each member.</para> + /// + /// <para> + /// The runtime version check is implemented by <seealso cref="#checkVersion"/>. Detailed + /// information about incorporating this call into user code, as well as its use + /// in generated code, is provided in the documentation for the method.</para> + /// + /// <para> + /// Version strings x.y and x.y.z are considered "compatible" and no error + /// would be generated. Likewise, version strings x.y-SNAPSHOT and x.y.z are + /// considered "compatible" because the major and minor components x.y + /// are the same in each.</para> + /// + /// <para> + /// To trap any error messages issued by this code, use System.setErr() + /// in your main() startup code. + /// </para> + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC RuntimeMetaData { + public: + /// A compile-time constant containing the current version of the ANTLR 4 + /// runtime library. + /// + /// <para> + /// This compile-time constant value allows generated parsers and other + /// libraries to include a literal reference to the version of the ANTLR 4 + /// runtime library the code was compiled against. At each release, we + /// change this value.</para> + /// + /// <para>Version numbers are assumed to have the form + /// + /// <em>major</em>.<em>minor</em>.<em>patch</em>.<em>revision</em>-<em>suffix</em>, + /// + /// with the individual components defined as follows.</para> + /// + /// <ul> + /// <li><em>major</em> is a required non-negative integer, and is equal to + /// {@code 4} for ANTLR 4.</li> + /// <li><em>minor</em> is a required non-negative integer.</li> + /// <li><em>patch</em> is an optional non-negative integer. When + /// <em>patch</em> is omitted, the {@code .} (dot) appearing before it is + /// also omitted.</li> + /// <li><em>revision</em> is an optional non-negative integer, and may only + /// be included when <em>patch</em> is also included. 
When <em>revision</em> + /// is omitted, the {@code .} (dot) appearing before it is also omitted.</li> + /// <li><em>suffix</em> is an optional string. When <em>suffix</em> is + /// omitted, the {@code -} (hyphen-minus) appearing before it is also + /// omitted.</li> + /// </ul> + static const std::string VERSION; + + /// <summary> + /// Gets the currently executing version of the ANTLR 4 runtime library. + /// + /// <para> + /// This method provides runtime access to the <seealso cref="#VERSION"/> field, as + /// opposed to directly referencing the field as a compile-time constant.</para> + /// </summary> + /// <returns> The currently executing version of the ANTLR 4 library </returns> + + static std::string getRuntimeVersion(); + + /// <summary> + /// This method provides the ability to detect mismatches between the version + /// of ANTLR 4 used to generate a parser, the version of the ANTLR runtime a + /// parser was compiled against, and the version of the ANTLR runtime which + /// is currently executing. + /// + /// <para> + /// The version check is designed to detect the following two specific + /// scenarios.</para> + /// + /// <ul> + /// <li>The ANTLR Tool version used for code generation does not match the + /// currently executing runtime version.</li> + /// <li>The ANTLR Runtime version referenced at the time a parser was + /// compiled does not match the currently executing runtime version.</li> + /// </ul> + /// + /// <para> + /// Starting with ANTLR 4.3, the code generator emits a call to this method + /// using two constants in each generated lexer and parser: a hard-coded + /// constant indicating the version of the tool used to generate the parser + /// and a reference to the compile-time constant <seealso cref="#VERSION"/>. 
At + /// runtime, this method is called during the initialization of the generated + /// parser to detect mismatched versions, and notify the registered listeners + /// prior to creating instances of the parser.</para> + /// + /// <para> + /// This method does not perform any detection or filtering of semantic + /// changes between tool and runtime versions. It simply checks for a + /// version match and emits an error to stderr if a difference + /// is detected.</para> + /// + /// <para> + /// Note that some breaking changes between releases could result in other + /// types of runtime exceptions, such as a <seealso cref="LinkageError"/>, prior to + /// calling this method. In these cases, the underlying version mismatch will + /// not be reported here. This method is primarily intended to + /// notify users of potential semantic changes between releases that do not + /// result in binary compatibility problems which would be detected by the + /// class loader. As with semantic changes, changes that break binary + /// compatibility between releases are mentioned in the release notes + /// accompanying the affected release.</para> + /// + /// <para> + /// <strong>Additional note for target developers:</strong> The version check + /// implemented by this class is designed to address specific compatibility + /// concerns that may arise during the execution of Java applications. Other + /// targets should consider the implementation of this method in the context + /// of that target's known execution environment, which may or may not + /// resemble the design provided for the Java target.</para> + /// </summary> + /// <param name="generatingToolVersion"> The version of the tool used to generate a parser. + /// This value may be null when called from user code that was not generated + /// by, and does not reference, the ANTLR 4 Tool itself. </param> + /// <param name="compileTimeVersion"> The version of the runtime the parser was + /// compiled against. 
This should always be passed using a direct reference + /// to <seealso cref="#VERSION"/>. </param> + static void checkVersion(const std::string &generatingToolVersion, const std::string &compileTimeVersion); + + /// <summary> + /// Gets the major and minor version numbers from a version string. For + /// details about the syntax of the input {@code version}. + /// E.g., from x.y.z return x.y. + /// </summary> + /// <param name="version"> The complete version string. </param> + /// <returns> A string of the form <em>major</em>.<em>minor</em> containing + /// only the major and minor components of the version string. </returns> + static std::string getMajorMinorVersion(const std::string &version); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Token.cpp b/contrib/libs/antlr4_cpp_runtime/src/Token.cpp new file mode 100644 index 0000000000..31266b42d1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Token.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" + +antlr4::Token::~Token() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Token.h b/contrib/libs/antlr4_cpp_runtime/src/Token.h new file mode 100644 index 0000000000..832db740b3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Token.h @@ -0,0 +1,92 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" + +namespace antlr4 { + + /// A token has properties: text, type, line, character position in the line + /// (so we can ignore tabs), token channel, index, and source from which + /// we obtained this token. 
+ class ANTLR4CPP_PUBLIC Token { + public: + static constexpr size_t INVALID_TYPE = 0; + + /// During lookahead operations, this "token" signifies we hit rule end ATN state + /// and did not follow it despite needing to. + static constexpr size_t EPSILON = std::numeric_limits<size_t>::max() - 1; + static constexpr size_t MIN_USER_TOKEN_TYPE = 1; + static constexpr size_t EOF = IntStream::EOF; + + virtual ~Token(); + + /// All tokens go to the parser (unless skip() is called in that rule) + /// on a particular "channel". The parser tunes to a particular channel + /// so that whitespace etc... can go to the parser on a "hidden" channel. + static constexpr size_t DEFAULT_CHANNEL = 0; + + /// Anything on different channel than DEFAULT_CHANNEL is not parsed + /// by parser. + static constexpr size_t HIDDEN_CHANNEL = 1; + + /** + * This is the minimum constant value which can be assigned to a + * user-defined token channel. + * + * <p> + * The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are + * assigned to the predefined channels {@link #DEFAULT_CHANNEL} and + * {@link #HIDDEN_CHANNEL}.</p> + * + * @see Token#getChannel() + */ + static constexpr size_t MIN_USER_CHANNEL_VALUE = 2; + + /// Get the text of the token. + virtual std::string getText() const = 0; + + /// Get the token type of the token + virtual size_t getType() const = 0; + + /// The line number on which the 1st character of this token was matched, line=1..n + virtual size_t getLine() const = 0; + + /// The index of the first character of this token relative to the + /// beginning of the line at which it occurs, 0..n-1 + virtual size_t getCharPositionInLine() const = 0; + + /// Return the channel this token. Each token can arrive at the parser + /// on a different channel, but the parser only "tunes" to a single channel. + /// The parser ignores everything not on DEFAULT_CHANNEL. + virtual size_t getChannel() const = 0; + + /// An index from 0..n-1 of the token object in the input stream. 
+ /// This must be valid in order to print token streams and + /// use TokenRewriteStream. + /// + /// Return INVALID_INDEX to indicate that this token was conjured up since + /// it doesn't have a valid index. + virtual size_t getTokenIndex() const = 0; + + /// The starting character index of the token + /// This method is optional; return INVALID_INDEX if not implemented. + virtual size_t getStartIndex() const = 0; + + /// The last character index of the token. + /// This method is optional; return INVALID_INDEX if not implemented. + virtual size_t getStopIndex() const = 0; + + /// Gets the <seealso cref="TokenSource"/> which created this token. + virtual TokenSource *getTokenSource() const = 0; + + /// Gets the <seealso cref="CharStream"/> from which this token was derived. + virtual CharStream *getInputStream() const = 0; + + virtual std::string toString() const = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenFactory.h b/contrib/libs/antlr4_cpp_runtime/src/TokenFactory.h new file mode 100644 index 0000000000..4eef044329 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenFactory.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /// The default mechanism for creating tokens. It's used by default in Lexer and + /// the error handling strategy (to create missing tokens). Notifying the parser + /// of a new factory means that it notifies it's token source and error strategy. + template<typename Symbol> + class ANTLR4CPP_PUBLIC TokenFactory { + public: + virtual ~TokenFactory() {} + + /// This is the method used to create tokens in the lexer and in the + /// error handling strategy. 
If text!=null, then the start and stop positions + /// are wiped to -1 and the text override is set in the CommonToken. + virtual std::unique_ptr<Symbol> create(std::pair<TokenSource *, CharStream *> source, size_t type, const std::string &text, + size_t channel, size_t start, size_t stop, size_t line, size_t charPositionInLine) = 0; + + /// Generically useful + virtual std::unique_ptr<Symbol> create(size_t type, const std::string &text) = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenSource.cpp b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.cpp new file mode 100644 index 0000000000..6b9d7af2f7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "TokenSource.h" + +antlr4::TokenSource::~TokenSource() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenSource.h b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.h new file mode 100644 index 0000000000..f05c27efac --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenSource.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenFactory.h" + +namespace antlr4 { + + /// <summary> + /// A source of tokens must provide a sequence of tokens via <seealso cref="#nextToken()"/> + /// and also must reveal its source of characters; <seealso cref="CommonToken"/>'s text is + /// computed from a <seealso cref="CharStream"/>; it only stores indices into the char + /// stream. + /// <p/> + /// Errors from the lexer are never passed to the parser. Either you want to keep + /// going or you do not upon token recognition error. 
If you do not want to + /// continue lexing then you do not want to continue parsing. Just throw an + /// exception not under <seealso cref="RecognitionException"/> and Java will naturally toss + /// you all the way out of the recognizers. If you want to continue lexing then + /// you should not throw an exception to the parser--it has already requested a + /// token. Keep lexing until you get a valid one. Just report errors and keep + /// going, looking for a valid token. + /// </summary> + class ANTLR4CPP_PUBLIC TokenSource { + public: + virtual ~TokenSource(); + + /// Return a <seealso cref="Token"/> object from your input stream (usually a + /// <seealso cref="CharStream"/>). Do not fail/return upon lexing error; keep chewing + /// on the characters until you get a good one; errors are not passed through + /// to the parser. + virtual std::unique_ptr<Token> nextToken() = 0; + + /// <summary> + /// Get the line number for the current position in the input stream. The + /// first line in the input is line 1. + /// </summary> + /// <returns> The line number for the current position in the input stream, or + /// 0 if the current token source does not track line numbers. </returns> + virtual size_t getLine() const = 0; + + /// <summary> + /// Get the index into the current line for the current position in the input + /// stream. The first character on a line has position 0. + /// </summary> + /// <returns> The index into the current line for the current position in the input stream, or + /// (size_t)-1 if the current token source does not track character positions. </returns> + virtual size_t getCharPositionInLine() = 0; + + /// <summary> + /// Get the <seealso cref="CharStream"/> from which this token source is currently + /// providing tokens. + /// </summary> + /// <returns> The <seealso cref="CharStream"/> associated with the current position in + /// the input, or {@code null} if no input stream is available for the token + /// source. 
</returns> + virtual CharStream* getInputStream() = 0; + + /// <summary> + /// Gets the name of the underlying input source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns <seealso cref="IntStream#UNKNOWN_SOURCE_NAME"/>. + /// </summary> + virtual std::string getSourceName() = 0; + + /// <summary> + /// Set the <seealso cref="TokenFactory"/> this token source should use for creating + /// <seealso cref="Token"/> objects from the input. + /// </summary> + /// <param name="factory"> The <seealso cref="TokenFactory"/> to use for creating tokens. </param> + template<typename T1> + void setTokenFactory(TokenFactory<T1> * /*factory*/) {} + + /// <summary> + /// Gets the <seealso cref="TokenFactory"/> this token source is currently using for + /// creating <seealso cref="Token"/> objects from the input. + /// </summary> + /// <returns> The <seealso cref="TokenFactory"/> currently used by this token source. </returns> + virtual TokenFactory<CommonToken>* getTokenFactory() = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.cpp new file mode 100644 index 0000000000..fbb1ab788a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.cpp @@ -0,0 +1,11 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "TokenStream.h" + +using namespace antlr4; + +TokenStream::~TokenStream() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.h new file mode 100644 index 0000000000..15b4f367a6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStream.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "IntStream.h" + +namespace antlr4 { + + /// <summary> + /// An <seealso cref="IntStream"/> whose symbols are <seealso cref="Token"/> instances. + /// </summary> + class ANTLR4CPP_PUBLIC TokenStream : public IntStream { + /// <summary> + /// Get the <seealso cref="Token"/> instance associated with the value returned by + /// <seealso cref="#LA LA(k)"/>. This method has the same pre- and post-conditions as + /// <seealso cref="IntStream#LA"/>. In addition, when the preconditions of this method + /// are met, the return value is non-null and the value of + /// {@code LT(k).getType()==LA(k)}. + /// </summary> + /// <seealso cref= IntStream#LA </seealso> + public: + virtual ~TokenStream(); + + virtual Token* LT(ssize_t k) = 0; + + /// <summary> + /// Gets the <seealso cref="Token"/> at the specified {@code index} in the stream. When + /// the preconditions of this method are met, the return value is non-null. + /// <p/> + /// The preconditions for this method are the same as the preconditions of + /// <seealso cref="IntStream#seek"/>. If the behavior of {@code seek(index)} is + /// unspecified for the current state and given {@code index}, then the + /// behavior of this method is also unspecified. + /// <p/> + /// The symbol referred to by {@code index} differs from {@code seek()} only + /// in the case of filtering streams where {@code index} lies before the end + /// of the stream. Unlike {@code seek()}, this method does not adjust + /// {@code index} to point to a non-ignored symbol. 
+ /// </summary> + /// <exception cref="IllegalArgumentException"> if {code index} is less than 0 </exception> + /// <exception cref="UnsupportedOperationException"> if the stream does not support + /// retrieving the token at the specified index </exception> + virtual Token* get(size_t index) const = 0; + + /// Gets the underlying TokenSource which provides tokens for this stream. + virtual TokenSource* getTokenSource() const = 0; + + /// <summary> + /// Return the text of all tokens within the specified {@code interval}. This + /// method behaves like the following code (including potential exceptions + /// for violating preconditions of <seealso cref="#get"/>, but may be optimized by the + /// specific implementation. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = ""; + /// for (int i = interval.a; i <= interval.b; i++) { + /// text += stream.get(i).getText(); + /// } + /// </pre> + /// </summary> + /// <param name="interval"> The interval of tokens within this stream to get text + /// for. </param> + /// <returns> The text of all tokens within the specified interval in this + /// stream. + /// </returns> + /// <exception cref="NullPointerException"> if {@code interval} is {@code null} </exception> + virtual std::string getText(const misc::Interval &interval) = 0; + + /// <summary> + /// Return the text of all tokens in the stream. This method behaves like the + /// following code, including potential exceptions from the calls to + /// <seealso cref="IntStream#size"/> and <seealso cref="#getText(Interval)"/>, but may be + /// optimized by the specific implementation. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = stream.getText(new Interval(0, stream.size())); + /// </pre> + /// </summary> + /// <returns> The text of all tokens in the stream. </returns> + virtual std::string getText() = 0; + + /// <summary> + /// Return the text of all tokens in the source interval of the specified + /// context. 
This method behaves like the following code, including potential + /// exceptions from the call to <seealso cref="#getText(Interval)"/>, but may be + /// optimized by the specific implementation. + /// </p> + /// If {@code ctx.getSourceInterval()} does not return a valid interval of + /// tokens provided by this stream, the behavior is unspecified. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = stream.getText(ctx.getSourceInterval()); + /// </pre> + /// </summary> + /// <param name="ctx"> The context providing the source interval of tokens to get + /// text for. </param> + /// <returns> The text of all tokens within the source interval of {@code ctx}. </returns> + virtual std::string getText(RuleContext *ctx) = 0; + + /// <summary> + /// Return the text of all tokens in this stream between {@code start} and + /// {@code stop} (inclusive). + /// <p/> + /// If the specified {@code start} or {@code stop} token was not provided by + /// this stream, or if the {@code stop} occurred before the {@code start} + /// token, the behavior is unspecified. + /// <p/> + /// For streams which ensure that the <seealso cref="Token#getTokenIndex"/> method is + /// accurate for all of its provided tokens, this method behaves like the + /// following code. Other streams may implement this method in other ways + /// provided the behavior is consistent with this at a high level. + /// + /// <pre> + /// TokenStream stream = ...; + /// String text = ""; + /// for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) { + /// text += stream.get(i).getText(); + /// } + /// </pre> + /// </summary> + /// <param name="start"> The first token in the interval to get text for. </param> + /// <param name="stop"> The last token in the interval to get text for (inclusive). </param> + /// <returns> The text of all tokens lying between the specified {@code start} + /// and {@code stop} tokens. 
+ /// </returns> + /// <exception cref="UnsupportedOperationException"> if this stream does not support + /// this method for the specified tokens </exception> + virtual std::string getText(Token *start, Token *stop) = 0; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.cpp b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.cpp new file mode 100644 index 0000000000..9050eb5c91 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.cpp @@ -0,0 +1,425 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "misc/Interval.h" +#include "Token.h" +#include "TokenStream.h" + +#include "TokenStreamRewriter.h" + +using namespace antlr4; + +using antlr4::misc::Interval; + +TokenStreamRewriter::RewriteOperation::RewriteOperation(TokenStreamRewriter *outerInstance_, size_t index_) + : outerInstance(outerInstance_) { + + InitializeInstanceFields(); + this->index = index_; +} + +TokenStreamRewriter::RewriteOperation::RewriteOperation(TokenStreamRewriter *outerInstance_, size_t index_, + const std::string& text_) : outerInstance(outerInstance_) { + + InitializeInstanceFields(); + this->index = index_; + this->text = text_; +} + +TokenStreamRewriter::RewriteOperation::~RewriteOperation() +{ +} + +size_t TokenStreamRewriter::RewriteOperation::execute(std::string * /*buf*/) { + return index; +} + +// Renders this operation as <TokenStreamRewriter@TOKEN:"TEXT">, where TOKEN is the text of the token at this +// operation's token index and TEXT is the operation's payload. +std::string TokenStreamRewriter::RewriteOperation::toString() { + // The name literal contains no '$', so searching for one yields npos; that value must not be used as a + // token index. The token to show is the one this operation is attached to. + const std::string opName = "TokenStreamRewriter"; + return "<" + opName + "@" + outerInstance->tokens->get(index)->getText() + ":\"" + text + "\">"; +} + +void TokenStreamRewriter::RewriteOperation::InitializeInstanceFields() { + instructionIndex
= 0; + index = 0; +} + +TokenStreamRewriter::InsertBeforeOp::InsertBeforeOp(TokenStreamRewriter *outerInstance_, size_t index_, const std::string& text_) +: RewriteOperation(outerInstance_, index_, text_), outerInstance(outerInstance_) { +} + +size_t TokenStreamRewriter::InsertBeforeOp::execute(std::string *buf) { + buf->append(text); + if (outerInstance->tokens->get(index)->getType() != Token::EOF) { + buf->append(outerInstance->tokens->get(index)->getText()); + } + return index + 1; +} + +TokenStreamRewriter::ReplaceOp::ReplaceOp(TokenStreamRewriter *outerInstance_, size_t from, size_t to, const std::string& text) +: RewriteOperation(outerInstance_, from, text), outerInstance(outerInstance_) { + + InitializeInstanceFields(); + lastIndex = to; +} + +size_t TokenStreamRewriter::ReplaceOp::execute(std::string *buf) { + buf->append(text); + return lastIndex + 1; +} + +std::string TokenStreamRewriter::ReplaceOp::toString() { + if (text.empty()) { + return "<DeleteOp@" + outerInstance->tokens->get(index)->getText() + ".." + outerInstance->tokens->get(lastIndex)->getText() + ">"; + } + return "<ReplaceOp@" + outerInstance->tokens->get(index)->getText() + ".." 
+ outerInstance->tokens->get(lastIndex)->getText() + ":\"" + text + "\">"; +} + +void TokenStreamRewriter::ReplaceOp::InitializeInstanceFields() { + lastIndex = 0; +} + +//------------------ TokenStreamRewriter ------------------------------------------------------------------------------- + +const std::string TokenStreamRewriter::DEFAULT_PROGRAM_NAME = "default"; + +TokenStreamRewriter::TokenStreamRewriter(TokenStream *tokens_) : tokens(tokens_) { + _programs[DEFAULT_PROGRAM_NAME].reserve(PROGRAM_INIT_SIZE); +} + +TokenStreamRewriter::~TokenStreamRewriter() { + for (const auto &program : _programs) { + for (auto *operation : program.second) { + delete operation; + } + } +} + +TokenStream *TokenStreamRewriter::getTokenStream() { + return tokens; +} + +void TokenStreamRewriter::rollback(size_t instructionIndex) { + rollback(DEFAULT_PROGRAM_NAME, instructionIndex); +} + +// Discards every operation of the named program from instructionIndex onwards, keeping the ones before it. +// Does nothing when instructionIndex is at or past the end of the program. +void TokenStreamRewriter::rollback(const std::string &programName, size_t instructionIndex) { + // Truncate in place: std::map::insert() leaves an existing entry untouched, so re-inserting a shortened + // copy would never take effect. + std::vector<RewriteOperation*> &is = _programs[programName]; + if (instructionIndex >= is.size()) { + return; + } + // The operations were allocated by this rewriter; free the ones being dropped, since the destructor only + // sees what is still stored in the program. + for (size_t i = instructionIndex; i < is.size(); ++i) { + delete is[i]; + } + is.resize(instructionIndex); +} + +void TokenStreamRewriter::deleteProgram() { + deleteProgram(DEFAULT_PROGRAM_NAME); +} + +void TokenStreamRewriter::deleteProgram(const std::string &programName) { + rollback(programName, MIN_TOKEN_INDEX); +} + +void TokenStreamRewriter::insertAfter(Token *t, const std::string& text) { + insertAfter(DEFAULT_PROGRAM_NAME, t, text); +} + +void TokenStreamRewriter::insertAfter(size_t index, const std::string& text) { + insertAfter(DEFAULT_PROGRAM_NAME, index, text); +} + +void TokenStreamRewriter::insertAfter(const std::string &programName, Token *t, const std::string& text) { + insertAfter(programName, t->getTokenIndex(), text); +} + +void TokenStreamRewriter::insertAfter(const std::string &programName, size_t index, const std::string& text) { + // to insert after, just insert before next
index (even if past end) + insertBefore(programName, index + 1, text); +} + +void TokenStreamRewriter::insertBefore(Token *t, const std::string& text) { + insertBefore(DEFAULT_PROGRAM_NAME, t, text); +} + +void TokenStreamRewriter::insertBefore(size_t index, const std::string& text) { + insertBefore(DEFAULT_PROGRAM_NAME, index, text); +} + +void TokenStreamRewriter::insertBefore(const std::string &programName, Token *t, const std::string& text) { + insertBefore(programName, t->getTokenIndex(), text); +} + +void TokenStreamRewriter::insertBefore(const std::string &programName, size_t index, const std::string& text) { + RewriteOperation *op = new InsertBeforeOp(this, index, text); /* mem-check: deleted in d-tor */ + std::vector<RewriteOperation*> &rewrites = getProgram(programName); + op->instructionIndex = rewrites.size(); + rewrites.push_back(op); +} + +void TokenStreamRewriter::replace(size_t index, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, index, index, text); +} + +void TokenStreamRewriter::replace(size_t from, size_t to, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, from, to, text); +} + +void TokenStreamRewriter::replace(Token *indexT, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); +} + +void TokenStreamRewriter::replace(Token *from, Token *to, const std::string& text) { + replace(DEFAULT_PROGRAM_NAME, from, to, text); +} + +void TokenStreamRewriter::replace(const std::string &programName, size_t from, size_t to, const std::string& text) { + if (from > to || to >= tokens->size()) { + throw IllegalArgumentException("replace: range invalid: " + std::to_string(from) + ".." 
+ std::to_string(to) + + "(size = " + std::to_string(tokens->size()) + ")"); + } + RewriteOperation *op = new ReplaceOp(this, from, to, text); /* mem-check: deleted in d-tor */ + std::vector<RewriteOperation*> &rewrites = getProgram(programName); + op->instructionIndex = rewrites.size(); + rewrites.push_back(op); +} + +void TokenStreamRewriter::replace(const std::string &programName, Token *from, Token *to, const std::string& text) { + replace(programName, from->getTokenIndex(), to->getTokenIndex(), text); +} + +void TokenStreamRewriter::Delete(size_t index) { + Delete(DEFAULT_PROGRAM_NAME, index, index); +} + +void TokenStreamRewriter::Delete(size_t from, size_t to) { + Delete(DEFAULT_PROGRAM_NAME, from, to); +} + +void TokenStreamRewriter::Delete(Token *indexT) { + Delete(DEFAULT_PROGRAM_NAME, indexT, indexT); +} + +void TokenStreamRewriter::Delete(Token *from, Token *to) { + Delete(DEFAULT_PROGRAM_NAME, from, to); +} + +void TokenStreamRewriter::Delete(const std::string &programName, size_t from, size_t to) { + std::string nullString; + replace(programName, from, to, nullString); +} + +void TokenStreamRewriter::Delete(const std::string &programName, Token *from, Token *to) { + std::string nullString; + replace(programName, from, to, nullString); +} + +size_t TokenStreamRewriter::getLastRewriteTokenIndex() { + return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); +} + +// Returns the index recorded for programName, or INVALID_INDEX when none has been recorded. +size_t TokenStreamRewriter::getLastRewriteTokenIndex(const std::string &programName) { + // One lookup; operator[] is avoided so that a miss does not create an entry. + const auto entry = _lastRewriteTokenIndexes.find(programName); + if (entry == _lastRewriteTokenIndexes.end()) { + return INVALID_INDEX; + } + return entry->second; +} + +// Records i as the last rewrite token index of programName, replacing any previously recorded value. +void TokenStreamRewriter::setLastRewriteTokenIndex(const std::string &programName, size_t i) { + // Assignment rather than insert(): insert() keeps the old value when the key already exists. + _lastRewriteTokenIndexes[programName] = i; +} + +std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::getProgram(const std::string &name) { + auto iterator = _programs.find(name); + if (iterator == _programs.end()) { +
return initializeProgram(name); + } + return iterator->second; +} + +std::vector<TokenStreamRewriter::RewriteOperation*>& TokenStreamRewriter::initializeProgram(const std::string &name) { + _programs[name].reserve(PROGRAM_INIT_SIZE); + return _programs[name]; +} + +std::string TokenStreamRewriter::getText() { + return getText(DEFAULT_PROGRAM_NAME, Interval(0UL, tokens->size() - 1)); +} + +std::string TokenStreamRewriter::getText(std::string programName) { + return getText(programName, Interval(0UL, tokens->size() - 1)); +} + +std::string TokenStreamRewriter::getText(const Interval &interval) { + return getText(DEFAULT_PROGRAM_NAME, interval); +} + +std::string TokenStreamRewriter::getText(const std::string &programName, const Interval &interval) { + std::vector<TokenStreamRewriter::RewriteOperation*> &rewrites = _programs[programName]; + size_t start = interval.a; + size_t stop = interval.b; + + // ensure start/end are in range + if (stop > tokens->size() - 1) { + stop = tokens->size() - 1; + } + if (start == INVALID_INDEX) { + start = 0; + } + + if (rewrites.empty() || rewrites.empty()) { + return tokens->getText(interval); // no instructions to execute + } + std::string buf; + + // First, optimize instruction stream + std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> indexToOp = reduceToSingleOperationPerIndex(rewrites); + + // Walk buffer, executing instructions and emitting tokens + size_t i = start; + while (i <= stop && i < tokens->size()) { + RewriteOperation *op = indexToOp[i]; + indexToOp.erase(i); // remove so any left have index size-1 + Token *t = tokens->get(i); + if (op == nullptr) { + // no operation at that index, just dump token + if (t->getType() != Token::EOF) { + buf.append(t->getText()); + } + i++; // move to next token + } + else { + i = op->execute(&buf); // execute operation and skip + } + } + + // include stuff after end if it's last index in buffer + // So, if they did an insertAfter(lastValidIndex, "foo"), include + // 
foo if end==lastValidIndex. + if (stop == tokens->size() - 1) { + // Scan any remaining operations after last token + // should be included (they will be inserts). + for (auto op : indexToOp) { + if (op.second->index >= tokens->size() - 1) { + buf.append(op.second->text); + } + } + } + return buf; +} + +std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> TokenStreamRewriter::reduceToSingleOperationPerIndex( + std::vector<TokenStreamRewriter::RewriteOperation*> &rewrites) { + + + // WALK REPLACES + for (size_t i = 0; i < rewrites.size(); ++i) { + TokenStreamRewriter::RewriteOperation *op = rewrites[i]; + ReplaceOp *rop = dynamic_cast<ReplaceOp *>(op); + if (rop == nullptr) + continue; + + // Wipe prior inserts within range + std::vector<InsertBeforeOp *> inserts = getKindOfOps<InsertBeforeOp>(rewrites, i); + for (auto *iop : inserts) { + if (iop->index == rop->index) { + // E.g., insert before 2, delete 2..2; update replace + // text to include insert before, kill insert + delete rewrites[iop->instructionIndex]; + rewrites[iop->instructionIndex] = nullptr; + rop->text = iop->text + (!rop->text.empty() ? rop->text : ""); + } + else if (iop->index > rop->index && iop->index <= rop->lastIndex) { + // delete insert as it's a no-op. + delete rewrites[iop->instructionIndex]; + rewrites[iop->instructionIndex] = nullptr; + } + } + // Drop any prior replaces contained within + std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i); + for (auto *prevRop : prevReplaces) { + if (prevRop->index >= rop->index && prevRop->lastIndex <= rop->lastIndex) { + // delete replace as it's a no-op. 
+ delete rewrites[prevRop->instructionIndex]; + rewrites[prevRop->instructionIndex] = nullptr; + continue; + } + // throw exception unless disjoint or identical + bool disjoint = prevRop->lastIndex < rop->index || prevRop->index > rop->lastIndex; + // Delete special case of replace (text==null): + // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + if (prevRop->text.empty() && rop->text.empty() && !disjoint) { + delete rewrites[prevRop->instructionIndex]; + rewrites[prevRop->instructionIndex] = nullptr; // kill first delete + rop->index = std::min(prevRop->index, rop->index); + rop->lastIndex = std::max(prevRop->lastIndex, rop->lastIndex); + std::cout << "new rop " << rop << std::endl; + } + else if (!disjoint) { + throw IllegalArgumentException("replace op boundaries of " + rop->toString() + + " overlap with previous " + prevRop->toString()); + } + } + } + + // WALK INSERTS + for (size_t i = 0; i < rewrites.size(); i++) { + InsertBeforeOp *iop = dynamic_cast<InsertBeforeOp *>(rewrites[i]); + if (iop == nullptr) + continue; + + // combine current insert with prior if any at same index + + std::vector<InsertBeforeOp *> prevInserts = getKindOfOps<InsertBeforeOp>(rewrites, i); + for (auto *prevIop : prevInserts) { + if (prevIop->index == iop->index) { // combine objects + // convert to strings...we're in process of toString'ing + // whole token buffer so no lazy eval issue with any templates + iop->text = catOpText(&iop->text, &prevIop->text); + // delete redundant prior insert + delete rewrites[prevIop->instructionIndex]; + rewrites[prevIop->instructionIndex] = nullptr; + } + } + // look for replaces where iop.index is in range; error + std::vector<ReplaceOp*> prevReplaces = getKindOfOps<ReplaceOp>(rewrites, i); + for (auto *rop : prevReplaces) { + if (iop->index == rop->index) { + rop->text = catOpText(&iop->text, &rop->text); + delete rewrites[i]; + rewrites[i] = nullptr; // delete current insert + continue; + } + if (iop->index >= rop->index 
&& iop->index <= rop->lastIndex) { + throw IllegalArgumentException("insert op " + iop->toString() + " within boundaries of previous " + rop->toString()); + } + } + } + + std::unordered_map<size_t, TokenStreamRewriter::RewriteOperation*> m; + for (TokenStreamRewriter::RewriteOperation *op : rewrites) { + if (op == nullptr) { // ignore deleted ops + continue; + } + if (m.count(op->index) > 0) { + throw RuntimeException("should only be one op per index"); + } + m[op->index] = op; + } + + return m; +} + +std::string TokenStreamRewriter::catOpText(std::string *a, std::string *b) { + std::string x = ""; + std::string y = ""; + if (a != nullptr) { + x = *a; + } + if (b != nullptr) { + y = *b; + } + return x + y; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h new file mode 100644 index 0000000000..929056a3f9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h @@ -0,0 +1,295 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { + + /** + * Useful for rewriting out a buffered input token stream after doing some + * augmentation or other manipulations on it. + * + * <p> + * You can insert stuff, replace, and delete chunks. Note that the operations + * are done lazily--only if you convert the buffer to a {@link String} with + * {@link TokenStream#getText()}. This is very efficient because you are not + * moving data around all the time. As the buffer of tokens is converted to + * strings, the {@link #getText()} method(s) scan the input token stream and + * check to see if there is an operation at the current index. If so, the + * operation is done and then normal {@link String} rendering continues on the + * buffer. 
This is like having multiple Turing machine instruction streams + * (programs) operating on a single input tape. :)</p> + * + * <p> + * This rewriter makes no modifications to the token stream. It does not ask the + * stream to fill itself up nor does it advance the input cursor. The token + * stream {@link TokenStream#index()} will return the same value before and + * after any {@link #getText()} call.</p> + * + * <p> + * The rewriter only works on tokens that you have in the buffer and ignores the + * current input cursor. If you are buffering tokens on-demand, calling + * {@link #getText()} halfway through the input will only do rewrites for those + * tokens in the first half of the file.</p> + * + * <p> + * Since the operations are done lazily at {@link #getText}-time, operations do + * not screw up the token index values. That is, an insert operation at token + * index {@code i} does not change the index values for tokens + * {@code i}+1..n-1.</p> + * + * <p> + * Because operations never actually alter the buffer, you may always get the + * original token stream back without undoing anything. Since the instructions + * are queued up, you can easily simulate transactions and roll back any changes + * if there is an error just by removing instructions. For example,</p> + * + * <pre> + * CharStream input = new ANTLRFileStream("input"); + * TLexer lex = new TLexer(input); + * CommonTokenStream tokens = new CommonTokenStream(lex); + * T parser = new T(tokens); + * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); + * parser.startRule(); + * </pre> + * + * <p> + * Then in the rules, you can execute (assuming rewriter is visible):</p> + * + * <pre> + * Token t,u; + * ... + * rewriter.insertAfter(t, "text to put after t");} + * rewriter.insertAfter(u, "text after u");} + * System.out.println(rewriter.getText()); + * </pre> + * + * <p> + * You can also have multiple "instruction streams" and get multiple rewrites + * from a single pass over the input. 
Just name the instruction streams and use + * that name again when printing the buffer. This could be useful for generating + * a C file and also its header file--all from the same buffer:</p> + * + * <pre> + * rewriter.insertAfter("pass1", t, "text to put after t");} + * rewriter.insertAfter("pass2", u, "text after u");} + * System.out.println(rewriter.getText("pass1")); + * System.out.println(rewriter.getText("pass2")); + * </pre> + * + * <p> + * If you don't use named rewrite streams, a "default" stream is used as the + * first example shows.</p> + */ + class ANTLR4CPP_PUBLIC TokenStreamRewriter { + public: + static const std::string DEFAULT_PROGRAM_NAME; + static constexpr size_t PROGRAM_INIT_SIZE = 100; + static constexpr size_t MIN_TOKEN_INDEX = 0; + + TokenStreamRewriter(TokenStream *tokens); + virtual ~TokenStreamRewriter(); + + TokenStream *getTokenStream(); + + virtual void rollback(size_t instructionIndex); + + /// Rollback the instruction stream for a program so that + /// the indicated instruction (via instructionIndex) is no + /// longer in the stream. UNTESTED! + virtual void rollback(const std::string &programName, size_t instructionIndex); + + virtual void deleteProgram(); + + /// Reset the program so that no instructions exist. 
+ virtual void deleteProgram(const std::string &programName); + virtual void insertAfter(Token *t, const std::string& text); + virtual void insertAfter(size_t index, const std::string& text); + virtual void insertAfter(const std::string &programName, Token *t, const std::string& text); + virtual void insertAfter(const std::string &programName, size_t index, const std::string& text); + + virtual void insertBefore(Token *t, const std::string& text); + virtual void insertBefore(size_t index, const std::string& text); + virtual void insertBefore(const std::string &programName, Token *t, const std::string& text); + virtual void insertBefore(const std::string &programName, size_t index, const std::string& text); + + virtual void replace(size_t index, const std::string& text); + virtual void replace(size_t from, size_t to, const std::string& text); + virtual void replace(Token *indexT, const std::string& text); + virtual void replace(Token *from, Token *to, const std::string& text); + virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text); + virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text); + + virtual void Delete(size_t index); + virtual void Delete(size_t from, size_t to); + virtual void Delete(Token *indexT); + virtual void Delete(Token *from, Token *to); + virtual void Delete(const std::string &programName, size_t from, size_t to); + virtual void Delete(const std::string &programName, Token *from, Token *to); + + virtual size_t getLastRewriteTokenIndex(); + + /// Return the text from the original tokens altered per the + /// instructions given to this rewriter. + virtual std::string getText(); + + /** Return the text from the original tokens altered per the + * instructions given to this rewriter in programName. 
+ */ + std::string getText(std::string programName); + + /// Return the text associated with the tokens in the interval from the + /// original token stream but with the alterations given to this rewriter. + /// The interval refers to the indexes in the original token stream. + /// We do not alter the token stream in any way, so the indexes + /// and intervals are still consistent. Includes any operations done + /// to the first and last token in the interval. So, if you did an + /// insertBefore on the first token, you would get that insertion. + /// The same is true if you do an insertAfter the stop token. + virtual std::string getText(const misc::Interval &interval); + + virtual std::string getText(const std::string &programName, const misc::Interval &interval); + + protected: + class RewriteOperation { + public: + /// Token buffer index this operation applies to. + size_t index; + /// Text to insert or to substitute; an empty text on a replace means delete. + std::string text; + + /// What index into the program's rewrites list are we? + size_t instructionIndex; + + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index); + RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + virtual ~RewriteOperation(); + + /// Execute the rewrite operation by possibly adding to the buffer. + /// Return the index of the next token to operate on.
+ + virtual size_t execute(std::string *buf); + virtual std::string toString(); + + private: + TokenStreamRewriter *const outerInstance; + void InitializeInstanceFields(); + }; + + class InsertBeforeOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); + + virtual size_t execute(std::string *buf) override; + }; + + class ReplaceOp : public RewriteOperation { + private: + TokenStreamRewriter *const outerInstance; + + public: + size_t lastIndex; + + ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text); + virtual size_t execute(std::string *buf) override; + virtual std::string toString() override; + + private: + void InitializeInstanceFields(); + }; + + /// Our source stream + TokenStream *const tokens; + + /// You may have multiple, named streams of rewrite operations. + /// I'm calling these things "programs." + /// Maps String (name) -> rewrite (List) + std::map<std::string, std::vector<RewriteOperation*>> _programs; + + /// <summary> + /// Map String (program name) -> Integer index </summary> + std::map<std::string, size_t> _lastRewriteTokenIndexes; + virtual size_t getLastRewriteTokenIndex(const std::string &programName); + virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i); + virtual std::vector<RewriteOperation*>& getProgram(const std::string &name); + + /// <summary> + /// We need to combine operations and report invalid operations (like + /// overlapping replaces that are not completed nested). Inserts to + /// same index need to be combined etc... 
Here are the cases: + /// + /// I.i.u I.j.v leave alone, nonoverlapping + /// I.i.u I.i.v combine: Iivu + /// + /// R.i-j.u R.x-y.v | i-j in x-y delete first R + /// R.i-j.u R.i-j.v delete first R + /// R.i-j.u R.x-y.v | x-y in i-j ERROR + /// R.i-j.u R.x-y.v | boundaries overlap ERROR + /// + /// Delete special case of replace (text==null): + /// D.i-j.u D.x-y.v | boundaries overlap combine to min(left)..max(right) + /// + /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before + /// we're not deleting i) + /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping + /// R.x-y.v I.i.u | i in x-y ERROR + /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) + /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + /// + /// I.i.u = insert u before op @ index i + /// R.x-y.u = replace x-y indexed tokens with u + /// + /// First we need to examine replaces. For any replace op: + /// + /// 1. wipe out any insertions before op within that range. + /// 2. Drop any replace op before that is contained completely within + /// that range. + /// 3. Throw exception upon boundary overlap with any previous replace. + /// + /// Then we can deal with inserts: + /// + /// 1. for any inserts to same index, combine even if not adjacent. + /// 2. for any prior replace with same left boundary, combine this + /// insert with replace and delete this insert. + /// 3. throw exception if index in same range as previous replace + /// + /// Don't actually delete; make op null in list. Easier to walk list. + /// Later we can throw as we add to index -> op map. + /// + /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + /// inserted stuff would be before the replace range. But, if you + /// add tokens in front of a method body '{' and then delete the method + /// body, I think the stuff before the '{' you added should disappear too. + /// + /// Return a map from token index to operation.
+ /// </summary> + virtual std::unordered_map<size_t, RewriteOperation*> reduceToSingleOperationPerIndex(std::vector<RewriteOperation*> &rewrites); + + virtual std::string catOpText(std::string *a, std::string *b); + + /// Get all operations before an index of a particular kind. + template <typename T> + std::vector<T *> getKindOfOps(std::vector<RewriteOperation *> rewrites, size_t before) { + std::vector<T *> ops; + for (size_t i = 0; i < before && i < rewrites.size(); i++) { + T *op = dynamic_cast<T *>(rewrites[i]); + if (op == nullptr) { // ignore deleted or non matching entries + continue; + } + ops.push_back(op); + } + return ops; + } + + private: + std::vector<RewriteOperation *>& initializeProgram(const std::string &name); + + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.cpp new file mode 100644 index 0000000000..bbfb8848fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.cpp @@ -0,0 +1,208 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" +#include "Exceptions.h" +#include "support/Utf8.h" + +#include "UnbufferedCharStream.h" + +using namespace antlrcpp; +using namespace antlr4; +using namespace antlr4::misc; + +UnbufferedCharStream::UnbufferedCharStream(std::wistream &input) + : _p(0), _numMarkers(0), _lastChar(0), _lastCharBufferStart(0), _currentCharIndex(0), _input(input) { + // The vector's size is what used to be n in Java code. 
+ fill(1); // prime +} + +void UnbufferedCharStream::consume() { + if (LA(1) == EOF) { + throw IllegalStateException("cannot consume EOF"); + } + + // buf always has at least data[p==0] in this method due to ctor + _lastChar = _data[_p]; // track last char for LA(-1) + + if (_p == _data.size() - 1 && _numMarkers == 0) { + size_t capacity = _data.capacity(); + _data.clear(); + _data.reserve(capacity); + + _p = 0; + _lastCharBufferStart = _lastChar; + } else { + _p++; + } + + _currentCharIndex++; + sync(1); +} + +void UnbufferedCharStream::sync(size_t want) { + if (_p + want <= _data.size()) // Already enough data loaded? + return; + + fill(_p + want - _data.size()); +} + +size_t UnbufferedCharStream::fill(size_t n) { + for (size_t i = 0; i < n; i++) { + if (_data.size() > 0 && _data.back() == 0xFFFF) { + return i; + } + + try { + char32_t c = nextChar(); + add(c); +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (IOException &ioe) { + // throw_with_nested is not available before VS 2015. + throw ioe; +#else + } catch (IOException & /*ioe*/) { + std::throw_with_nested(RuntimeException()); +#endif + } + } + + return n; +} + +char32_t UnbufferedCharStream::nextChar() { + return _input.get(); +} + +void UnbufferedCharStream::add(char32_t c) { + _data += c; +} + +size_t UnbufferedCharStream::LA(ssize_t i) { + if (i == -1) { // special case + return _lastChar; + } + + // We can look back only as many chars as we have buffered. + ssize_t index = static_cast<ssize_t>(_p) + i - 1; + if (index < 0) { + throw IndexOutOfBoundsException(); + } + + if (i > 0) { + sync(static_cast<size_t>(i)); // No need to sync if we look back. 
+ } + if (static_cast<size_t>(index) >= _data.size()) { + return EOF; + } + + if (_data[static_cast<size_t>(index)] == std::char_traits<wchar_t>::eof()) { + return EOF; + } + + return _data[static_cast<size_t>(index)]; +} + +ssize_t UnbufferedCharStream::mark() { + if (_numMarkers == 0) { + _lastCharBufferStart = _lastChar; + } + + ssize_t mark = -static_cast<ssize_t>(_numMarkers) - 1; + _numMarkers++; + return mark; +} + +void UnbufferedCharStream::release(ssize_t marker) { + ssize_t expectedMark = -static_cast<ssize_t>(_numMarkers); + if (marker != expectedMark) { + throw IllegalStateException("release() called with an invalid marker."); + } + + _numMarkers--; + if (_numMarkers == 0 && _p > 0) { + _data.erase(0, _p); + _p = 0; + _lastCharBufferStart = _lastChar; + } +} + +size_t UnbufferedCharStream::index() { + return _currentCharIndex; +} + +void UnbufferedCharStream::seek(size_t index) { + if (index == _currentCharIndex) { + return; + } + + if (index > _currentCharIndex) { + sync(index - _currentCharIndex); + index = std::min(index, getBufferStartIndex() + _data.size() - 1); + } + + // index == to bufferStartIndex should set p to 0 + ssize_t i = static_cast<ssize_t>(index) - static_cast<ssize_t>(getBufferStartIndex()); + if (i < 0) { + throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index)); + } else if (i >= static_cast<ssize_t>(_data.size())) { + throw UnsupportedOperationException("Seek to index outside buffer: " + std::to_string(index) + + " not in " + std::to_string(getBufferStartIndex()) + ".." 
+ + std::to_string(getBufferStartIndex() + _data.size())); + } + + _p = static_cast<size_t>(i); + _currentCharIndex = index; + if (_p == 0) { + _lastChar = _lastCharBufferStart; + } else { + _lastChar = _data[_p - 1]; + } +} + +size_t UnbufferedCharStream::size() { + throw UnsupportedOperationException("Unbuffered stream cannot know its size"); +} + +std::string UnbufferedCharStream::getSourceName() const { + if (name.empty()) { + return UNKNOWN_SOURCE_NAME; + } + + return name; +} + +std::string UnbufferedCharStream::getText(const misc::Interval &interval) { + if (interval.a < 0 || interval.b < interval.a - 1) { + throw IllegalArgumentException("invalid interval"); + } + + size_t bufferStartIndex = getBufferStartIndex(); + if (!_data.empty() && _data.back() == 0xFFFF) { + if (interval.a + interval.length() > bufferStartIndex + _data.size()) { + throw IllegalArgumentException("the interval extends past the end of the stream"); + } + } + + if (interval.a < static_cast<ssize_t>(bufferStartIndex) || interval.b >= ssize_t(bufferStartIndex + _data.size())) { + throw UnsupportedOperationException("interval " + interval.toString() + " outside buffer: " + + std::to_string(bufferStartIndex) + ".." 
+ std::to_string(bufferStartIndex + _data.size() - 1)); + } + // convert from absolute to local index + size_t i = interval.a - bufferStartIndex; + auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(i, interval.length())); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Unbuffered stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +std::string UnbufferedCharStream::toString() const { + throw UnsupportedOperationException("Unbuffered stream cannot be materialized to a string"); +} + +size_t UnbufferedCharStream::getBufferStartIndex() const { + return _currentCharIndex - _p; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h new file mode 100644 index 0000000000..5b05834f85 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" + +namespace antlr4 { + + /// Do not buffer up the entire char stream. It does keep a small buffer + /// for efficiency and also buffers while a mark exists (set by the + /// lookahead prediction in parser). "Unbuffered" here refers to the fact + /// that it doesn't buffer all data, not that it loads characters on demand. + class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream { + public: + /// The name or source of this char stream. + std::string name; + + explicit UnbufferedCharStream(std::wistream &input); + + void consume() override; + size_t LA(ssize_t i) override; + + /// <summary> + /// Return a marker that we can release later.
+ /// <p/> + /// The specific marker value used for this class allows for some level of + /// protection against misuse where {@code seek()} is called on a mark or + /// {@code release()} is called in the wrong order. + /// </summary> + ssize_t mark() override; + + /// <summary> + /// Decrement number of markers, resetting buffer if we hit 0. </summary> + /// <param name="marker"> </param> + void release(ssize_t marker) override; + size_t index() override; + + /// <summary> + /// Seek to absolute character index, which might not be in the current + /// sliding window. Move {@code p} to {@code index-bufferStartIndex}. + /// </summary> + void seek(size_t index) override; + size_t size() override; + std::string getSourceName() const override; + std::string getText(const misc::Interval &interval) override; + + std::string toString() const override; + + protected: + /// A moving window buffer of the data being scanned. While there's a marker, + /// we keep adding to buffer. Otherwise, <seealso cref="#consume consume()"/> resets so + /// we start filling at index 0 again. + // UTF-32 encoded. + std::u32string _data; + typedef char32_t storage_type; + + /// <summary> + /// 0..n-1 index into <seealso cref="#data data"/> of next character. + /// <p/> + /// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are + /// out of buffered characters. + /// </summary> + size_t _p; + + /// <summary> + /// Count up with <seealso cref="#mark mark()"/> and down with + /// <seealso cref="#release release()"/>. When we {@code release()} the last mark, + /// {@code numMarkers} reaches 0 and we reset the buffer. Copy + /// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}. + /// </summary> + size_t _numMarkers; + + /// This is the {@code LA(-1)} character for the current position. + size_t _lastChar; // UTF-32 + + /// <summary> + /// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the + /// first character in <seealso cref="#data data"/>. 
Otherwise, this is unspecified. + /// </summary> + size_t _lastCharBufferStart; // UTF-32 + + /// <summary> + /// Absolute character index. It's the index of the character about to be + /// read via {@code LA(1)}. Goes from 0 to the number of characters in the + /// entire stream, although the stream size is unknown before the end is + /// reached. + /// </summary> + size_t _currentCharIndex; + + std::wistream &_input; + + /// <summary> + /// Make sure we have 'want' elements from current position <seealso cref="#p p"/>. + /// Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is + /// the char index 'need' elements ahead. If we need 1 element, + /// {@code (p+1-1)==p} must be less than {@code data.length}. + /// </summary> + virtual void sync(size_t want); + + /// <summary> + /// Add {@code n} characters to the buffer. Returns the number of characters + /// actually added to the buffer. If the return value is less than {@code n}, + /// then EOF was reached before {@code n} characters could be added. + /// </summary> + virtual size_t fill(size_t n); + + /// Override to provide different source of characters than + /// <seealso cref="#input input"/>. + virtual char32_t nextChar(); + virtual void add(char32_t c); + size_t getBufferStartIndex() const; + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.cpp new file mode 100644 index 0000000000..16ff49e332 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.cpp @@ -0,0 +1,270 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Token.h" +#include "Exceptions.h" +#include "assert.h" +#include "TokenSource.h" +#include "support/Arrays.h" +#include "misc/Interval.h" +#include "RuleContext.h" +#include "WritableToken.h" + +#include "UnbufferedTokenStream.h" + +using namespace antlr4; + +UnbufferedTokenStream::UnbufferedTokenStream(TokenSource *tokenSource) : UnbufferedTokenStream(tokenSource, 256) { +} + +UnbufferedTokenStream::UnbufferedTokenStream(TokenSource *tokenSource, int /*bufferSize*/) + : _tokenSource(tokenSource), _lastToken(nullptr), _lastTokenBufferStart(nullptr) +{ + InitializeInstanceFields(); + fill(1); // prime the pump +} + +UnbufferedTokenStream::~UnbufferedTokenStream() { +} + +Token* UnbufferedTokenStream::get(size_t i) const +{ // get absolute index + size_t bufferStartIndex = getBufferStartIndex(); + if (i < bufferStartIndex || i >= bufferStartIndex + _tokens.size()) { + throw IndexOutOfBoundsException(std::string("get(") + std::to_string(i) + std::string(") outside buffer: ") + + std::to_string(bufferStartIndex) + std::string("..") + std::to_string(bufferStartIndex + _tokens.size())); + } + return _tokens[i - bufferStartIndex].get(); +} + +Token* UnbufferedTokenStream::LT(ssize_t i) +{ + if (i == -1) { + return _lastToken; + } + + sync(i); + ssize_t index = static_cast<ssize_t>(_p) + i - 1; + if (index < 0) { + throw IndexOutOfBoundsException(std::string("LT(") + std::to_string(i) + std::string(") gives negative index")); + } + + if (index >= static_cast<ssize_t>(_tokens.size())) { + assert(_tokens.size() > 0 && _tokens.back()->getType() == EOF); + return _tokens.back().get(); + } + + return _tokens[static_cast<size_t>(index)].get(); +} + +size_t UnbufferedTokenStream::LA(ssize_t i) +{ + return LT(i)->getType(); +} + +TokenSource* UnbufferedTokenStream::getTokenSource() const +{ + return _tokenSource; +} + +std::string UnbufferedTokenStream::getText() +{ + return ""; +} + +std::string UnbufferedTokenStream::getText(RuleContext* ctx) +{ + return 
getText(ctx->getSourceInterval()); +} + +std::string UnbufferedTokenStream::getText(Token *start, Token *stop) +{ + return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex())); +} + +void UnbufferedTokenStream::consume() +{ + if (LA(1) == EOF) { + throw IllegalStateException("cannot consume EOF"); + } + + // buf always has at least tokens[p==0] in this method due to ctor + _lastToken = _tokens[_p].get(); // track last token for LT(-1) + + // if we're at last token and no markers, opportunity to flush buffer + if (_p == _tokens.size() - 1 && _numMarkers == 0) { + _tokens.clear(); + _p = 0; + _lastTokenBufferStart = _lastToken; + } else { + ++_p; + } + + ++_currentTokenIndex; + sync(1); +} + +/// <summary> +/// Make sure we have 'need' elements from current position <seealso cref="#p p"/>. Last valid +/// {@code p} index is {@code tokens.length-1}. {@code p+need-1} is the tokens index 'need' elements +/// ahead. If we need 1 element, {@code (p+1-1)==p} must be less than {@code tokens.length}. +/// </summary> +void UnbufferedTokenStream::sync(ssize_t want) +{ + ssize_t need = (static_cast<ssize_t>(_p) + want - 1) - static_cast<ssize_t>(_tokens.size()) + 1; // how many more elements we need? + if (need > 0) { + fill(static_cast<size_t>(need)); + } +} + +/// <summary> +/// Add {@code n} elements to the buffer. Returns the number of tokens +/// actually added to the buffer. If the return value is less than {@code n}, +/// then EOF was reached before {@code n} tokens could be added. 
+/// </summary> +size_t UnbufferedTokenStream::fill(size_t n) +{ + for (size_t i = 0; i < n; i++) { + if (_tokens.size() > 0 && _tokens.back()->getType() == EOF) { + return i; + } + + add(_tokenSource->nextToken()); + } + + return n; +} + +void UnbufferedTokenStream::add(std::unique_ptr<Token> t) +{ + WritableToken *writable = dynamic_cast<WritableToken *>(t.get()); + if (writable != nullptr) { + writable->setTokenIndex(int(getBufferStartIndex() + _tokens.size())); + } + + _tokens.push_back(std::move(t)); +} + +/// <summary> +/// Return a marker that we can release later. +/// <p/> +/// The specific marker value used for this class allows for some level of +/// protection against misuse where {@code seek()} is called on a mark or +/// {@code release()} is called in the wrong order. +/// </summary> +ssize_t UnbufferedTokenStream::mark() +{ + if (_numMarkers == 0) { + _lastTokenBufferStart = _lastToken; + } + + int mark = -_numMarkers - 1; + _numMarkers++; + return mark; +} + +void UnbufferedTokenStream::release(ssize_t marker) +{ + ssize_t expectedMark = -_numMarkers; + if (marker != expectedMark) { + throw IllegalStateException("release() called with an invalid marker."); + } + + _numMarkers--; + if (_numMarkers == 0) { // can we release buffer? 
+ if (_p > 0) { + // Copy tokens[p]..tokens[n-1] to tokens[0]..tokens[(n-1)-p], reset ptrs + // p is last valid token; move nothing if p==n as we have no valid char + _tokens.erase(_tokens.begin(), _tokens.begin() + static_cast<ssize_t>(_p)); + _p = 0; + } + + _lastTokenBufferStart = _lastToken; + } +} + +size_t UnbufferedTokenStream::index() +{ + return _currentTokenIndex; +} + +void UnbufferedTokenStream::seek(size_t index) +{ // seek to absolute index + if (index == _currentTokenIndex) { + return; + } + + if (index > _currentTokenIndex) { + sync(ssize_t(index - _currentTokenIndex)); + index = std::min(index, getBufferStartIndex() + _tokens.size() - 1); + } + + size_t bufferStartIndex = getBufferStartIndex(); + if (bufferStartIndex > index) { + throw IllegalArgumentException(std::string("cannot seek to negative index ") + std::to_string(index)); + } + + size_t i = index - bufferStartIndex; + if (i >= _tokens.size()) { + throw UnsupportedOperationException(std::string("seek to index outside buffer: ") + std::to_string(index) + + " not in " + std::to_string(bufferStartIndex) + ".." 
+ std::to_string(bufferStartIndex + _tokens.size())); + } + + _p = i; + _currentTokenIndex = index; + if (_p == 0) { + _lastToken = _lastTokenBufferStart; + } else { + _lastToken = _tokens[_p - 1].get(); + } +} + +size_t UnbufferedTokenStream::size() +{ + throw UnsupportedOperationException("Unbuffered stream cannot know its size"); +} + +std::string UnbufferedTokenStream::getSourceName() const +{ + return _tokenSource->getSourceName(); +} + +std::string UnbufferedTokenStream::getText(const misc::Interval &interval) +{ + size_t bufferStartIndex = getBufferStartIndex(); + size_t bufferStopIndex = bufferStartIndex + _tokens.size() - 1; + + size_t start = interval.a; + size_t stop = interval.b; + if (start < bufferStartIndex || stop > bufferStopIndex) { + throw UnsupportedOperationException(std::string("interval ") + interval.toString() + + " not in token buffer window: " + std::to_string(bufferStartIndex) + ".." + std::to_string(bufferStopIndex)); + } + + size_t a = start - bufferStartIndex; + size_t b = stop - bufferStartIndex; + + std::stringstream ss; + for (size_t i = a; i <= b; i++) { + Token *t = _tokens[i].get(); + if (i > 0) + ss << ", "; + ss << t->getText(); + } + + return ss.str(); +} + +size_t UnbufferedTokenStream::getBufferStartIndex() const +{ + return _currentTokenIndex - _p; +} + +void UnbufferedTokenStream::InitializeInstanceFields() +{ + _p = 0; + _numMarkers = 0; + _currentTokenIndex = 0; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.h b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.h new file mode 100644 index 0000000000..0c67ec8610 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedTokenStream.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "TokenStream.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC UnbufferedTokenStream : public TokenStream { + public: + UnbufferedTokenStream(TokenSource *tokenSource); + UnbufferedTokenStream(TokenSource *tokenSource, int bufferSize); + UnbufferedTokenStream(const UnbufferedTokenStream& other) = delete; + virtual ~UnbufferedTokenStream(); + + UnbufferedTokenStream& operator = (const UnbufferedTokenStream& other) = delete; + + virtual Token* get(size_t i) const override; + virtual Token* LT(ssize_t i) override; + virtual size_t LA(ssize_t i) override; + + virtual TokenSource* getTokenSource() const override; + + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getText() override; + virtual std::string getText(RuleContext *ctx) override; + virtual std::string getText(Token *start, Token *stop) override; + + virtual void consume() override; + + /// <summary> + /// Return a marker that we can release later. + /// <p/> + /// The specific marker value used for this class allows for some level of + /// protection against misuse where {@code seek()} is called on a mark or + /// {@code release()} is called in the wrong order. + /// </summary> + virtual ssize_t mark() override; + virtual void release(ssize_t marker) override; + virtual size_t index() override; + virtual void seek(size_t index) override; + virtual size_t size() override; + virtual std::string getSourceName() const override; + + protected: + /// Make sure we have 'need' elements from current position p. Last valid + /// p index is tokens.length - 1. p + need - 1 is the tokens index 'need' elements + /// ahead. If we need 1 element, (p+1-1)==p must be less than tokens.length. + TokenSource *_tokenSource; + + /// <summary> + /// A moving window buffer of the data being scanned. While there's a marker, + /// we keep adding to buffer. Otherwise, <seealso cref="#consume consume()"/> resets so + /// we start filling at index 0 again. 
+ /// </summary> + + std::vector<std::unique_ptr<Token>> _tokens; + + /// <summary> + /// 0..n-1 index into <seealso cref="#tokens tokens"/> of next token. + /// <p/> + /// The {@code LT(1)} token is {@code tokens[p]}. If {@code p == n}, we are + /// out of buffered tokens. + /// </summary> + size_t _p; + + /// <summary> + /// Count up with <seealso cref="#mark mark()"/> and down with + /// <seealso cref="#release release()"/>. When we {@code release()} the last mark, + /// {@code numMarkers} reaches 0 and we reset the buffer. Copy + /// {@code tokens[p]..tokens[n-1]} to {@code tokens[0]..tokens[(n-1)-p]}. + /// </summary> + int _numMarkers; + + /// <summary> + /// This is the {@code LT(-1)} token for the current position. + /// </summary> + Token *_lastToken; + + /// <summary> + /// When {@code numMarkers > 0}, this is the {@code LT(-1)} token for the + /// first token in <seealso cref="#tokens"/>. Otherwise, this is {@code null}. + /// </summary> + Token *_lastTokenBufferStart; + + /// <summary> + /// Absolute token index. It's the index of the token about to be read via + /// {@code LT(1)}. Goes from 0 to the number of tokens in the entire stream, + /// although the stream size is unknown before the end is reached. + /// <p/> + /// This value is used to set the token indexes if the stream provides tokens + /// that implement <seealso cref="WritableToken"/>. + /// </summary> + size_t _currentTokenIndex; + + virtual void sync(ssize_t want); + + /// <summary> + /// Add {@code n} elements to the buffer. Returns the number of tokens + /// actually added to the buffer. If the return value is less than {@code n}, + /// then EOF was reached before {@code n} tokens could be added. 
+ /// </summary> + virtual size_t fill(size_t n); + virtual void add(std::unique_ptr<Token> t); + + size_t getBufferStartIndex() const; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/Version.h b/contrib/libs/antlr4_cpp_runtime/src/Version.h new file mode 100644 index 0000000000..43f00ea65c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Version.h @@ -0,0 +1,42 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "antlr4-common.h" + +#define ANTLRCPP_VERSION_MAJOR 4 +#define ANTLRCPP_VERSION_MINOR 11 +#define ANTLRCPP_VERSION_PATCH 1 // the three components consumed by ANTLRCPP_VERSION and ANTLRCPP_VERSION_STRING below + +#define ANTLRCPP_MAKE_VERSION(major, minor, patch) ((major) * 100000 + (minor) * 1000 + (patch)) // packs a version triple into one comparable integer, e.g. 4.11.1 -> 411001 + +#define ANTLRCPP_VERSION \ + ANTLRCPP_MAKE_VERSION(ANTLRCPP_VERSION_MAJOR, ANTLRCPP_VERSION_MINOR, ANTLRCPP_VERSION_PATCH) + +#define ANTLRCPP_VERSION_STRING \ + ANTLR4CPP_STRINGIFY(ANTLRCPP_VERSION_MAJOR) "." \ + ANTLR4CPP_STRINGIFY(ANTLRCPP_VERSION_MINOR) "." \ + ANTLR4CPP_STRINGIFY(ANTLRCPP_VERSION_PATCH) diff --git a/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.cpp b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.cpp new file mode 100644 index 0000000000..0f783d5d79 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "Token.h" + +#include "Vocabulary.h" + +using namespace antlr4::dfa; + +const Vocabulary Vocabulary::EMPTY_VOCABULARY; + +Vocabulary::Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames) +: Vocabulary(std::move(literalNames), std::move(symbolicNames), {}) { +} + +Vocabulary::Vocabulary(std::vector<std::string> literalNames, + std::vector<std::string> symbolicNames, std::vector<std::string> displayNames) + : _literalNames(std::move(literalNames)), _symbolicNames(std::move(symbolicNames)), _displayNames(std::move(displayNames)), + _maxTokenType(std::max(_displayNames.size(), std::max(_literalNames.size(), _symbolicNames.size())) - 1) { + // See note here on -1 part: https://github.com/antlr/antlr4/pull/1146 +} + +std::string_view Vocabulary::getLiteralName(size_t tokenType) const { + if (tokenType < _literalNames.size()) { + return _literalNames[tokenType]; + } + + return ""; +} + +std::string_view Vocabulary::getSymbolicName(size_t tokenType) const { + if (tokenType == Token::EOF) { + return "EOF"; + } + + if (tokenType < _symbolicNames.size()) { + return _symbolicNames[tokenType]; + } + + return ""; +} + +std::string Vocabulary::getDisplayName(size_t tokenType) const { + if (tokenType < _displayNames.size()) { + std::string_view displayName = _displayNames[tokenType]; + if (!displayName.empty()) { + return std::string(displayName); + } + } + + std::string_view literalName = getLiteralName(tokenType); + if (!literalName.empty()) { + return std::string(literalName); + } + + std::string_view symbolicName = getSymbolicName(tokenType); + if (!symbolicName.empty()) { + return std::string(symbolicName); + } + + return std::to_string(tokenType); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h new file mode 100644 index 0000000000..af5b243880 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h @@ -0,0 +1,177 @@ +/* Copyright (c) 
2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "antlr4-common.h"

namespace antlr4 {
namespace dfa {

  /// Maps token types to their literal, symbolic and display names.
  ///
  /// The class is declared `final` and every data member is `const`, so an
  /// instance cannot be changed (or assigned to) once it has been constructed.
  ///
  /// NOTE(review): the documentation below was carried over from the Java
  /// runtime and speaks of {@code null}. The C++ signatures take vectors by
  /// value and return std::string_view / std::string, so "null" presumably
  /// corresponds to an empty vector or empty string here -- confirm against
  /// Vocabulary.cpp.
  class ANTLR4CPP_PUBLIC Vocabulary final {
  public:
    /// Gets an empty <seealso cref="Vocabulary"/> instance.
    ///
    /// <para>
    /// No literal or symbol names are assigned to token types, so
    /// <seealso cref="#getDisplayName(int)"/> returns the numeric value for all tokens
    /// except <seealso cref="Token#EOF"/>.</para>
    ///
    /// Deprecated: default-construct a Vocabulary instead (see the attribute).
    [[deprecated("Use the default constructor of Vocabulary instead.")]] static const Vocabulary EMPTY_VOCABULARY;

    /// Creates a vocabulary with no names; the maximum token type stays at its
    /// in-class default of 0.
    Vocabulary() {}

    Vocabulary(const Vocabulary&) = default;

    /// <summary>
    /// Constructs a new instance of <seealso cref="Vocabulary"/> from the specified
    /// literal and symbolic token names.
    /// </summary>
    /// <param name="literalNames"> The literal names assigned to tokens, or {@code null}
    ///  if no literal names are assigned. </param>
    /// <param name="symbolicNames"> The symbolic names assigned to tokens, or
    ///  {@code null} if no symbolic names are assigned.
    /// </param>
    /// <seealso cref="#getLiteralName(int)"/>
    /// <seealso cref="#getSymbolicName(int)"/>
    Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames);

    /// <summary>
    /// Constructs a new instance of <seealso cref="Vocabulary"/> from the specified
    /// literal, symbolic, and display token names.
    /// </summary>
    /// <param name="literalNames"> The literal names assigned to tokens, or {@code null}
    ///  if no literal names are assigned. </param>
    /// <param name="symbolicNames"> The symbolic names assigned to tokens, or
    ///  {@code null} if no symbolic names are assigned. </param>
    /// <param name="displayNames"> The display names assigned to tokens, or {@code null}
    ///  to use the values in {@code literalNames} and {@code symbolicNames} as
    ///  the source of display names, as described in
    ///  <seealso cref="#getDisplayName(int)"/>.
    /// </param>
    /// <seealso cref="#getLiteralName(int)"/>
    /// <seealso cref="#getSymbolicName(int)"/>
    /// <seealso cref="#getDisplayName(int)"/>
    Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames,
               std::vector<std::string> displayNames);

    /// <summary>
    /// Returns the highest token type value. It can be used to iterate from
    /// zero to that number, inclusively, thus querying all stored entries. </summary>
    /// <returns> the highest token type value </returns>
    constexpr size_t getMaxTokenType() const { return _maxTokenType; }

    /// <summary>
    /// Gets the string literal associated with a token type. The string returned
    /// by this method, when not {@code null}, can be used unaltered in a parser
    /// grammar to represent this token type.
    ///
    /// <para>The following table shows examples of lexer rules and the literal
    /// names assigned to the corresponding token types.</para>
    ///
    /// <table>
    ///  <tr>
    ///   <th>Rule</th>
    ///   <th>Literal Name</th>
    ///   <th>Java String Literal</th>
    ///  </tr>
    ///  <tr>
    ///   <td>{@code THIS : 'this';}</td>
    ///   <td>{@code 'this'}</td>
    ///   <td>{@code "'this'"}</td>
    ///  </tr>
    ///  <tr>
    ///   <td>{@code SQUOTE : '\'';}</td>
    ///   <td>{@code '\''}</td>
    ///   <td>{@code "'\\''"}</td>
    ///  </tr>
    ///  <tr>
    ///   <td>{@code ID : [A-Z]+;}</td>
    ///   <td>n/a</td>
    ///   <td>{@code null}</td>
    ///  </tr>
    /// </table>
    /// </summary>
    /// <param name="tokenType"> The token type.
    /// </param>
    /// <returns> The string literal associated with the specified token type, or
    /// {@code null} if no string literal is associated with the type. </returns>
    std::string_view getLiteralName(size_t tokenType) const;

    /// <summary>
    /// Gets the symbolic name associated with a token type. The string returned
    /// by this method, when not {@code null}, can be used unaltered in a parser
    /// grammar to represent this token type.
    ///
    /// <para>This method supports token types defined by any of the following
    /// methods:</para>
    ///
    /// <ul>
    ///  <li>Tokens created by lexer rules.</li>
    ///  <li>Tokens defined in a <code>tokens{}</code> block in a lexer or parser
    ///  grammar.</li>
    ///  <li>The implicitly defined {@code EOF} token, which has the token type
    ///  <seealso cref="Token#EOF"/>.</li>
    /// </ul>
    ///
    /// <para>The following table shows examples of lexer rules and the symbolic
    /// names assigned to the corresponding token types.</para>
    ///
    /// <table>
    ///  <tr>
    ///   <th>Rule</th>
    ///   <th>Symbolic Name</th>
    ///  </tr>
    ///  <tr>
    ///   <td>{@code THIS : 'this';}</td>
    ///   <td>{@code THIS}</td>
    ///  </tr>
    ///  <tr>
    ///   <td>{@code SQUOTE : '\'';}</td>
    ///   <td>{@code SQUOTE}</td>
    ///  </tr>
    ///  <tr>
    ///   <td>{@code ID : [A-Z]+;}</td>
    ///   <td>{@code ID}</td>
    ///  </tr>
    /// </table>
    /// </summary>
    /// <param name="tokenType"> The token type.
    /// </param>
    /// <returns> The symbolic name associated with the specified token type, or
    /// {@code null} if no symbolic name is associated with the type. </returns>
    std::string_view getSymbolicName(size_t tokenType) const;

    /// <summary>
    /// Gets the display name of a token type.
    ///
    /// <para>ANTLR provides a default implementation of this method, but
    /// applications are free to override the behavior in any manner which makes
    /// sense for the application. The default implementation returns the first
    /// result from the following list which produces a non-{@code null}
    /// result.</para>
    ///
    /// <ol>
    ///  <li>The result of <seealso cref="#getLiteralName"/></li>
    ///  <li>The result of <seealso cref="#getSymbolicName"/></li>
    ///  <li>The result of <seealso cref="Integer#toString"/></li>
    /// </ol>
    /// </summary>
    /// <param name="tokenType"> The token type.
    /// </param>
    /// <returns> The display name of the token type, for use in error reporting or
    /// other user-visible messages which reference specific token types. </returns>
    std::string getDisplayName(size_t tokenType) const;

  private:
    // All members are const: the tables are fixed at construction time.
    std::vector<std::string> const _literalNames;
    std::vector<std::string> const _symbolicNames;
    std::vector<std::string> const _displayNames;
    const size_t _maxTokenType = 0;
  };

} // namespace dfa
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/WritableToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.cpp new file mode 100644 index 0000000000..a30cd96f19 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.cpp @@ -0,0 +1,9 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "WritableToken.h"

// Out-of-line definition of the virtual destructor declared in WritableToken.h.
antlr4::WritableToken::~WritableToken() {
}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/WritableToken.h b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.h new file mode 100644 index 0000000000..28856f25b9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/WritableToken.h @@ -0,0 +1,23 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
*/

#pragma once

#include "Token.h"

namespace antlr4 {

  /// A Token whose fields can be modified after creation. Every setter is
  /// pure virtual, so this class is an interface that concrete token types
  /// implement.
  class ANTLR4CPP_PUBLIC WritableToken : public Token {
  public:
    virtual ~WritableToken();
    virtual void setText(const std::string &text) = 0;
    virtual void setType(size_t ttype) = 0;
    virtual void setLine(size_t line) = 0;
    virtual void setCharPositionInLine(size_t pos) = 0;
    virtual void setChannel(size_t channel) = 0;
    virtual void setTokenIndex(size_t index) = 0;
  };

} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/antlr4-common.h b/contrib/libs/antlr4_cpp_runtime/src/antlr4-common.h new file mode 100644 index 0000000000..d7f9a65fa1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/antlr4-common.h @@ -0,0 +1,101 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include <algorithm>
#include <any>
#include <atomic>
#include <bitset>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <string_view>
#include <typeinfo>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Defines for the Guid class and other platform dependent stuff.
#ifdef _WIN32
  #ifdef _MSC_VER
    #pragma warning (disable: 4250) // Class inherits by dominance.
    #pragma warning (disable: 4512) // assignment operator could not be generated

    #if _MSC_VER < 1900
      // Before VS 2015 code like "while (true)" will create a (useless) warning in level 4.
      #pragma warning (disable: 4127) // conditional expression is constant
    #endif
  #endif

  // Windows builds get ssize_t defined here, sized to match the pointer width.
  #ifdef _WIN64
    typedef __int64 ssize_t;
  #else
    typedef __int32 ssize_t;
  #endif

  // ANTLR4CPP_PUBLIC on Windows: dllexport when ANTLR4CPP_EXPORTS is defined,
  // nothing when ANTLR4CPP_STATIC is defined, dllimport otherwise.
  #ifdef ANTLR4CPP_EXPORTS
    #define ANTLR4CPP_PUBLIC __declspec(dllexport)
  #else
    #ifdef ANTLR4CPP_STATIC
      #define ANTLR4CPP_PUBLIC
    #else
      #define ANTLR4CPP_PUBLIC __declspec(dllimport)
    #endif
  #endif

// Elsewhere ANTLR4CPP_PUBLIC requests default symbol visibility when the
// compiler reports a new enough __GNUC__ (>= 4 on Apple, >= 6 otherwise) and
// expands to nothing for older compilers.
#elif defined(__APPLE__)
  #if __GNUC__ >= 4
    #define ANTLR4CPP_PUBLIC __attribute__ ((visibility ("default")))
  #else
    #define ANTLR4CPP_PUBLIC
  #endif
#else
  #if __GNUC__ >= 6
    #define ANTLR4CPP_PUBLIC __attribute__ ((visibility ("default")))
  #else
    #define ANTLR4CPP_PUBLIC
  #endif
#endif

// ANTLR4CPP_HAVE_BUILTIN(x) forwards to __has_builtin where the compiler
// provides it and evaluates to 0 everywhere else.
#ifdef __has_builtin
#define ANTLR4CPP_HAVE_BUILTIN(x) __has_builtin(x)
#else
#define ANTLR4CPP_HAVE_BUILTIN(x) 0
#endif

// Two-level stringification so that macro arguments are expanded before
// being turned into a string literal.
#define ANTLR4CPP_INTERNAL_STRINGIFY(x) #x
#define ANTLR4CPP_STRINGIFY(x) ANTLR4CPP_INTERNAL_STRINGIFY(x)

// We use everything from the C++ standard library by default.
#ifndef ANTLR4CPP_USING_ABSEIL
#define ANTLR4CPP_USING_ABSEIL 0
#endif

#include "support/Declarations.h"

// We have to undefine this symbol as ANTLR will use this name for own members and even
// generated functions. Because EOF is a global macro we cannot use e.g. a namespace scope to disambiguate.
#ifdef EOF
#undef EOF
#endif

// Sentinel meaning "no index": the largest value a size_t can hold.
#define INVALID_INDEX std::numeric_limits<size_t>::max()
// Shorthand used throughout the runtime for shared ownership.
template<class T> using Ref = std::shared_ptr<T>;
diff --git a/contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h b/contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h new file mode 100644 index 0000000000..50b85aa4fc --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h @@ -0,0 +1,168 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#pragma once + +// This is the umbrella header for all ANTLR4 C++ runtime headers. + +#include "antlr4-common.h" + +#include "ANTLRErrorListener.h" +#include "ANTLRErrorStrategy.h" +#include "ANTLRFileStream.h" +#include "ANTLRInputStream.h" +#include "BailErrorStrategy.h" +#include "BaseErrorListener.h" +#include "BufferedTokenStream.h" +#include "CharStream.h" +#include "CommonToken.h" +#include "CommonTokenFactory.h" +#include "CommonTokenStream.h" +#include "ConsoleErrorListener.h" +#include "DefaultErrorStrategy.h" +#include "DiagnosticErrorListener.h" +#include "Exceptions.h" +#include "FailedPredicateException.h" +#include "InputMismatchException.h" +#include "IntStream.h" +#include "InterpreterRuleContext.h" +#include "Lexer.h" +#include "LexerInterpreter.h" +#include "LexerNoViableAltException.h" +#include "ListTokenSource.h" +#include "NoViableAltException.h" +#include "Parser.h" +#include "ParserInterpreter.h" +#include "ParserRuleContext.h" +#include "ProxyErrorListener.h" +#include "RecognitionException.h" +#include "Recognizer.h" +#include "RuleContext.h" +#include "RuleContextWithAltNum.h" +#include "RuntimeMetaData.h" +#include "Token.h" +#include "TokenFactory.h" +#include "TokenSource.h" +#include "TokenStream.h" +#include "TokenStreamRewriter.h" +#include "UnbufferedCharStream.h" +#include "UnbufferedTokenStream.h" +#include "Version.h" +#include "Vocabulary.h" +#include "Vocabulary.h" +#include "WritableToken.h" +#include "atn/ATN.h" +#include "atn/ATNConfig.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNDeserializationOptions.h" +#include "atn/ATNDeserializer.h" +#include "atn/ATNSimulator.h" +#include "atn/ATNState.h" +#include "atn/ATNType.h" +#include "atn/ActionTransition.h" +#include "atn/AmbiguityInfo.h" +#include "atn/ArrayPredictionContext.h" +#include "atn/AtomTransition.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/BlockStartState.h" +#include 
"atn/ContextSensitivityInfo.h" +#include "atn/DecisionEventInfo.h" +#include "atn/DecisionInfo.h" +#include "atn/DecisionState.h" +#include "atn/EpsilonTransition.h" +#include "atn/ErrorInfo.h" +#include "atn/LL1Analyzer.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerATNSimulator.h" +#include "atn/LexerAction.h" +#include "atn/LexerActionExecutor.h" +#include "atn/LexerActionType.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerCustomAction.h" +#include "atn/LexerIndexedCustomAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" +#include "atn/LookaheadEventInfo.h" +#include "atn/LoopEndState.h" +#include "atn/NotSetTransition.h" +#include "atn/OrderedATNConfigSet.h" +#include "atn/ParseInfo.h" +#include "atn/ParserATNSimulator.h" +#include "atn/ParserATNSimulatorOptions.h" +#include "atn/PlusBlockStartState.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/PredicateTransition.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "atn/PredictionContextMergeCache.h" +#include "atn/PredictionContextMergeCacheOptions.h" +#include "atn/PredictionMode.h" +#include "atn/ProfilingATNSimulator.h" +#include "atn/RangeTransition.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SemanticContext.h" +#include "atn/SerializedATNView.h" +#include "atn/SetTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/StarBlockStartState.h" +#include "atn/StarLoopEntryState.h" +#include "atn/StarLoopbackState.h" +#include "atn/TokensStartState.h" +#include "atn/Transition.h" +#include "atn/WildcardTransition.h" +#include "dfa/DFA.h" +#include "dfa/DFASerializer.h" +#include "dfa/DFAState.h" +#include 
"dfa/LexerDFASerializer.h" +#include "misc/InterpreterDataReader.h" +#include "misc/Interval.h" +#include "misc/IntervalSet.h" +#include "misc/MurmurHash.h" +#include "misc/Predicate.h" +#include "support/Any.h" +#include "support/Arrays.h" +#include "support/BitSet.h" +#include "support/Casts.h" +#include "support/CPPUtils.h" +#include "tree/AbstractParseTreeVisitor.h" +#include "tree/ErrorNode.h" +#include "tree/ErrorNodeImpl.h" +#include "tree/ParseTree.h" +#include "tree/ParseTreeListener.h" +#include "tree/ParseTreeProperty.h" +#include "tree/ParseTreeVisitor.h" +#include "tree/ParseTreeWalker.h" +#include "tree/TerminalNode.h" +#include "tree/TerminalNodeImpl.h" +#include "tree/Trees.h" +#include "tree/pattern/Chunk.h" +#include "tree/pattern/ParseTreeMatch.h" +#include "tree/pattern/ParseTreePattern.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "tree/pattern/RuleTagToken.h" +#include "tree/pattern/TagChunk.h" +#include "tree/pattern/TextChunk.h" +#include "tree/pattern/TokenTagToken.h" +#include "tree/xpath/XPath.h" +#include "tree/xpath/XPathElement.h" +#include "tree/xpath/XPathLexer.h" +#include "tree/xpath/XPathLexerErrorListener.h" +#include "tree/xpath/XPathRuleAnywhereElement.h" +#include "tree/xpath/XPathRuleElement.h" +#include "tree/xpath/XPathTokenAnywhereElement.h" +#include "tree/xpath/XPathTokenElement.h" +#include "tree/xpath/XPathWildcardAnywhereElement.h" +#include "tree/xpath/XPathWildcardElement.h" +#include "internal/Synchronization.h" diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp new file mode 100644 index 0000000000..339515cc9c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp @@ -0,0 +1,159 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LL1Analyzer.h" +#include "Token.h" +#include "atn/RuleTransition.h" +#include "misc/IntervalSet.h" +#include "RuleContext.h" +#include "atn/DecisionState.h" +#include "Recognizer.h" +#include "atn/ATNType.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "atn/ATN.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +ATN::ATN() : ATN(ATNType::LEXER, 0) {} + +ATN::ATN(ATNType grammarType_, size_t maxTokenType_) : grammarType(grammarType_), maxTokenType(maxTokenType_) {} + +ATN::~ATN() { + for (ATNState *state : states) { + delete state; + } +} + +misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const { + LL1Analyzer analyzer(*this); + return analyzer.LOOK(s, ctx); + +} + +misc::IntervalSet const& ATN::nextTokens(ATNState *s) const { + if (!s->_nextTokenUpdated) { + UniqueLock<Mutex> lock(_mutex); + if (!s->_nextTokenUpdated) { + s->_nextTokenWithinRule = nextTokens(s, nullptr); + s->_nextTokenUpdated = true; + } + } + return s->_nextTokenWithinRule; +} + +void ATN::addState(ATNState *state) { + if (state != nullptr) { + //state->atn = this; + state->stateNumber = static_cast<int>(states.size()); + } + + states.push_back(state); +} + +void ATN::removeState(ATNState *state) { + delete states.at(state->stateNumber);// just free mem, don't shift states in list + states.at(state->stateNumber) = nullptr; +} + +int ATN::defineDecisionState(DecisionState *s) { + decisionToState.push_back(s); + s->decision = static_cast<int>(decisionToState.size() - 1); + return s->decision; +} + +DecisionState *ATN::getDecisionState(size_t decision) const { + if (!decisionToState.empty()) { + return decisionToState[decision]; + } + return nullptr; +} + +size_t ATN::getNumberOfDecisions() const { + return decisionToState.size(); +} + +misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *context) const { + if (stateNumber == 
ATNState::INVALID_STATE_NUMBER || stateNumber >= states.size()) { + throw IllegalArgumentException("Invalid state number."); + } + + RuleContext *ctx = context; + ATNState *s = states.at(stateNumber); + misc::IntervalSet following = nextTokens(s); + if (!following.contains(Token::EPSILON)) { + return following; + } + + misc::IntervalSet expected; + expected.addAll(following); + expected.remove(Token::EPSILON); + while (ctx && ctx->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON)) { + ATNState *invokingState = states.at(ctx->invokingState); + const RuleTransition *rt = static_cast<const RuleTransition*>(invokingState->transitions[0].get()); + following = nextTokens(rt->followState); + expected.addAll(following); + expected.remove(Token::EPSILON); + + if (ctx->parent == nullptr) { + break; + } + ctx = static_cast<RuleContext *>(ctx->parent); + } + + if (following.contains(Token::EPSILON)) { + expected.add(Token::EOF); + } + + return expected; +} + +std::string ATN::toString() const { + std::stringstream ss; + std::string type; + switch (grammarType) { + case ATNType::LEXER: + type = "LEXER "; + break; + + case ATNType::PARSER: + type = "PARSER "; + break; + + default: + break; + } + ss << "(" << type << "ATN " << std::hex << this << std::dec << ") maxTokenType: " << maxTokenType << std::endl; + ss << "states (" << states.size() << ") {" << std::endl; + + size_t index = 0; + for (auto *state : states) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + index = 0; + for (auto *state : decisionToState) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + ss << "}"; + + return ss.str(); +} + diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h new file mode 100644 index 0000000000..f12476358a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h @@ -0,0 +1,133 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "RuleContext.h"
#include "internal/Synchronization.h"

// GCC generates a warning when forward-declaring ATN if ATN has already been
// declared due to the attributes added by ANTLR4CPP_PUBLIC.
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159
// Add constant that can be checked so forward-declarations can be omitted.
#define ANTLR4CPP_ATN_DECLARED

namespace antlr4 {
namespace atn {

  class LexerATNSimulator;
  class ParserATNSimulator;

  /// Container for the states and lookup tables of one ATN. It can be
  /// neither copied nor moved (all four operations are deleted), and its
  /// destructor deletes the objects stored in `states`.
  class ANTLR4CPP_PUBLIC ATN {
  public:
    static constexpr size_t INVALID_ALT_NUMBER = 0;

    /// Used for runtime deserialization of ATNs from strings.
    /// Delegates to ATN(ATNType::LEXER, 0).
    ATN();

    ATN(ATNType grammarType, size_t maxTokenType);

    ATN(const ATN&) = delete;

    ATN(ATN&&) = delete;

    ~ATN();

    ATN& operator=(const ATN&) = delete;

    ATN& operator=(ATN&&) = delete;

    /// All states, indexed by state number. Owned by the ATN; a slot is
    /// nullptr after removeState() has been called for it.
    std::vector<ATNState *> states;

    /// Each subrule/rule is a decision point and we must track them so we
    /// can go back later and build DFA predictors for them. This includes
    /// all the rules, subrules, optional blocks, ()+, ()* etc...
    std::vector<DecisionState *> decisionToState;

    /// Maps from rule index to the start state of that rule.
    std::vector<RuleStartState *> ruleToStartState;

    /// Maps from rule index to the stop state of that rule.
    std::vector<RuleStopState *> ruleToStopState;

    /// The type of the ATN.
    ATNType grammarType;

    /// The maximum value for any symbol recognized by a transition in the ATN.
    size_t maxTokenType;

    /// <summary>
    /// For lexer ATNs, this maps the rule index to the resulting token type.
    /// For parser ATNs, this maps the rule index to the generated bypass token
    /// type if the
    /// <seealso cref="ATNDeserializationOptions#isGenerateRuleBypassTransitions"/>
    /// deserialization option was specified; otherwise, this is {@code null}
    /// (Java wording -- in C++ presumably an empty vector; TODO confirm).
    /// </summary>
    std::vector<size_t> ruleToTokenType;

    /// For lexer ATNs, this is an array of {@link LexerAction} objects which may
    /// be referenced by action transitions in the ATN.
    std::vector<Ref<const LexerAction>> lexerActions;

    std::vector<TokensStartState *> modeToStartState;

    /// <summary>
    /// Compute the set of valid tokens that can occur starting in state {@code s}.
    /// If {@code ctx} is null, the set of tokens will not include what can follow
    /// the rule surrounding {@code s}. In other words, the set will be
    /// restricted to tokens reachable staying within {@code s}'s rule.
    /// </summary>
    misc::IntervalSet nextTokens(ATNState *s, RuleContext *ctx) const;

    /// <summary>
    /// Compute the set of valid tokens that can occur starting in {@code s} and
    /// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of
    /// rule. The result is cached inside {@code s}, which is why a reference
    /// is returned.
    /// </summary>
    misc::IntervalSet const& nextTokens(ATNState *s) const;

    /// Appends {@code state} and, when it is not null, sets its state number
    /// to its index in {@code states}.
    void addState(ATNState *state);

    /// Deletes the state and leaves a nullptr in its slot; other state
    /// numbers are unaffected.
    void removeState(ATNState *state);

    /// Registers {@code s} as a new decision and returns the decision number
    /// assigned to it.
    int defineDecisionState(DecisionState *s);

    /// Returns the state registered for {@code decision}, or nullptr when no
    /// decisions have been defined. {@code decision} should be smaller than
    /// getNumberOfDecisions().
    DecisionState *getDecisionState(size_t decision) const;

    size_t getNumberOfDecisions() const;

    /// <summary>
    /// Computes the set of input symbols which could follow ATN state number
    /// {@code stateNumber} in the specified full {@code context}. This method
    /// considers the complete parser context, but does not evaluate semantic
    /// predicates (i.e. all predicates encountered during the calculation are
    /// assumed true). If a path in the ATN exists from the starting state to the
    /// <seealso cref="RuleStopState"/> of the outermost context without matching any
    /// symbols, <seealso cref="Token#EOF"/> is added to the returned set.
    /// <p/>
    /// If {@code context} is {@code null}, it is treated as
    /// <seealso cref="ParserRuleContext#EMPTY"/>.
    /// </summary>
    /// <param name="stateNumber"> the ATN state number </param>
    /// <param name="context"> the full parse context </param>
    /// <returns> The set of potentially valid input symbols which could follow the
    /// specified state in the specified context. </returns>
    /// <exception cref="IllegalArgumentException"> if the ATN does not contain a state with
    /// number {@code stateNumber} </exception>
    misc::IntervalSet getExpectedTokens(size_t stateNumber, RuleContext *context) const;

    /// Returns a multi-line debug dump of the states and decision states.
    std::string toString() const;

  private:
    friend class LexerATNSimulator;
    friend class ParserATNSimulator;

    // _mutex guards the lazy follow-set computation in nextTokens(ATNState*).
    // The two shared mutexes are not used by ATN itself in ATN.cpp;
    // presumably they serve the befriended simulators -- TODO confirm.
    mutable internal::Mutex _mutex;
    mutable internal::SharedMutex _stateMutex;
    mutable internal::SharedMutex _edgeMutex;
  };

} // namespace atn
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp new file mode 100644 index 0000000000..be4d5bfa8c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp @@ -0,0 +1,106 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "misc/MurmurHash.h"
#include "atn/PredictionContext.h"
#include "SemanticContext.h"

#include "atn/ATNConfig.h"

using namespace antlr4::atn;

namespace {

/**
 * This field stores the bit mask for implementing the
 * {@link #isPrecedenceFilterSuppressed} property as a bit within the
 * existing {@link #reachesIntoOuterContext} field.
+ */ +inline constexpr size_t SUPPRESS_PRECEDENCE_FILTER = 0x40000000; + +} + +ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context) + : ATNConfig(state, alt, std::move(context), 0, SemanticContext::Empty::Instance) {} + +ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext) + : ATNConfig(state, alt, std::move(context), 0, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext) + : ATNConfig(other.state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state) + : ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, other.semanticContext) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext) + : ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context) + : ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, other.semanticContext) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext) + : ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext) + : state(state), alt(alt), context(std::move(context)), reachesIntoOuterContext(reachesIntoOuterContext), semanticContext(std::move(semanticContext)) {} + +size_t ATNConfig::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, 
state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, alt); + hashCode = misc::MurmurHash::update(hashCode, context); + hashCode = misc::MurmurHash::update(hashCode, semanticContext); + hashCode = misc::MurmurHash::finish(hashCode, 4); + return hashCode; +} + +size_t ATNConfig::getOuterContextDepth() const { + return reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER; +} + +bool ATNConfig::isPrecedenceFilterSuppressed() const { + return (reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER) != 0; +} + +void ATNConfig::setPrecedenceFilterSuppressed(bool value) { + if (value) { + reachesIntoOuterContext |= SUPPRESS_PRECEDENCE_FILTER; + } else { + reachesIntoOuterContext &= ~SUPPRESS_PRECEDENCE_FILTER; + } +} + +bool ATNConfig::operator==(const ATNConfig &other) const { + return state->stateNumber == other.state->stateNumber && alt == other.alt && + ((context == other.context) || (*context == *other.context)) && + *semanticContext == *other.semanticContext && + isPrecedenceFilterSuppressed() == other.isPrecedenceFilterSuppressed(); +} + +std::string ATNConfig::toString() const { + return toString(true); +} + +std::string ATNConfig::toString(bool showAlt) const { + std::stringstream ss; + ss << "("; + + ss << state->toString(); + if (showAlt) { + ss << "," << alt; + } + if (context) { + ss << ",[" << context->toString() << "]"; + } + if (semanticContext != nullptr && semanticContext != SemanticContext::Empty::Instance) { + ss << ",[" << semanticContext->toString() << "]"; + } + if (getOuterContextDepth() > 0) { + ss << ",up=" << getOuterContextDepth(); + } + ss << ")"; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h new file mode 100644 index 0000000000..1d2e7ae163 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h @@ -0,0 +1,157 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
* Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include <cassert>

#include "antlr4-common.h"
#include "atn/SemanticContext.h"

namespace antlr4 {
namespace atn {

  /// <summary>
  /// A tuple: (ATN state, predicted alt, syntactic, semantic context).
  /// The syntactic context is a graph-structured stack node whose
  /// path(s) to the root is the rule invocation(s)
  /// chain used to arrive at the state. The semantic context is
  /// the tree of semantic predicates encountered before reaching
  /// an ATN state.
  /// </summary>
  class ANTLR4CPP_PUBLIC ATNConfig {
  public:
    /// Hash functor that forwards to hashCode(), for configurations held
    /// either by shared pointer or by value. The pointer overload
    /// dereferences its argument, so it must not be null.
    struct Hasher
    {
      size_t operator()(Ref<ATNConfig> const& k) const {
        return k->hashCode();
      }

      size_t operator()(ATNConfig const& k) const {
        return k.hashCode();
      }
    };

    /// Equality functor: identical objects compare equal without calling
    /// operator==; otherwise the decision is delegated to operator==.
    struct Comparer {
      bool operator()(Ref<ATNConfig> const& lhs, Ref<ATNConfig> const& rhs) const {
        return (lhs == rhs) || (*lhs == *rhs);
      }

      bool operator()(ATNConfig const& lhs, ATNConfig const& rhs) const {
        return (&lhs == &rhs) || (lhs == rhs);
      }
    };

    /// Hash set of shared configurations keyed by value (hashCode / operator==).
    using Set = std::unordered_set<Ref<ATNConfig>, Hasher, Comparer>;

    /// The ATN state associated with this configuration.
    ATNState *state = nullptr;

    /// What alt (or lexer rule) is predicted by this configuration.
    /// Declared const, which also makes ATNConfig non-assignable.
    const size_t alt = 0;

    /// The stack of invoking states leading to the rule/states associated
    /// with this config. We track only those contexts pushed during
    /// execution of the ATN simulator.
    ///
    /// Can be shared between multiple ATNConfig instances.
    Ref<const PredictionContext> context;

    /**
     * We cannot execute predicates dependent upon local context unless
     * we know for sure we are in the correct context. Because there is
     * no way to do this efficiently, we simply cannot evaluate
     * dependent predicates unless we are in the rule that initially
     * invokes the ATN simulator.
     *
     * <p>
     * closure() tracks the depth of how far we dip into the outer context:
     * depth > 0. Note that it may not be totally accurate depth since I
     * don't ever decrement. TODO: make it a boolean then</p>
     *
     * <p>
     * For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method
     * is also backed by this field. Since the field is publicly accessible, the
     * highest bit which would not cause the value to become negative is used to
     * store this field. This choice minimizes the risk that code which only
     * compares this value to 0 would be affected by the new purpose of the
     * flag. It also ensures the performance of the existing {@link ATNConfig}
     * constructors as well as certain operations like
     * {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are
     * <em>completely</em> unaffected by the change.</p>
     *
     * <p>
     * In this implementation the flag is the bit 0x40000000 (see
     * SUPPRESS_PRECEDENCE_FILTER in ATNConfig.cpp); use
     * {@link #getOuterContextDepth} to read the depth without it.</p>
     */
    size_t reachesIntoOuterContext = 0;

    /// Can be shared between multiple ATNConfig instances.
    Ref<const SemanticContext> semanticContext;

    /// Creates a configuration with the shared empty semantic context and an
    /// outer-context depth of 0.
    ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context);
    /// Creates a configuration with an outer-context depth of 0.
    ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext);

    // "Copy with changes" constructors: everything that is not passed
    // explicitly (alt, reachesIntoOuterContext and, where not given, state /
    // context / semanticContext) is taken from `other`.
    ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext);
    ATNConfig(ATNConfig const& other, ATNState *state);
    ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext);
    ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context);
    ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext);

    ATNConfig(ATNConfig const&) = default;

    ATNConfig(ATNConfig&&) = default;

    virtual ~ATNConfig() = default;

    /// Hash over state number, alt, context and semantic context.
    virtual size_t hashCode() const;

    /**
     * This method gets the value of the {@link #reachesIntoOuterContext} field
     * as it existed prior to the introduction of the
     * {@link #isPrecedenceFilterSuppressed} method.
     */
    size_t getOuterContextDepth() const;
    bool isPrecedenceFilterSuppressed() const;
    void setPrecedenceFilterSuppressed(bool value);

    /// An ATN configuration is equal to another if both have
    /// the same state, they predict the same alternative, and
    /// syntactic/semantic contexts are the same.
    bool operator==(const ATNConfig &other) const;
    // NOTE(review): make sure ATNConfig.cpp provides a definition for this
    // operator; a declared-but-undefined operator!= fails at link time as
    // soon as `a != b` is used.
    bool operator!=(const ATNConfig &other) const;

    virtual std::string toString() const;
    std::string toString(bool showAlt) const;

  private:
    /// Target of all public non-default constructors; initializes every member.
    ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext);
  };

} // namespace atn
} // namespace antlr4


// Hash function for ATNConfig.

namespace std {
  // NOTE(review): this using-declaration adds a name to namespace std, which
  // the standard only permits for template specializations. Other headers may
  // rely on the unqualified name, so check before removing it.
  using antlr4::atn::ATNConfig;

  template <> struct hash<ATNConfig>
  {
    size_t operator() (const ATNConfig &x) const
    {
      return x.hashCode();
    }
  };

  // Order-dependent combination of the element hashes; every element is
  // dereferenced, so the vector must not contain null pointers.
  template <> struct hash<std::vector<Ref<ATNConfig>>>
  {
    size_t operator() (const std::vector<Ref<ATNConfig>> &vector) const
    {
      std::size_t seed = 0;
      for (const auto &config : vector) {
        seed ^= config->hashCode() + 0x9e3779b9 + (seed << 6) + (seed >> 2);
      }
      return seed;
    }
  };
}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp new file mode 100644 index 0000000000..4ebdf8882b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp @@ -0,0 +1,232 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "atn/ATNSimulator.h" +#include "Exceptions.h" +#include "atn/SemanticContext.h" +#include "support/Arrays.h" + +#include "atn/ATNConfigSet.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + +} + +ATNConfigSet::ATNConfigSet() : ATNConfigSet(true) {} + +ATNConfigSet::ATNConfigSet(const ATNConfigSet &other) + : fullCtx(other.fullCtx), _configLookup(other._configLookup.bucket_count(), ATNConfigHasher{this}, ATNConfigComparer{this}) { + addAll(other); + uniqueAlt = other.uniqueAlt; + conflictingAlts = other.conflictingAlts; + hasSemanticContext = other.hasSemanticContext; + dipsIntoOuterContext = other.dipsIntoOuterContext; +} + +ATNConfigSet::ATNConfigSet(bool fullCtx) + : fullCtx(fullCtx), _configLookup(0, ATNConfigHasher{this}, ATNConfigComparer{this}) {} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config) { + return add(config, nullptr); +} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache) { + assert(config); + + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (config->semanticContext != SemanticContext::Empty::Instance) { + hasSemanticContext = true; + } + if (config->getOuterContextDepth() > 0) { + dipsIntoOuterContext = true; + } + + auto existing = _configLookup.find(config.get()); + if (existing == _configLookup.end()) { + _configLookup.insert(config.get()); + _cachedHashCode = 0; + configs.push_back(config); // track order here + + return true; + } + + // a previous (s,i,pi,_), merge with it and save result + bool rootIsWildcard = !fullCtx; + Ref<const PredictionContext> merged = PredictionContext::merge((*existing)->context, config->context, rootIsWildcard, mergeCache); + // no need to check for existing.context, config.context in cache + // since only way to create new graphs is "call rule" and here. We + // cache at both places. 
+ (*existing)->reachesIntoOuterContext = std::max((*existing)->reachesIntoOuterContext, config->reachesIntoOuterContext); + + // make sure to preserve the precedence filter suppression during the merge + if (config->isPrecedenceFilterSuppressed()) { + (*existing)->setPrecedenceFilterSuppressed(true); + } + + (*existing)->context = std::move(merged); // replace context; no need to alt mapping + + return true; +} + +bool ATNConfigSet::addAll(const ATNConfigSet &other) { + for (const auto &c : other.configs) { + add(c); + } + return false; +} + +std::vector<ATNState*> ATNConfigSet::getStates() const { + std::vector<ATNState*> states; + states.reserve(configs.size()); + for (const auto &c : configs) { + states.push_back(c->state); + } + return states; +} + +/** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + +BitSet ATNConfigSet::getAlts() const { + BitSet alts; + for (const auto &config : configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<Ref<const SemanticContext>> ATNConfigSet::getPredicates() const { + std::vector<Ref<const SemanticContext>> preds; + preds.reserve(configs.size()); + for (const auto &c : configs) { + if (c->semanticContext != SemanticContext::Empty::Instance) { + preds.push_back(c->semanticContext); + } + } + return preds; +} + +const Ref<ATNConfig>& ATNConfigSet::get(size_t i) const { + return configs[i]; +} + +void ATNConfigSet::optimizeConfigs(ATNSimulator *interpreter) { + assert(interpreter); + + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (_configLookup.empty()) + return; + + for (const auto &config : configs) { + config->context = interpreter->getCachedContext(config->context); + } +} + +bool ATNConfigSet::equals(const ATNConfigSet &other) const { + if (&other == this) { + return true; + } + + if (configs.size() != other.configs.size()) + return 
false; + + if (fullCtx != other.fullCtx || uniqueAlt != other.uniqueAlt || + conflictingAlts != other.conflictingAlts || hasSemanticContext != other.hasSemanticContext || + dipsIntoOuterContext != other.dipsIntoOuterContext) // includes stack context + return false; + + return Arrays::equals(configs, other.configs); +} + +size_t ATNConfigSet::hashCode() const { + size_t cachedHashCode = _cachedHashCode.load(std::memory_order_relaxed); + if (!isReadonly() || cachedHashCode == 0) { + cachedHashCode = 1; + for (const auto &i : configs) { + cachedHashCode = 31 * cachedHashCode + i->hashCode(); // Same as Java's list hashCode impl. + } + _cachedHashCode.store(cachedHashCode, std::memory_order_relaxed); + } + return cachedHashCode; +} + +size_t ATNConfigSet::size() const { + return configs.size(); +} + +bool ATNConfigSet::isEmpty() const { + return configs.empty(); +} + +void ATNConfigSet::clear() { + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + configs.clear(); + _cachedHashCode = 0; + _configLookup.clear(); +} + +bool ATNConfigSet::isReadonly() const { + return _readonly; +} + +void ATNConfigSet::setReadonly(bool readonly) { + _readonly = readonly; + LookupContainer(0, ATNConfigHasher{this}, ATNConfigComparer{this}).swap(_configLookup); +} + +std::string ATNConfigSet::toString() const { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < configs.size(); i++) { + ss << configs[i]->toString(); + } + ss << "]"; + + if (hasSemanticContext) { + ss << ",hasSemanticContext = " << hasSemanticContext; + } + if (uniqueAlt != ATN::INVALID_ALT_NUMBER) { + ss << ",uniqueAlt = " << uniqueAlt; + } + + if (conflictingAlts.size() > 0) { + ss << ",conflictingAlts = "; + ss << conflictingAlts.toString(); + } + + if (dipsIntoOuterContext) { + ss << ", dipsIntoOuterContext"; + } + return ss.str(); +} + +size_t ATNConfigSet::hashCode(const ATNConfig &other) const { + size_t hashCode = 7; + hashCode = 31 * hashCode + other.state->stateNumber; + 
hashCode = 31 * hashCode + other.alt; + hashCode = 31 * hashCode + other.semanticContext->hashCode(); + return hashCode; +} + +bool ATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const { + return lhs.state->stateNumber == rhs.state->stateNumber && lhs.alt == rhs.alt && *lhs.semanticContext == *rhs.semanticContext; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h new file mode 100644 index 0000000000..d147f183a0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h @@ -0,0 +1,157 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cassert> + +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "FlatHashSet.h" + +namespace antlr4 { +namespace atn { + + /// Specialized set that can track info about the set, with support for combining similar configurations using a + /// graph-structured stack. + class ANTLR4CPP_PUBLIC ATNConfigSet { + public: + /// Track the elements as they are added to the set; supports get(i) + std::vector<Ref<ATNConfig>> configs; + + // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + // TODO: can we track conflicts as they are added to save scanning configs later? + size_t uniqueAlt = 0; + + /** Currently this is only used when we detect SLL conflict; this does + * not necessarily represent the ambiguous alternatives. In fact, + * I should also point out that this seems to include predicated alternatives + * that have predicates that evaluate to false. Computed in computeTargetState(). + */ + antlrcpp::BitSet conflictingAlts; + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. 
Don't make a DFA state from this. + bool hasSemanticContext = false; + bool dipsIntoOuterContext = false; + + /// Indicates that this configuration set is part of a full context + /// LL prediction. It will be used to determine how to merge $. With SLL + /// it's a wildcard whereas it is not for LL context merge. + const bool fullCtx = true; + + ATNConfigSet(); + + ATNConfigSet(const ATNConfigSet &other); + + ATNConfigSet(ATNConfigSet&&) = delete; + + explicit ATNConfigSet(bool fullCtx); + + virtual ~ATNConfigSet() = default; + + bool add(const Ref<ATNConfig> &config); + + /// <summary> + /// Adding a new config means merging contexts with existing configs for + /// {@code (s, i, pi, _)}, where {@code s} is the + /// <seealso cref="ATNConfig#state"/>, {@code i} is the <seealso cref="ATNConfig#alt"/>, and + /// {@code pi} is the <seealso cref="ATNConfig#semanticContext"/>. We use + /// {@code (s,i,pi)} as key. + /// <p/> + /// This method updates <seealso cref="#dipsIntoOuterContext"/> and + /// <seealso cref="#hasSemanticContext"/> when necessary. + /// </summary> + bool add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache); + + bool addAll(const ATNConfigSet &other); + + std::vector<ATNState*> getStates() const; + + /** + * Gets the complete set of represented alternatives for the configuration + * set. 
+ * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + antlrcpp::BitSet getAlts() const; + std::vector<Ref<const SemanticContext>> getPredicates() const; + + const Ref<ATNConfig>& get(size_t i) const; + + void optimizeConfigs(ATNSimulator *interpreter); + + size_t size() const; + bool isEmpty() const; + void clear(); + bool isReadonly() const; + void setReadonly(bool readonly); + + virtual size_t hashCode() const; + + virtual bool equals(const ATNConfigSet &other) const; + + virtual std::string toString() const; + + private: + struct ATNConfigHasher final { + const ATNConfigSet* atnConfigSet; + + size_t operator()(const ATNConfig *other) const { + assert(other != nullptr); + return atnConfigSet->hashCode(*other); + } + }; + + struct ATNConfigComparer final { + const ATNConfigSet* atnConfigSet; + + bool operator()(const ATNConfig *lhs, const ATNConfig *rhs) const { + assert(lhs != nullptr); + assert(rhs != nullptr); + return atnConfigSet->equals(*lhs, *rhs); + } + }; + + mutable std::atomic<size_t> _cachedHashCode = 0; + + /// Indicates that the set of configurations is read-only. Do not + /// allow any code to manipulate the set; DFA states will point at + /// the sets and they must not change. This does not protect the other + /// fields; in particular, conflictingAlts is set after + /// we've made this readonly. + bool _readonly = false; + + virtual size_t hashCode(const ATNConfig &atnConfig) const; + + virtual bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const; + + using LookupContainer = FlatHashSet<ATNConfig*, ATNConfigHasher, ATNConfigComparer>; + + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. 
+ LookupContainer _configLookup; + }; + + inline bool operator==(const ATNConfigSet &lhs, const ATNConfigSet &rhs) { return lhs.equals(rhs); } + + inline bool operator!=(const ATNConfigSet &lhs, const ATNConfigSet &rhs) { return !operator==(lhs, rhs); } + +} // namespace atn +} // namespace antlr4 + +namespace std { + +template <> +struct hash<::antlr4::atn::ATNConfigSet> { + size_t operator()(const ::antlr4::atn::ATNConfigSet &atnConfigSet) const { + return atnConfigSet.hashCode(); + } +}; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp new file mode 100644 index 0000000000..e0a7cb2b27 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNDeserializationOptions.h" +#include "Exceptions.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ATNDeserializationOptions::ATNDeserializationOptions(ATNDeserializationOptions *options) + : _readOnly(false), _verifyATN(options->_verifyATN), + _generateRuleBypassTransitions(options->_generateRuleBypassTransitions) {} + +const ATNDeserializationOptions& ATNDeserializationOptions::getDefaultOptions() { + static const ATNDeserializationOptions* const defaultOptions = new ATNDeserializationOptions(); + return *defaultOptions; +} + +void ATNDeserializationOptions::makeReadOnly() { + _readOnly = true; +} + +void ATNDeserializationOptions::setVerifyATN(bool verify) { + throwIfReadOnly(); + _verifyATN = verify; +} + +void ATNDeserializationOptions::setGenerateRuleBypassTransitions(bool generate) { + throwIfReadOnly(); + _generateRuleBypassTransitions = generate; +} + +void ATNDeserializationOptions::throwIfReadOnly() const { + if (isReadOnly()) { + throw IllegalStateException("ATNDeserializationOptions is read only."); + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h new file mode 100644 index 0000000000..595f918649 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + +class ANTLR4CPP_PUBLIC ATNDeserializationOptions final { +public: + ATNDeserializationOptions() + : _readOnly(false), _verifyATN(true), _generateRuleBypassTransitions(false) {} + + // TODO: Is this useful? If so we should mark it as explicit, otherwise remove it. 
+ ATNDeserializationOptions(ATNDeserializationOptions *options); + + ATNDeserializationOptions(const ATNDeserializationOptions&) = default; + + ATNDeserializationOptions& operator=(const ATNDeserializationOptions&) = default; + + static const ATNDeserializationOptions& getDefaultOptions(); + + bool isReadOnly() const { return _readOnly; } + + void makeReadOnly(); + + bool isVerifyATN() const { return _verifyATN; } + + void setVerifyATN(bool verify); + + bool isGenerateRuleBypassTransitions() const { return _generateRuleBypassTransitions; } + + void setGenerateRuleBypassTransitions(bool generate); + +private: + void throwIfReadOnly() const; + + bool _readOnly; + bool _verifyATN; + bool _generateRuleBypassTransitions; +}; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp new file mode 100644 index 0000000000..2da3c32357 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp @@ -0,0 +1,628 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNDeserializationOptions.h" + +#include "atn/ATNType.h" +#include "atn/ATNState.h" +#include "atn/ATN.h" + +#include "atn/LoopEndState.h" +#include "atn/DecisionState.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/TokensStartState.h" +#include "atn/RuleTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PlusBlockStartState.h" +#include "atn/StarLoopbackState.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/StarLoopEntryState.h" + +#include "atn/AtomTransition.h" +#include "atn/StarBlockStartState.h" +#include "atn/RangeTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/SetTransition.h" +#include "atn/NotSetTransition.h" +#include "atn/WildcardTransition.h" +#include "atn/TransitionType.h" +#include "Token.h" + +#include "misc/IntervalSet.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerCustomAction.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" + +#include "atn/ATNDeserializer.h" + +#include <cassert> +#include <string> +#include <vector> + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + void checkCondition(bool condition, std::string_view message) { + if (!condition) { + throw IllegalStateException(std::string(message)); + } + } + + void checkCondition(bool condition) { + checkCondition(condition, ""); + } + + /** + * Analyze the {@link StarLoopEntryState} states in the specified ATN to set + * the {@link StarLoopEntryState#isPrecedenceDecision} field to the + * correct value. 
+ * + * @param atn The ATN. + */ + void markPrecedenceDecisions(const ATN &atn) { + for (ATNState *state : atn.states) { + if (!StarLoopEntryState::is(state)) { + continue; + } + + /* We analyze the ATN to determine if this ATN decision state is the + * decision for the closure block that determines whether a + * precedence rule should continue or complete. + */ + if (atn.ruleToStartState[state->ruleIndex]->isLeftRecursiveRule) { + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (LoopEndState::is(maybeLoopEndState)) { + if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) { + downCast<StarLoopEntryState*>(state)->isPrecedenceDecision = true; + } + } + } + } + } + + Ref<const LexerAction> lexerActionFactory(LexerActionType type, int data1, int data2) { + switch (type) { + case LexerActionType::CHANNEL: + return std::make_shared<LexerChannelAction>(data1); + + case LexerActionType::CUSTOM: + return std::make_shared<LexerCustomAction>(data1, data2); + + case LexerActionType::MODE: + return std::make_shared< LexerModeAction>(data1); + + case LexerActionType::MORE: + return LexerMoreAction::getInstance(); + + case LexerActionType::POP_MODE: + return LexerPopModeAction::getInstance(); + + case LexerActionType::PUSH_MODE: + return std::make_shared<LexerPushModeAction>(data1); + + case LexerActionType::SKIP: + return LexerSkipAction::getInstance(); + + case LexerActionType::TYPE: + return std::make_shared<LexerTypeAction>(data1); + + default: + throw IllegalArgumentException("The specified lexer action type " + std::to_string(static_cast<size_t>(type)) + + " is not valid."); + } + } + + ConstTransitionPtr edgeFactory(const ATN &atn, TransitionType type, size_t trg, size_t arg1, size_t arg2, + size_t arg3, const std::vector<misc::IntervalSet> &sets) { + ATNState *target = atn.states[trg]; + switch (type) { + case TransitionType::EPSILON: + return 
std::make_unique<EpsilonTransition>(target); + case TransitionType::RANGE: + if (arg3 != 0) { + return std::make_unique<RangeTransition>(target, Token::EOF, arg2); + } else { + return std::make_unique<RangeTransition>(target, arg1, arg2); + } + case TransitionType::RULE: + return std::make_unique<RuleTransition>(downCast<RuleStartState*>(atn.states[arg1]), arg2, (int)arg3, target); + case TransitionType::PREDICATE: + return std::make_unique<PredicateTransition>(target, arg1, arg2, arg3 != 0); + case TransitionType::PRECEDENCE: + return std::make_unique<PrecedencePredicateTransition>(target, (int)arg1); + case TransitionType::ATOM: + if (arg3 != 0) { + return std::make_unique<AtomTransition>(target, Token::EOF); + } else { + return std::make_unique<AtomTransition>(target, arg1); + } + case TransitionType::ACTION: + return std::make_unique<ActionTransition>(target, arg1, arg2, arg3 != 0); + case TransitionType::SET: + return std::make_unique<SetTransition>(target, sets[arg1]); + case TransitionType::NOT_SET: + return std::make_unique<NotSetTransition>(target, sets[arg1]); + case TransitionType::WILDCARD: + return std::make_unique<WildcardTransition>(target); + } + + throw IllegalArgumentException("The specified transition type is not valid."); + } + + /* mem check: all created instances are freed in the d-tor of the ATN. 
*/ + ATNState* stateFactory(ATNStateType type, size_t ruleIndex) { + ATNState *s; + switch (type) { + case ATNStateType::INVALID: + return nullptr; + case ATNStateType::BASIC : + s = new BasicState(); + break; + case ATNStateType::RULE_START : + s = new RuleStartState(); + break; + case ATNStateType::BLOCK_START : + s = new BasicBlockStartState(); + break; + case ATNStateType::PLUS_BLOCK_START : + s = new PlusBlockStartState(); + break; + case ATNStateType::STAR_BLOCK_START : + s = new StarBlockStartState(); + break; + case ATNStateType::TOKEN_START : + s = new TokensStartState(); + break; + case ATNStateType::RULE_STOP : + s = new RuleStopState(); + break; + case ATNStateType::BLOCK_END : + s = new BlockEndState(); + break; + case ATNStateType::STAR_LOOP_BACK : + s = new StarLoopbackState(); + break; + case ATNStateType::STAR_LOOP_ENTRY : + s = new StarLoopEntryState(); + break; + case ATNStateType::PLUS_LOOP_BACK : + s = new PlusLoopbackState(); + break; + case ATNStateType::LOOP_END : + s = new LoopEndState(); + break; + default : + std::string message = "The specified state type " + std::to_string(static_cast<size_t>(type)) + " is not valid."; + throw IllegalArgumentException(message); + } + assert(s->getStateType() == type); + s->ruleIndex = ruleIndex; + return s; + } + + ssize_t readUnicodeInt32(SerializedATNView data, int& p) { + return static_cast<ssize_t>(data[p++]); + } + + void deserializeSets( + SerializedATNView data, + int& p, + std::vector<misc::IntervalSet>& sets) { + size_t nsets = data[p++]; + sets.reserve(sets.size() + nsets); + for (size_t i = 0; i < nsets; i++) { + size_t nintervals = data[p++]; + misc::IntervalSet set; + + bool containsEof = data[p++] != 0; + if (containsEof) { + set.add(-1); + } + + for (size_t j = 0; j < nintervals; j++) { + auto a = readUnicodeInt32(data, p); + auto b = readUnicodeInt32(data, p); + set.add(a, b); + } + sets.push_back(set); + } + } + +} + +ATNDeserializer::ATNDeserializer() : 
ATNDeserializer(ATNDeserializationOptions::getDefaultOptions()) {} + +ATNDeserializer::ATNDeserializer(ATNDeserializationOptions deserializationOptions) : _deserializationOptions(std::move(deserializationOptions)) {} + +std::unique_ptr<ATN> ATNDeserializer::deserialize(SerializedATNView data) const { + int p = 0; + int version = data[p++]; + if (version != SERIALIZED_VERSION) { + std::string reason = "Could not deserialize ATN with version" + std::to_string(version) + "(expected " + std::to_string(SERIALIZED_VERSION) + ")."; + + throw UnsupportedOperationException(reason); + } + + ATNType grammarType = (ATNType)data[p++]; + size_t maxTokenType = data[p++]; + auto atn = std::make_unique<ATN>(grammarType, maxTokenType); + + // + // STATES + // + { + std::vector<std::pair<LoopEndState*, size_t>> loopBackStateNumbers; + std::vector<std::pair<BlockStartState*, size_t>> endStateNumbers; + size_t nstates = data[p++]; + atn->states.reserve(nstates); + loopBackStateNumbers.reserve(nstates); // Reserve worst case size, its short lived. + endStateNumbers.reserve(nstates); // Reserve worst case size, its short lived. 
+ for (size_t i = 0; i < nstates; i++) { + ATNStateType stype = static_cast<ATNStateType>(data[p++]); + // ignore bad type of states + if (stype == ATNStateType::INVALID) { + atn->addState(nullptr); + continue; + } + + size_t ruleIndex = data[p++]; + ATNState *s = stateFactory(stype, ruleIndex); + if (stype == ATNStateType::LOOP_END) { // special case + int loopBackStateNumber = data[p++]; + loopBackStateNumbers.push_back({ downCast<LoopEndState*>(s), loopBackStateNumber }); + } else if (BlockStartState::is(s)) { + int endStateNumber = data[p++]; + endStateNumbers.push_back({ downCast<BlockStartState*>(s), endStateNumber }); + } + atn->addState(s); + } + + // delay the assignment of loop back and end states until we know all the state instances have been initialized + for (auto &pair : loopBackStateNumbers) { + pair.first->loopBackState = atn->states[pair.second]; + } + + for (auto &pair : endStateNumbers) { + pair.first->endState = downCast<BlockEndState*>(atn->states[pair.second]); + } + } + + size_t numNonGreedyStates = data[p++]; + for (size_t i = 0; i < numNonGreedyStates; i++) { + size_t stateNumber = data[p++]; + // The serialized ATN must be specifying the right states, so that the + // cast below is correct. + downCast<DecisionState*>(atn->states[stateNumber])->nonGreedy = true; + } + + size_t numPrecedenceStates = data[p++]; + for (size_t i = 0; i < numPrecedenceStates; i++) { + size_t stateNumber = data[p++]; + downCast<RuleStartState*>(atn->states[stateNumber])->isLeftRecursiveRule = true; + } + + // + // RULES + // + size_t nrules = data[p++]; + atn->ruleToStartState.reserve(nrules); + for (size_t i = 0; i < nrules; i++) { + size_t s = data[p++]; + // Also here, the serialized atn must ensure to point to the correct class type. 
+ RuleStartState *startState = downCast<RuleStartState*>(atn->states[s]); + atn->ruleToStartState.push_back(startState); + if (atn->grammarType == ATNType::LEXER) { + size_t tokenType = data[p++]; + atn->ruleToTokenType.push_back(tokenType); + } + } + + atn->ruleToStopState.resize(nrules); + for (ATNState *state : atn->states) { + if (!RuleStopState::is(state)) { + continue; + } + + RuleStopState *stopState = downCast<RuleStopState*>(state); + atn->ruleToStopState[state->ruleIndex] = stopState; + atn->ruleToStartState[state->ruleIndex]->stopState = stopState; + } + + // + // MODES + // + size_t nmodes = data[p++]; + atn->modeToStartState.reserve(nmodes); + for (size_t i = 0; i < nmodes; i++) { + size_t s = data[p++]; + atn->modeToStartState.push_back(downCast<TokensStartState*>(atn->states[s])); + } + + // + // SETS + // + { + std::vector<misc::IntervalSet> sets; + + deserializeSets(data, p, sets); + sets.shrink_to_fit(); + + // + // EDGES + // + int nedges = data[p++]; + for (int i = 0; i < nedges; i++) { + size_t src = data[p]; + size_t trg = data[p + 1]; + TransitionType ttype = static_cast<TransitionType>(data[p + 2]); + size_t arg1 = data[p + 3]; + size_t arg2 = data[p + 4]; + size_t arg3 = data[p + 5]; + ConstTransitionPtr trans = edgeFactory(*atn, ttype, trg, arg1, arg2, arg3, sets); + ATNState *srcState = atn->states[src]; + srcState->addTransition(std::move(trans)); + p += 6; + } + } + // edges for rule stop states can be derived, so they aren't serialized + for (ATNState *state : atn->states) { + for (size_t i = 0; i < state->transitions.size(); i++) { + const Transition *t = state->transitions[i].get(); + if (!RuleTransition::is(t)) { + continue; + } + + const RuleTransition *ruleTransition = downCast<const RuleTransition*>(t); + size_t outermostPrecedenceReturn = INVALID_INDEX; + if (atn->ruleToStartState[ruleTransition->target->ruleIndex]->isLeftRecursiveRule) { + if (ruleTransition->precedence == 0) { + outermostPrecedenceReturn = 
ruleTransition->target->ruleIndex; + } + } + + ConstTransitionPtr returnTransition = std::make_unique<EpsilonTransition>(ruleTransition->followState, outermostPrecedenceReturn); + atn->ruleToStopState[ruleTransition->target->ruleIndex]->addTransition(std::move(returnTransition)); + } + } + + for (ATNState *state : atn->states) { + if (BlockStartState::is(state)) { + BlockStartState *startState = downCast<BlockStartState*>(state); + + // we need to know the end state to set its start state + if (startState->endState == nullptr) { + throw IllegalStateException(); + } + + // block end states can only be associated to a single block start state + if (startState->endState->startState != nullptr) { + throw IllegalStateException(); + } + + startState->endState->startState = downCast<BlockStartState*>(state); + } + + if (PlusLoopbackState::is(state)) { + PlusLoopbackState *loopbackState = downCast<PlusLoopbackState*>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (PlusBlockStartState::is(target)) { + (downCast<PlusBlockStartState*>(target))->loopBackState = loopbackState; + } + } + } else if (StarLoopbackState::is(state)) { + StarLoopbackState *loopbackState = downCast<StarLoopbackState*>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (StarLoopEntryState::is(target)) { + downCast<StarLoopEntryState*>(target)->loopBackState = loopbackState; + } + } + } + } + + // + // DECISIONS + // + size_t ndecisions = data[p++]; + atn->decisionToState.reserve(ndecisions); + for (size_t i = 0; i < ndecisions; i++) { + size_t s = data[p++]; + DecisionState *decState = downCast<DecisionState*>(atn->states[s]); + if (decState == nullptr) + throw IllegalStateException(); + + atn->decisionToState.push_back(decState); + decState->decision = static_cast<int>(i); + } + + // + // LEXER ACTIONS + // + if 
(atn->grammarType == ATNType::LEXER) { + atn->lexerActions.resize(data[p++]); + for (size_t i = 0; i < atn->lexerActions.size(); i++) { + LexerActionType actionType = static_cast<LexerActionType>(data[p++]); + int data1 = data[p++]; + int data2 = data[p++]; + atn->lexerActions[i] = lexerActionFactory(actionType, data1, data2); + } + } + + markPrecedenceDecisions(*atn); + + if (_deserializationOptions.isVerifyATN()) { + verifyATN(*atn); + } + + if (_deserializationOptions.isGenerateRuleBypassTransitions() && atn->grammarType == ATNType::PARSER) { + atn->ruleToTokenType.resize(atn->ruleToStartState.size()); + for (size_t i = 0; i < atn->ruleToStartState.size(); i++) { + atn->ruleToTokenType[i] = static_cast<int>(atn->maxTokenType + i + 1); + } + + for (std::vector<RuleStartState*>::size_type i = 0; i < atn->ruleToStartState.size(); i++) { + BasicBlockStartState *bypassStart = new BasicBlockStartState(); /* mem check: freed in ATN d-tor */ + bypassStart->ruleIndex = static_cast<int>(i); + atn->addState(bypassStart); + + BlockEndState *bypassStop = new BlockEndState(); /* mem check: freed in ATN d-tor */ + bypassStop->ruleIndex = static_cast<int>(i); + atn->addState(bypassStop); + + bypassStart->endState = bypassStop; + atn->defineDecisionState(bypassStart); + + bypassStop->startState = bypassStart; + + ATNState *endState; + const Transition *excludeTransition = nullptr; + if (atn->ruleToStartState[i]->isLeftRecursiveRule) { + // wrap from the beginning of the rule to the StarLoopEntryState + endState = nullptr; + for (ATNState *state : atn->states) { + if (state->ruleIndex != i) { + continue; + } + + if (!StarLoopEntryState::is(state)) { + continue; + } + + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (!LoopEndState::is(maybeLoopEndState)) { + continue; + } + + if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) { + endState = state; + break; + } + } + + if 
(endState == nullptr) { + throw UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); + + } + + excludeTransition = (static_cast<StarLoopEntryState*>(endState))->loopBackState->transitions[0].get(); + } else { + endState = atn->ruleToStopState[i]; + } + + // all non-excluded transitions that currently target end state need to target blockEnd instead + for (ATNState *state : atn->states) { + for (auto &transition : state->transitions) { + if (transition.get() == excludeTransition) { + continue; + } + + if (transition->target == endState) { + const_cast<Transition*>(transition.get())->target = bypassStop; + } + } + } + + // all transitions leaving the rule start state need to leave blockStart instead + while (atn->ruleToStartState[i]->transitions.size() > 0) { + ConstTransitionPtr transition = atn->ruleToStartState[i]->removeTransition(atn->ruleToStartState[i]->transitions.size() - 1); + bypassStart->addTransition(std::move(transition)); + } + + // link the new states + atn->ruleToStartState[i]->addTransition(std::make_unique<EpsilonTransition>(bypassStart)); + bypassStop->addTransition(std::make_unique<EpsilonTransition>(endState)); + + ATNState *matchState = new BasicState(); /* mem check: freed in ATN d-tor */ + atn->addState(matchState); + matchState->addTransition(std::make_unique<AtomTransition>(bypassStop, atn->ruleToTokenType[i])); + bypassStart->addTransition(std::make_unique<EpsilonTransition>(matchState)); + } + + if (_deserializationOptions.isVerifyATN()) { + // reverify after modification + verifyATN(*atn); + } + } + + return atn; +} + +void ATNDeserializer::verifyATN(const ATN &atn) const { + // verify assumptions + for (ATNState *state : atn.states) { + if (state == nullptr) { + continue; + } + + checkCondition(state->epsilonOnlyTransitions || state->transitions.size() <= 1); + + if (PlusBlockStartState::is(state)) { + checkCondition((downCast<PlusBlockStartState*>(state))->loopBackState != nullptr); + } 
+ + if (StarLoopEntryState::is(state)) { + StarLoopEntryState *starLoopEntryState = downCast<StarLoopEntryState*>(state); + checkCondition(starLoopEntryState->loopBackState != nullptr); + checkCondition(starLoopEntryState->transitions.size() == 2); + + if (StarBlockStartState::is(starLoopEntryState->transitions[0]->target)) { + checkCondition(downCast<LoopEndState*>(starLoopEntryState->transitions[1]->target) != nullptr); + checkCondition(!starLoopEntryState->nonGreedy); + } else if (LoopEndState::is(starLoopEntryState->transitions[0]->target)) { + checkCondition(StarBlockStartState::is(starLoopEntryState->transitions[1]->target)); + checkCondition(starLoopEntryState->nonGreedy); + } else { + throw IllegalStateException(); + } + } + + if (StarLoopbackState::is(state)) { + checkCondition(state->transitions.size() == 1); + checkCondition(StarLoopEntryState::is(state->transitions[0]->target)); + } + + if (LoopEndState::is(state)) { + checkCondition((downCast<LoopEndState*>(state))->loopBackState != nullptr); + } + + if (RuleStartState::is(state)) { + checkCondition((downCast<RuleStartState*>(state))->stopState != nullptr); + } + + if (BlockStartState::is(state)) { + checkCondition((downCast<BlockStartState*>(state))->endState != nullptr); + } + + if (BlockEndState::is(state)) { + checkCondition((downCast<BlockEndState*>(state))->startState != nullptr); + } + + if (DecisionState::is(state)) { + DecisionState *decisionState = downCast<DecisionState*>(state); + checkCondition(decisionState->transitions.size() <= 1 || decisionState->decision >= 0); + } else { + checkCondition(state->transitions.size() <= 1 || RuleStopState::is(state)); + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h new file mode 100644 index 0000000000..3cd56b9cdf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
/// Rebuilds an ATN from its serialized form.
///
/// The behaviour of deserialize() is tuned by the ATNDeserializationOptions held in
/// _deserializationOptions: the implementation consults isVerifyATN() and
/// isGenerateRuleBypassTransitions() on them.
class ANTLR4CPP_PUBLIC ATNDeserializer final {
public:
  /// Version number of the serialized ATN format.
  /// NOTE(review): presumably the only version deserialize() accepts; the check is
  /// not visible in this part of the file, so confirm.
  static constexpr size_t SERIALIZED_VERSION = 4;

  /// NOTE(review): presumably selects default deserialization options; confirm in the .cpp.
  ATNDeserializer();

  /// Creates a deserializer configured with `deserializationOptions`
  /// (presumably stored in _deserializationOptions; confirm in the .cpp).
  explicit ATNDeserializer(ATNDeserializationOptions deserializationOptions);

  /// Builds an ATN from `input`; ownership of the result passes to the caller.
  std::unique_ptr<ATN> deserialize(SerializedATNView input) const;

  /// Validates the structural assumptions about the states of `atn`.
  void verifyATN(const ATN &atn) const;

private:
  /// Options for this deserializer; const, so they cannot change after construction.
  const ATNDeserializationOptions _deserializationOptions;
};
+ */ + +#include "atn/ATNSimulator.h" + +#include "atn/ATNConfigSet.h" +#include "atn/ATNDeserializer.h" +#include "atn/ATNType.h" +#include "dfa/DFAState.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlr4::atn; + +const Ref<DFAState> ATNSimulator::ERROR = std::make_shared<DFAState>(std::numeric_limits<int>::max()); + +ATNSimulator::ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache) + : atn(atn), _sharedContextCache(sharedContextCache) {} + +void ATNSimulator::clearDFA() { + throw UnsupportedOperationException("This ATN simulator does not support clearing the DFA."); +} + +PredictionContextCache& ATNSimulator::getSharedContextCache() const { + return _sharedContextCache; +} + +Ref<const PredictionContext> ATNSimulator::getCachedContext(const Ref<const PredictionContext> &context) { + // This function must only be called with an active state lock, as we are going to change a shared structure. + return PredictionContext::getCachedContext(context, getSharedContextCache()); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h new file mode 100644 index 0000000000..b14939e219 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATN.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSimulator { + public: + /// Must distinguish between missing edge and edge we know leads nowhere. 
+ static const Ref<dfa::DFAState> ERROR; + const ATN &atn; + + ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache); + + virtual ~ATNSimulator() = default; + + virtual void reset() = 0; + + /** + * Clear the DFA cache used by the current instance. Since the DFA cache may + * be shared by multiple ATN simulators, this method may affect the + * performance (but not accuracy) of other parsers which are being used + * concurrently. + * + * @throws UnsupportedOperationException if the current instance does not + * support clearing the DFA. + * + * @since 4.3 + */ + virtual void clearDFA(); + + PredictionContextCache& getSharedContextCache() const; + Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context); + + protected: + /// <summary> + /// The context cache maps all PredictionContext objects that are equals() + /// to a single cached copy. This cache is shared across all contexts + /// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + /// to use only cached nodes/graphs in addDFAState(). We don't want to + /// fill this during closure() since there are lots of contexts that + /// pop up but are not used ever again. It also greatly slows down closure(). + /// <p/> + /// This cache makes a huge difference in memory and a little bit in speed. + /// For the Java grammar on java.*, it dropped the memory requirements + /// at the end from 25M to 16M. We don't store any of the full context + /// graphs in the DFA because they are limited to local context only, + /// but apparently there's a lot of repetition there as well. We optimize + /// the config contexts before storing the config set in the DFA states + /// by literally rebuilding them with cached subgraphs only. + /// <p/> + /// I tried a cache for use during closure operations, that was + /// whacked after each adaptivePredict(). 
It cost a little bit + /// more time I think and doesn't save on the overall footprint + /// so it's not worth the complexity. + /// </summary> + PredictionContextCache &_sharedContextCache; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp new file mode 100644 index 0000000000..29911901be --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" +#include "atn/Transition.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +#include "atn/ATNState.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +size_t ATNState::hashCode() const { + return stateNumber; +} + +bool ATNState::equals(const ATNState &other) const { + return stateNumber == other.stateNumber; +} + +bool ATNState::isNonGreedyExitState() const { + return false; +} + +std::string ATNState::toString() const { + return std::to_string(stateNumber); +} + +void ATNState::addTransition(ConstTransitionPtr e) { + addTransition(transitions.size(), std::move(e)); +} + +void ATNState::addTransition(size_t index, ConstTransitionPtr e) { + for (const auto &transition : transitions) + if (transition->target->stateNumber == e->target->stateNumber) { + return; + } + + if (transitions.empty()) { + epsilonOnlyTransitions = e->isEpsilon(); + } else if (epsilonOnlyTransitions != e->isEpsilon()) { + std::cerr << "ATN state %d has both epsilon and non-epsilon transitions.\n" << stateNumber; + epsilonOnlyTransitions = false; + } + + transitions.insert(transitions.begin() + index, std::move(e)); +} + +ConstTransitionPtr ATNState::removeTransition(size_t index) { + ConstTransitionPtr result = std::move(transitions[index]); + 
transitions.erase(transitions.begin() + index); + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h new file mode 100644 index 0000000000..7613f40eee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" +#include "atn/ATNStateType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// The following images show the relation of states and + /// <seealso cref="ATNState#transitions"/> for various grammar constructs. + /// + /// <ul> + /// + /// <li>Solid edges marked with an ε indicate a required + /// <seealso cref="EpsilonTransition"/>.</li> + /// + /// <li>Dashed edges indicate locations where any transition derived from + /// <seealso cref="Transition"/> might appear.</li> + /// + /// <li>Dashed nodes are place holders for either a sequence of linked + /// <seealso cref="BasicState"/> states or the inclusion of a block representing a nested + /// construct in one of the forms below.</li> + /// + /// <li>Nodes showing multiple outgoing alternatives with a {@code ...} support + /// any number of alternatives (one or more). 
Nodes without the {@code ...} only + /// support the exact number of alternatives shown in the diagram.</li> + /// + /// </ul> + /// + /// <h2>Basic Blocks</h2> + /// + /// <h3>Rule</h3> + /// + /// <embed src="images/Rule.svg" type="image/svg+xml"/> + /// + /// <h3>Block of 1 or more alternatives</h3> + /// + /// <embed src="images/Block.svg" type="image/svg+xml"/> + /// + /// <h2>Greedy Loops</h2> + /// + /// <h3>Greedy Closure: {@code (...)*}</h3> + /// + /// <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Positive Closure: {@code (...)+}</h3> + /// + /// <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Optional: {@code (...)?}</h3> + /// + /// <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/> + /// + /// <h2>Non-Greedy Loops</h2> + /// + /// <h3>Non-Greedy Closure: {@code (...)*?}</h3> + /// + /// <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3> + /// + /// <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Optional: {@code (...)??}</h3> + /// + /// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/> + /// </summary> + +// GCC generates a warning here if ATN has already been declared due to the +// attributes added by ANTLR4CPP_PUBLIC. +// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159 +// Only forward-declare if it hasn't already been declared. +#ifndef ANTLR4CPP_ATN_DECLARED + class ANTLR4CPP_PUBLIC ATN; +#endif + + class ANTLR4CPP_PUBLIC ATNState { + public: + static constexpr size_t INITIAL_NUM_TRANSITIONS = 4; + static constexpr size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max(); + + size_t stateNumber = INVALID_STATE_NUMBER; + size_t ruleIndex = 0; // at runtime, we don't have Rule objects + bool epsilonOnlyTransitions = false; + + /// Track the transitions emanating from this ATN state. 
+ std::vector<ConstTransitionPtr> transitions; + + ATNState() = delete; + + ATNState(ATNState const&) = delete; + + ATNState(ATNState&&) = delete; + + virtual ~ATNState() = default; + + ATNState& operator=(ATNState const&) = delete; + + ATNState& operator=(ATNState&&) = delete; + + void addTransition(ConstTransitionPtr e); + void addTransition(size_t index, ConstTransitionPtr e); + ConstTransitionPtr removeTransition(size_t index); + + virtual size_t hashCode() const; + virtual bool equals(const ATNState &other) const; + + virtual bool isNonGreedyExitState() const; + virtual std::string toString() const; + + ATNStateType getStateType() const { return _stateType; } + + protected: + explicit ATNState(ATNStateType stateType) : _stateType(stateType) {} + + private: + /// Used to cache lookahead during parsing, not used during construction. + + misc::IntervalSet _nextTokenWithinRule; + std::atomic<bool> _nextTokenUpdated { false }; + + const ATNStateType _stateType; + + friend class ATN; + }; + + inline bool operator==(const ATNState &lhs, const ATNState &rhs) { return lhs.equals(rhs); } + + inline bool operator!=(const ATNState &lhs, const ATNState &rhs) { return !operator==(lhs, rhs); } + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp new file mode 100644 index 0000000000..577e2af87c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp @@ -0,0 +1,33 @@ +#include "atn/ATNStateType.h" + +std::string antlr4::atn::atnStateTypeName(ATNStateType atnStateType) { + switch (atnStateType) { + case ATNStateType::INVALID: + return "INVALID"; + case ATNStateType::BASIC: + return "BASIC"; + case ATNStateType::RULE_START: + return "RULE_START"; + case ATNStateType::BLOCK_START: + return "BLOCK_START"; + case ATNStateType::PLUS_BLOCK_START: + return "PLUS_BLOCK_START"; + case ATNStateType::STAR_BLOCK_START: + return "STAR_BLOCK_START"; + 
case ATNStateType::TOKEN_START: + return "TOKEN_START"; + case ATNStateType::RULE_STOP: + return "RULE_STOP"; + case ATNStateType::BLOCK_END: + return "BLOCK_END"; + case ATNStateType::STAR_LOOP_BACK: + return "STAR_LOOP_BACK"; + case ATNStateType::STAR_LOOP_ENTRY: + return "STAR_LOOP_ENTRY"; + case ATNStateType::PLUS_LOOP_BACK: + return "PLUS_LOOP_BACK"; + case ATNStateType::LOOP_END: + return "LOOP_END"; + } + return "UNKNOWN"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h new file mode 100644 index 0000000000..e19b2cce92 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <string> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + // Constants for ATNState serialization. + enum class ATNStateType : size_t { + INVALID = 0, + BASIC = 1, + RULE_START = 2, + BLOCK_START = 3, + PLUS_BLOCK_START = 4, + STAR_BLOCK_START = 5, + TOKEN_START = 6, + RULE_STOP = 7, + BLOCK_END = 8, + STAR_LOOP_BACK = 9, + STAR_LOOP_ENTRY = 10, + PLUS_LOOP_BACK = 11, + LOOP_END = 12, + }; + + ANTLR4CPP_PUBLIC std::string atnStateTypeName(ATNStateType atnStateType); + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h new file mode 100644 index 0000000000..3530ef6051 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// Represents the type of recognizer an ATN applies to. + enum class ATNType { + LEXER = 0, + PARSER = 1, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp new file mode 100644 index 0000000000..1886b7e169 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ActionTransition.h" + +using namespace antlr4::atn; + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex) + : Transition(TransitionType::ACTION, target), ruleIndex(ruleIndex), actionIndex(INVALID_INDEX), isCtxDependent(false) { +} + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent) + : Transition(TransitionType::ACTION, target), ruleIndex(ruleIndex), actionIndex(actionIndex), isCtxDependent(isCtxDependent) { +} + +bool ActionTransition::isEpsilon() const { + return true; // we are to be ignored by analysis 'cept for predicates +} + +bool ActionTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string ActionTransition::toString() const { + return " ACTION " + Transition::toString() + " { ruleIndex: " + std::to_string(ruleIndex) + ", actionIndex: " + + std::to_string(actionIndex) + ", isCtxDependent: " + std::to_string(isCtxDependent) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h new file mode 100644 index 0000000000..1700297a78 --- /dev/null +++ 
b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ActionTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ACTION; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const size_t ruleIndex; + const size_t actionIndex; + const bool isCtxDependent; // e.g., $i ref in action + + ActionTransition(ATNState *target, size_t ruleIndex); + + ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent); + + virtual bool isEpsilon() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp new file mode 100644 index 0000000000..72ce922633 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/AmbiguityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +AmbiguityInfo::AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->ambigAlts = ambigAlts; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h new file mode 100644 index 0000000000..db594a1f48 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for an ambiguity. + /// Ambiguities are decisions where a particular input resulted in an SLL + /// conflict, followed by LL prediction also reaching a conflict state + /// (indicating a true ambiguity in the grammar). + /// + /// <para> + /// This event may be reported during SLL prediction in cases where the + /// conflicting SLL configuration set provides sufficient information to + /// determine that the SLL conflict is truly an ambiguity. For example, if none + /// of the ATN configurations in the conflicting SLL configuration set have + /// traversed a global follow transition (i.e. 
/// <summary>
/// This class represents profiling event information for an ambiguity.
/// Ambiguities are decisions where a particular input resulted in an SLL
/// conflict, followed by LL prediction also reaching a conflict state
/// (indicating a true ambiguity in the grammar).
///
/// <para>
/// This event may be reported during SLL prediction in cases where the
/// conflicting SLL configuration set provides sufficient information to
/// determine that the SLL conflict is truly an ambiguity. For example, if none
/// of the ATN configurations in the conflicting SLL configuration set have
/// traversed a global follow transition (i.e.
/// <seealso cref="ATNConfig#reachesIntoOuterContext"/> is 0 for all configurations), then
/// the result of SLL prediction for that input is known to be equivalent to the
/// result of LL prediction for that input.</para>
///
/// <para>
/// In some cases, the minimum represented alternative in the conflicting LL
/// configuration set is not equal to the minimum represented alternative in the
/// conflicting SLL configuration set. Grammars and inputs which result in this
/// scenario are unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means
/// they cannot use the two-stage parsing strategy to improve parsing performance
/// for that input.</para>
/// </summary>
/// <seealso cref="ParserATNSimulator#reportAmbiguity"/>
/// <seealso cref="ANTLRErrorListener#reportAmbiguity"/>
///
/// @since 4.3
class ANTLR4CPP_PUBLIC AmbiguityInfo : public DecisionEventInfo {
public:
  /// The set of alternative numbers for this decision event that lead to a valid parse.
  antlrcpp::BitSet ambigAlts;

  /// <summary>
  /// Constructs a new instance of the <seealso cref="AmbiguityInfo"/> class with the
  /// specified detailed ambiguity information.
  /// </summary>
  /// <param name="decision"> The decision number </param>
  /// <param name="configs"> The final configuration set identifying the ambiguous
  /// alternatives for the current input </param>
  /// <param name="ambigAlts"> The set of alternatives in the decision that lead to a valid parse.
  ///                  The predicted alternative is min(ambigAlts) </param>
  /// <param name="input"> The input token stream </param>
  /// <param name="startIndex"> The start index for the current prediction </param>
  /// <param name="stopIndex"> The index at which the ambiguity was identified during
  /// prediction </param>
  /// <param name="fullCtx"> {@code true} if the ambiguity was identified during LL
  /// prediction; otherwise, {@code false} if the ambiguity was identified
  /// during SLL prediction </param>
  AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, TokenStream *input,
                size_t startIndex, size_t stopIndex, bool fullCtx);
};
/// Cheap pre-check used before a full comparison: two cached hash codes are
/// compatible when they are identical or when either of them is 0.
/// NOTE(review): presumably 0 marks a hash that has not been computed yet; confirm
/// in PredictionContext.
bool cachedHashCodeEqual(size_t lhs, size_t rhs) {
  if (lhs == 0 || rhs == 0) {
    return true;
  }
  return lhs == rhs;
}
+ return returnStates[0] == EMPTY_RETURN_STATE; +} + +size_t ArrayPredictionContext::size() const { + return returnStates.size(); +} + +const Ref<const PredictionContext>& ArrayPredictionContext::getParent(size_t index) const { + return parents[index]; +} + +size_t ArrayPredictionContext::getReturnState(size_t index) const { + return returnStates[index]; +} + +size_t ArrayPredictionContext::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getContextType())); + for (const auto &parent : parents) { + hash = MurmurHash::update(hash, parent); + } + for (const auto &returnState : returnStates) { + hash = MurmurHash::update(hash, returnState); + } + return MurmurHash::finish(hash, 1 + parents.size() + returnStates.size()); +} + +bool ArrayPredictionContext::equals(const PredictionContext &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const auto &array = downCast<const ArrayPredictionContext&>(other); + return returnStates.size() == array.returnStates.size() && + parents.size() == array.parents.size() && + cachedHashCodeEqual(cachedHashCode(), array.cachedHashCode()) && + std::memcmp(returnStates.data(), array.returnStates.data(), returnStates.size() * sizeof(decltype(returnStates)::value_type)) == 0 && + std::equal(parents.begin(), parents.end(), array.parents.begin(), predictionContextEqual); +} + +std::string ArrayPredictionContext::toString() const { + if (isEmpty()) { + return "[]"; + } + + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < returnStates.size(); i++) { + if (i > 0) { + ss << ", "; + } + if (returnStates[i] == EMPTY_RETURN_STATE) { + ss << "$"; + continue; + } + ss << returnStates[i]; + if (parents[i] != nullptr) { + ss << " " << parents[i]->toString(); + } else { + ss << "nul"; + } + } + ss << "]"; + return ss.str(); +} diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h new file mode 100644 index 0000000000..f43db98a01 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h @@ -0,0 +1,51 @@ + +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class SingletonPredictionContext; + + class ANTLR4CPP_PUBLIC ArrayPredictionContext final : public PredictionContext { + public: + static bool is(const PredictionContext &predictionContext) { return predictionContext.getContextType() == PredictionContextType::ARRAY; } + + static bool is(const PredictionContext *predictionContext) { return predictionContext != nullptr && is(*predictionContext); } + + /// Parent can be empty only if full ctx mode and we make an array + /// from EMPTY and non-empty. We merge EMPTY by using null parent and + /// returnState == EMPTY_RETURN_STATE. + // Also here: we use a strong reference to our parents to avoid having them freed prematurely. + // See also SinglePredictionContext. + std::vector<Ref<const PredictionContext>> parents; + + /// Sorted for merge, no duplicates; if present, EMPTY_RETURN_STATE is always last. 
+ std::vector<size_t> returnStates; + + explicit ArrayPredictionContext(const SingletonPredictionContext &predictionContext); + + ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents, std::vector<size_t> returnStates); + + ArrayPredictionContext(ArrayPredictionContext&&) = default; + + bool isEmpty() const override; + size_t size() const override; + const Ref<const PredictionContext>& getParent(size_t index) const override; + size_t getReturnState(size_t index) const override; + bool equals(const PredictionContext &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp new file mode 100644 index 0000000000..74153bf5cd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" + +#include "atn/AtomTransition.h" + +using namespace antlr4::misc; +using namespace antlr4::atn; + +AtomTransition::AtomTransition(ATNState *target, size_t label) : Transition(TransitionType::ATOM, target), _label(label) { +} + +IntervalSet AtomTransition::label() const { + return IntervalSet::of((int)_label); +} + +bool AtomTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return _label == symbol; +} + +std::string AtomTransition::toString() const { + return "ATOM " + Transition::toString() + " { label: " + std::to_string(_label) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h new file mode 100644 index 0000000000..db62a7feab --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// TODO: make all transitions sets? no, should remove set edges. + class ANTLR4CPP_PUBLIC AtomTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ATOM; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + /// The token type or character value; or, signifies special label. 
+ /// TODO: rename this to label + const size_t _label; + + AtomTransition(ATNState *target, size_t label); + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h new file mode 100644 index 0000000000..1c462ec0eb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BasicBlockStartState() : BlockStartState(ATNStateType::BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h new file mode 100644 index 0000000000..7f8a9ef0dd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BASIC; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BasicState() : ATNState(ATNStateType::BASIC) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h new file mode 100644 index 0000000000..11ef5499ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Terminal node of a simple {@code (a|b|c)} block. + class ANTLR4CPP_PUBLIC BlockEndState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BLOCK_END; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BlockStartState *startState = nullptr; + + BlockEndState() : ATNState(ATNStateType::BLOCK_END) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h new file mode 100644 index 0000000000..3475115894 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The start of a regular {@code (...)} block. + class ANTLR4CPP_PUBLIC BlockStartState : public DecisionState { + public: + static bool is(const ATNState &atnState) { + const auto stateType = atnState.getStateType(); + return stateType >= ATNStateType::BLOCK_START && stateType <= ATNStateType::STAR_BLOCK_START; + } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BlockEndState *endState = nullptr; + + protected: + using DecisionState::DecisionState; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp new file mode 100644 index 0000000000..12442a9bc0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ContextSensitivityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ContextSensitivityInfo::ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, + size_t startIndex, size_t stopIndex) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, true) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h new file mode 100644 index 0000000000..430ce3b6e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a context sensitivity. + /// Context sensitivities are decisions where a particular input resulted in an + /// SLL conflict, but LL prediction produced a single unique alternative. + /// + /// <para> + /// In some cases, the unique alternative identified by LL prediction is not + /// equal to the minimum represented alternative in the conflicting SLL + /// configuration set. Grammars and inputs which result in this scenario are + /// unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means they cannot use + /// the two-stage parsing strategy to improve parsing performance for that + /// input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportContextSensitivity </seealso> + /// <seealso cref= ANTLRErrorListener#reportContextSensitivity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ContextSensitivityInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ContextSensitivityInfo"/> class + /// with the specified detailed context sensitivity information. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the unique + /// alternative identified by full-context prediction </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the context sensitivity was + /// identified during full-context prediction </param> + ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp new file mode 100644 index 0000000000..bca6c778c0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/DecisionEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +DecisionEventInfo::DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx) + : decision(decision), configs(configs), input(input), startIndex(startIndex), stopIndex(stopIndex), fullCtx(fullCtx) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h new file mode 100644 index 0000000000..af7f5f4b17 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This is the base class for gathering detailed information about prediction + /// events which occur during parsing. + /// + /// Note that we could record the parser call stack at the time this event + /// occurred but in the presence of left recursive rules, the stack is kind of + /// meaningless. It's better to look at the individual configurations for their + /// individual stacks. Of course that is a <seealso cref="PredictionContext"/> object + /// not a parse tree node and so it does not have information about the extent + /// (start...stop) of the various subtrees. Examining the stack tops of all + /// configurations provide the return states for the rule invocations. + /// From there you can get the enclosing rule. + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionEventInfo { + public: + /// <summary> + /// The invoked decision number which this event is related to. + /// </summary> + /// <seealso cref= ATN#decisionToState </seealso> + const size_t decision; + + /// <summary> + /// The configuration set containing additional information relevant to the + /// prediction state when the current event occurred, or {@code null} if no + /// additional information is relevant or available. + /// </summary> + const ATNConfigSet *configs; + + /// <summary> + /// The input token stream which is being parsed. + /// </summary> + const TokenStream *input; + + /// <summary> + /// The token index in the input stream at which the current prediction was + /// originally invoked. + /// </summary> + const size_t startIndex; + + /// <summary> + /// The token index in the input stream at which the current event occurred. 
+ /// </summary> + const size_t stopIndex; + + /// <summary> + /// {@code true} if the current event occurred during LL prediction; + /// otherwise, {@code false} if the input occurred during SLL prediction. + /// </summary> + const bool fullCtx; + + DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp new file mode 100644 index 0000000000..ee9b1aac34 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ErrorInfo.h" +#include "atn/LookaheadEventInfo.h" + +#include "atn/DecisionInfo.h" + +using namespace antlr4::atn; + +DecisionInfo::DecisionInfo(size_t decision) : decision(decision) { +} + +std::string DecisionInfo::toString() const { + std::stringstream ss; + + ss << "{decision=" << decision << ", contextSensitivities=" << contextSensitivities.size() << ", errors="; + ss << errors.size() << ", ambiguities=" << ambiguities.size() << ", SLL_lookahead=" << SLL_TotalLook; + ss << ", SLL_ATNTransitions=" << SLL_ATNTransitions << ", SLL_DFATransitions=" << SLL_DFATransitions; + ss << ", LL_Fallback=" << LL_Fallback << ", LL_lookahead=" << LL_TotalLook << ", LL_ATNTransitions=" << LL_ATNTransitions << '}'; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h new file mode 100644 index 0000000000..2b43ad8be9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ContextSensitivityInfo.h" +#include "atn/AmbiguityInfo.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/ErrorInfo.h" + +namespace antlr4 { +namespace atn { + + class LookaheadEventInfo; + + /// <summary> + /// This class contains profiling gathered for a particular decision. + /// + /// <para> + /// Parsing performance in ANTLR 4 is heavily influenced by both static factors + /// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the + /// choice of input and the state of the DFA cache at the time profiling + /// operations are started). For best results, gather and use aggregate + /// statistics from a large sample of inputs representing the inputs expected in + /// production before using the results to make changes in the grammar.</para> + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionInfo { + public: + /// <summary> + /// The decision number, which is an index into <seealso cref="ATN#decisionToState"/>. + /// </summary> + const size_t decision; + + /// <summary> + /// The total number of times <seealso cref="ParserATNSimulator#adaptivePredict"/> was + /// invoked for this decision. + /// </summary> + long long invocations = 0; + + /// <summary> + /// The total time spent in <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision, in nanoseconds. + /// + /// <para> + /// The value of this field contains the sum of differential results obtained + /// by <seealso cref="System#nanoTime()"/>, and is not adjusted to compensate for JIT + /// and/or garbage collection overhead. For best accuracy, use a modern JVM + /// implementation that provides precise results from + /// <seealso cref="System#nanoTime()"/>, and perform profiling in a separate process + /// which is warmed up by parsing the input prior to profiling. 
If desired, + /// call <seealso cref="ATNSimulator#clearDFA"/> to reset the DFA cache to its initial + /// state before starting the profiling measurement pass.</para> + /// </summary> + long long timeInPrediction = 0; + + /// <summary> + /// The sum of the lookahead required for SLL prediction for this decision. + /// Note that SLL prediction is used before LL prediction for performance + /// reasons even when <seealso cref="PredictionMode#LL"/> or + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. + /// </summary> + long long SLL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MaxLook = 0; + + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#SLL_MaxLook"/> value was set. + Ref<LookaheadEventInfo> SLL_MaxLookEvent; + + /// <summary> + /// The sum of the lookahead required for LL prediction for this decision. + /// Note that LL prediction is only used when SLL prediction reaches a + /// conflict state. + /// </summary> + long long LL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error. 
+ /// </summary> + long long LL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>), an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>), or a syntax error. + /// </summary> + long long LL_MaxLook = 0; + + /// <summary> + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#LL_MaxLook"/> value was set. + /// </summary> + Ref<LookaheadEventInfo> LL_MaxLookEvent; + + /// <summary> + /// A collection of <seealso cref="ContextSensitivityInfo"/> instances describing the + /// context sensitivities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= ContextSensitivityInfo </seealso> + std::vector<ContextSensitivityInfo> contextSensitivities; + + /// <summary> + /// A collection of <seealso cref="ErrorInfo"/> instances describing the parse errors + /// identified during calls to <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision. + /// </summary> + /// <seealso cref= ErrorInfo </seealso> + std::vector<ErrorInfo> errors; + + /// <summary> + /// A collection of <seealso cref="AmbiguityInfo"/> instances describing the + /// ambiguities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= AmbiguityInfo </seealso> + std::vector<AmbiguityInfo> ambiguities; + + /// <summary> + /// A collection of <seealso cref="PredicateEvalInfo"/> instances describing the + /// results of evaluating individual predicates during prediction for this + /// decision. 
+ /// </summary> + /// <seealso cref= PredicateEvalInfo </seealso> + std::vector<PredicateEvalInfo> predicateEvals; + + /// <summary> + /// The total number of ATN transitions required during SLL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of SLL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the SLL parsing algorithm + /// will use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #SLL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long SLL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during SLL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for SLL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long SLL_DFATransitions = 0; + + /// <summary> + /// Gets the total number of times SLL prediction completed in a conflict + /// state, resulting in fallback to LL prediction. + /// + /// <para>Note that this value is not related to whether or not + /// <seealso cref="PredictionMode#SLL"/> may be used successfully with a particular + /// grammar. 
If the ambiguity resolution algorithm applied to the SLL + /// conflicts for this decision produces the same result as LL prediction for + /// this decision, <seealso cref="PredictionMode#SLL"/> would produce the same overall + /// parsing result as <seealso cref="PredictionMode#LL"/>.</para> + /// </summary> + long long LL_Fallback = 0; + + /// <summary> + /// The total number of ATN transitions required during LL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of LL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the LL parsing algorithm will + /// use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #LL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long LL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during LL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for LL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long LL_DFATransitions = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="DecisionInfo"/> class to contain + /// statistics for a particular decision. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + explicit DecisionInfo(size_t decision); + + std::string toString() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp new file mode 100644 index 0000000000..72adb210f5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/DecisionState.h" + +using namespace antlr4::atn; + +std::string DecisionState::toString() const { + return "DECISION " + ATNState::toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h new file mode 100644 index 0000000000..b7341ac6c9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC DecisionState : public ATNState { + public: + static bool is(const ATNState &atnState) { + const auto stateType = atnState.getStateType(); + return (stateType >= ATNStateType::BLOCK_START && stateType <= ATNStateType::TOKEN_START) || + stateType == ATNStateType::PLUS_LOOP_BACK || + stateType == ATNStateType::STAR_LOOP_ENTRY; + } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + int decision = -1; + bool nonGreedy = false; + + virtual std::string toString() const override; + + protected: + using ATNState::ATNState; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp new file mode 100644 index 0000000000..503fb1630e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/EpsilonTransition.h" + +using namespace antlr4::atn; + +EpsilonTransition::EpsilonTransition(ATNState *target) : EpsilonTransition(target, INVALID_INDEX) { +} + +EpsilonTransition::EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn) + : Transition(TransitionType::EPSILON, target), _outermostPrecedenceReturn(outermostPrecedenceReturn) { +} + +size_t EpsilonTransition::outermostPrecedenceReturn() const { + return _outermostPrecedenceReturn; +} + +bool EpsilonTransition::isEpsilon() const { + return true; +} + +bool EpsilonTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string EpsilonTransition::toString() const { + return "EPSILON " + Transition::toString() + " {}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h new file mode 100644 index 0000000000..21bc812822 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC EpsilonTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::EPSILON; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + explicit EpsilonTransition(ATNState *target); + EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn); + + /** + * @return the rule index of a precedence rule for which this transition is + * returning from, where the precedence value is 0; otherwise, INVALID_INDEX. 
+ * + * @see ATNConfig#isPrecedenceFilterSuppressed() + * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet) + * @since 4.4.1 + */ + size_t outermostPrecedenceReturn() const; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + private: + const size_t _outermostPrecedenceReturn; // A rule index. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp new file mode 100644 index 0000000000..efe8507124 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNConfigSet.h" + +#include "atn/ErrorInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ErrorInfo::ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h new file mode 100644 index 0000000000..d34642a195 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a syntax error + /// identified during prediction. 
Syntax errors occur when the prediction + /// algorithm is unable to identify an alternative which would lead to a + /// successful parse. + /// </summary> + /// <seealso cref= Parser#notifyErrorListeners(Token, String, RecognitionException) </seealso> + /// <seealso cref= ANTLRErrorListener#syntaxError + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ErrorInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ErrorInfo"/> class with the + /// specified detailed syntax error information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set reached during prediction + /// prior to reaching the <seealso cref="ATNSimulator#ERROR"/> state </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the syntax error was identified </param> + /// <param name="fullCtx"> {@code true} if the syntax error was identified during LL + /// prediction; otherwise, {@code false} if the syntax error was identified + /// during SLL prediction </param> + ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, + bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp new file mode 100644 index 0000000000..1d43697584 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp @@ -0,0 +1,189 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
namespace {

  /// Hash functor for the visited-set below; defers to ATNConfig::hashCode().
  struct ATNConfigHasher final {
    size_t operator()(const ATNConfig& atn_config) const {
      return atn_config.hashCode();
    }
  };

  /// Equality functor for the visited-set below; defers to ATNConfig::operator==.
  struct ATNConfigComparer final {
    bool operator()(const ATNConfig& lhs, const ATNConfig& rhs) const {
      return lhs == rhs;
    }
  };

  /// Holds the state of one lookahead computation. The result set and the
  /// two option flags are fixed at construction; the visited-set and the
  /// called-rule bit set start out empty and are updated by LOOK() as it
  /// recurses.
  class LL1AnalyzerImpl final {
  public:
    /// <param name="atn"> The ATN being analysed. </param>
    /// <param name="look"> The result lookahead set; LOOK() adds to it in place. </param>
    /// <param name="seeThruPreds"> {@code true} to treat semantic predicates as
    /// implicitly {@code true} and "see through them", otherwise {@code false}
    /// to treat semantic predicates as opaque and add
    /// <seealso cref="LL1Analyzer#HIT_PRED"/> to the result if one is encountered. </param>
    /// <param name="addEOF"> Add <seealso cref="Token#EOF"/> to the result if the end of the
    /// outermost context is reached. This parameter has no effect if the
    /// context passed to LOOK() is {@code null}. </param>
    LL1AnalyzerImpl(const ATN& atn, misc::IntervalSet& look, bool seeThruPreds, bool addEOF) : _atn(atn), _look(look), _seeThruPreds(seeThruPreds), _addEOF(addEOF) {}

    /// <summary>
    /// Compute set of tokens that can follow {@code s} in the ATN in the
    /// specified {@code ctx}, adding them to the result set given at construction.
    /// <p/>
    /// If {@code ctx} is {@code null} and {@code stopState} or the end of the
    /// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to
    /// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
    /// {@code true} and {@code stopState} or the end of the outermost rule is
    /// reached, <seealso cref="Token#EOF"/> is added to the result set.
    /// <p/>
    /// Two members guard the recursion: {@code _lookBusy} records every
    /// (state, context) pair already visited so epsilon cycles terminate, and
    /// {@code _calledRuleStack} records the rules currently being entered so
    /// that a rule transition into such a rule is skipped.
    /// </summary>
    /// <param name="s"> the ATN state. Must not be {@code null}. </param>
    /// <param name="stopState"> the ATN state to stop at. This can be a
    /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
    /// <param name="ctx"> The outer context, or {@code null} if the outer context should
    /// not be used. </param>
    void LOOK(ATNState *s, ATNState *stopState, Ref<const PredictionContext> const& ctx) {
      // Already explored this (state, context) pair: nothing new to add.
      if (!_lookBusy.insert(ATNConfig(s, 0, ctx)).second) {
        return;
      }

      // ml: s can never be null, hence no need to check if stopState is != null.
      if (s == stopState) {
        if (ctx == nullptr) {
          _look.add(Token::EPSILON);
          return;
        } else if (ctx->isEmpty() && _addEOF) {
          _look.add(Token::EOF);
          return;
        }
      }

      if (s->getStateType() == ATNStateType::RULE_STOP) {
        if (ctx == nullptr) {
          _look.add(Token::EPSILON);
          return;
        } else if (ctx->isEmpty() && _addEOF) {
          _look.add(Token::EOF);
          return;
        }

        if (ctx != PredictionContext::EMPTY) {
          // Temporarily clear this rule's bit while following the return
          // states, and restore it afterwards if it had been set.
          bool removed = _calledRuleStack.test(s->ruleIndex);
          _calledRuleStack[s->ruleIndex] = false;
          // run thru all possible stack tops in ctx
          for (size_t i = 0; i < ctx->size(); i++) {
            ATNState *returnState = _atn.states[ctx->getReturnState(i)];
            LOOK(returnState, stopState, ctx->getParent(i));
          }
          if (removed) {
            _calledRuleStack.set(s->ruleIndex);
          }
          return;
        }
      }

      size_t n = s->transitions.size();
      for (size_t i = 0; i < n; i++) {
        const Transition *t = s->transitions[i].get();
        const auto tType = t->getTransitionType();

        if (tType == TransitionType::RULE) {
          // Skip rules that are already being entered further up the recursion.
          if (_calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex]) {
            continue;
          }

          // Push the follow state so the rule-stop branch above can return to it.
          Ref<const PredictionContext> newContext = SingletonPredictionContext::create(ctx, (static_cast<const RuleTransition*>(t))->followState->stateNumber);

          _calledRuleStack.set((static_cast<const RuleTransition*>(t))->target->ruleIndex);
          LOOK(t->target, stopState, newContext);
          _calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex] = false;

        } else if (tType == TransitionType::PREDICATE || tType == TransitionType::PRECEDENCE) {
          if (_seeThruPreds) {
            LOOK(t->target, stopState, ctx);
          } else {
            _look.add(LL1Analyzer::HIT_PRED);
          }
        } else if (t->isEpsilon()) {
          LOOK(t->target, stopState, ctx);
        } else if (tType == TransitionType::WILDCARD) {
          // A wildcard contributes every token type from MIN_USER_TOKEN_TYPE to maxTokenType.
          _look.addAll(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
        } else {
          misc::IntervalSet set = t->label();
          if (!set.isEmpty()) {
            if (tType == TransitionType::NOT_SET) {
              // A negated set contributes its complement within the same token range.
              set = set.complement(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
            }
            _look.addAll(set);
          }
        }
      }
    }

  private:
    const ATN& _atn;                      // ATN being analysed (not owned).
    misc::IntervalSet& _look;             // Caller-owned result set.
    antlrcpp::BitSet _calledRuleStack;    // Bit per rule index: rule is currently being entered.
    std::unordered_set<ATNConfig, ATNConfigHasher, ATNConfigComparer> _lookBusy; // Visited (state, context) pairs.
    bool _seeThruPreds;                   // See the constructor documentation.
    bool _addEOF;                         // See the constructor documentation.
  };

}
/// Computes lookahead sets over an ATN. The object only stores a reference
/// to the ATN; every query builds its own working state.
class ANTLR4CPP_PUBLIC LL1Analyzer final {
public:
  /// Special value added to the lookahead sets to indicate that we hit
  /// a predicate during analysis if {@code seeThruPreds==false}.
  static constexpr size_t HIT_PRED = Token::INVALID_TYPE;

  /// <param name="atn"> The ATN to analyse; it is stored by reference. </param>
  explicit LL1Analyzer(const atn::ATN &atn) : _atn(atn) {}

  /// <summary>
  /// Calculates the SLL(1) expected lookahead set for each outgoing transition
  /// of an <seealso cref="ATNState"/>. The returned vector has one element for each
  /// outgoing transition in {@code s}. If the closure from transition
  /// <em>i</em> leads to a semantic predicate before matching a symbol, or
  /// finds no symbol at all, the element at index <em>i</em> of the result
  /// is an empty set (the Java runtime uses {@code null} here).
  /// </summary>
  /// <param name="s"> the ATN state; if {@code null}, an empty vector is returned </param>
  /// <returns> the expected symbols for each outgoing transition of {@code s}. </returns>
  std::vector<misc::IntervalSet> getDecisionLookahead(ATNState *s) const;

  /// <summary>
  /// Compute set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}. Equivalent to the three-argument overload with a
  /// {@code null} stop state.
  /// <p/>
  /// If {@code ctx} is {@code null} and the end of the rule containing
  /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set.
  /// If {@code ctx} is not {@code null} and the end of the outermost rule is
  /// reached, <seealso cref="Token#EOF"/> is added to the result set.
  /// </summary>
  /// <param name="s"> the ATN state </param>
  /// <param name="ctx"> the complete parser context, or {@code null} if the context
  /// should be ignored
  /// </param>
  /// <returns> The set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}. </returns>
  misc::IntervalSet LOOK(ATNState *s, RuleContext *ctx) const;

  /// <summary>
  /// Compute set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}. Semantic predicates are seen through (treated as
  /// {@code true}) by this entry point.
  /// <p/>
  /// If {@code ctx} is {@code null} and the end of the rule containing
  /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set.
  /// If {@code ctx} is not {@code null} and the end of the outermost rule is
  /// reached, <seealso cref="Token#EOF"/> is added to the result set.
  /// </summary>
  /// <param name="s"> the ATN state </param>
  /// <param name="stopState"> the ATN state to stop at. This can be a
  /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
  /// <param name="ctx"> the complete parser context, or {@code null} if the context
  /// should be ignored
  /// </param>
  /// <returns> The set of tokens that can follow {@code s} in the ATN in the
  /// specified {@code ctx}. </returns>
  misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const;

private:
  const atn::ATN &_atn; // Not owned.
};
+ */ + +#include "misc/MurmurHash.h" +#include "atn/DecisionState.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" +#include "atn/LexerActionExecutor.h" + +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerATNConfig.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context) + : ATNConfig(state, alt, std::move(context)) {} + +LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context, Ref<const LexerActionExecutor> lexerActionExecutor) + : ATNConfig(state, alt, std::move(context)), _lexerActionExecutor(std::move(lexerActionExecutor)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state) + : ATNConfig(other, state), _lexerActionExecutor(other._lexerActionExecutor), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const LexerActionExecutor> lexerActionExecutor) + : ATNConfig(other, state), _lexerActionExecutor(std::move(lexerActionExecutor)), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const PredictionContext> context) + : ATNConfig(other, state, std::move(context)), _lexerActionExecutor(other._lexerActionExecutor), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +size_t LexerATNConfig::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, alt); + hashCode = misc::MurmurHash::update(hashCode, context); + hashCode = misc::MurmurHash::update(hashCode, semanticContext); + hashCode = misc::MurmurHash::update(hashCode, _passedThroughNonGreedyDecision ? 
1 : 0); + hashCode = misc::MurmurHash::update(hashCode, _lexerActionExecutor); + hashCode = misc::MurmurHash::finish(hashCode, 6); + return hashCode; +} + +bool LexerATNConfig::operator==(const LexerATNConfig& other) const +{ + if (this == &other) + return true; + + if (_passedThroughNonGreedyDecision != other._passedThroughNonGreedyDecision) + return false; + + if (_lexerActionExecutor == nullptr) + return other._lexerActionExecutor == nullptr; + if (*_lexerActionExecutor != *(other._lexerActionExecutor)) { + return false; + } + + return ATNConfig::operator==(other); +} + +bool LexerATNConfig::checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target) { + return source._passedThroughNonGreedyDecision || + (DecisionState::is(target) && downCast<DecisionState*>(target)->nonGreedy); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h new file mode 100644 index 0000000000..7d1d6b40e2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
/// An ATNConfig used by the lexer simulator. On top of the base
/// configuration it carries an optional lexer action executor and a flag
/// recording whether a non-greedy decision was passed on the way here.
/// Both additions are immutable after construction.
class ANTLR4CPP_PUBLIC LexerATNConfig final : public ATNConfig {
public:
  // Fresh configurations. The non-greedy flag keeps its default (false).
  LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context);
  LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context, Ref<const LexerActionExecutor> lexerActionExecutor);

  // Configurations derived from `other` and placed at `state`. The non-greedy
  // flag is recomputed from `other` and `state` (see checkNonGreedyDecision);
  // the executor / context is taken from `other` unless passed explicitly.
  LexerATNConfig(LexerATNConfig const& other, ATNState *state);
  LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const LexerActionExecutor> lexerActionExecutor);
  LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const PredictionContext> context);

  /**
   * Gets the {@link LexerActionExecutor} capable of executing the embedded
   * action(s) for the current configuration. May be null.
   */
  const Ref<const LexerActionExecutor>& getLexerActionExecutor() const { return _lexerActionExecutor; }

  /** Whether this configuration, or the one it was derived from, passed a non-greedy decision state. */
  bool hasPassedThroughNonGreedyDecision() const { return _passedThroughNonGreedyDecision; }

  /** Hash over state number, alt, context, semantic context, the non-greedy flag and the executor. */
  virtual size_t hashCode() const override;

  /** Equality that also takes the non-greedy flag and the action executor into account. */
  bool operator==(const LexerATNConfig& other) const;

private:
  /**
   * This is the backing field for {@link #getLexerActionExecutor}.
   */
  const Ref<const LexerActionExecutor> _lexerActionExecutor;

  /** Backing field for hasPassedThroughNonGreedyDecision(); false unless a deriving constructor sets it. */
  const bool _passedThroughNonGreedyDecision = false;

  /** True if `source` already has the flag set, or `target` is a non-greedy DecisionState. */
  static bool checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target);
};
+ */ + +#include "IntStream.h" +#include "atn/OrderedATNConfigSet.h" +#include "Token.h" +#include "LexerNoViableAltException.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/PredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/TokensStartState.h" +#include "misc/Interval.h" +#include "dfa/DFA.h" +#include "Lexer.h" +#include "internal/Synchronization.h" + +#include "dfa/DFAState.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerActionExecutor.h" + +#include "atn/LexerATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_DFA 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +void LexerATNSimulator::SimState::reset() { + *this = SimState(); +} + +LexerATNSimulator::LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : LexerATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +LexerATNSimulator::LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : ATNSimulator(atn, sharedContextCache), _recog(recog), _decisionToDFA(decisionToDFA) { + InitializeInstanceFields(); +} + +void LexerATNSimulator::copyState(LexerATNSimulator *simulator) { + _charPositionInLine = simulator->_charPositionInLine; + _line = simulator->_line; + _mode = simulator->_mode; + _startIndex = simulator->_startIndex; +} + +size_t LexerATNSimulator::match(CharStream *input, size_t mode) { + _mode = mode; + ssize_t mark = input->mark(); + + auto onExit = finally([input, mark] { + input->release(mark); + }); + + _startIndex = input->index(); + _prevAccept.reset(); + const dfa::DFA &dfa = _decisionToDFA[mode]; + dfa::DFAState* s0; + { + SharedLock<SharedMutex> stateLock(atn._stateMutex); + s0 = dfa.s0; + } + if (s0 == nullptr) { + return matchATN(input); + } else { + 
return execATN(input, s0); + } +} + +void LexerATNSimulator::reset() { + _prevAccept.reset(); + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = Lexer::DEFAULT_MODE; +} + +void LexerATNSimulator::clearDFA() { + size_t size = _decisionToDFA.size(); + _decisionToDFA.clear(); + for (size_t d = 0; d < size; ++d) { + _decisionToDFA.emplace_back(atn.getDecisionState(d), d); + } +} + +size_t LexerATNSimulator::matchATN(CharStream *input) { + ATNState *startState = atn.modeToStartState[_mode]; + + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(input, startState); + + bool suppressEdge = s0_closure->hasSemanticContext; + s0_closure->hasSemanticContext = false; + + dfa::DFAState *next = addDFAState(s0_closure.release(), suppressEdge); + + size_t predict = execATN(input, next); + + return predict; +} + +size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) { + if (ds0->isAcceptState) { + // allow zero-length tokens + // ml: in Java code this method uses 3 params. The first is a member var of the class anyway (_prevAccept), so why pass it here? + captureSimState(input, ds0); + } + + size_t t = input->LA(1); + dfa::DFAState *s = ds0; // s is current/from DFA state + + while (true) { // while more work + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=null, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=null, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // it's configuration set; there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. 
Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + dfa::DFAState *target = getExistingTargetState(s, t); + if (target == nullptr) { + target = computeTargetState(input, s, t); + } + + if (target == ERROR.get()) { + break; + } + + // If this is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if (t != Token::EOF) { + consume(input); + } + + if (target->isAcceptState) { + captureSimState(input, target); + if (t == Token::EOF) { + break; + } + } + + t = input->LA(1); + s = target; // flip; current DFA target becomes new src/from state + } + + return failOrAccept(input, s->configs.get(), t); +} + +dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) { + dfa::DFAState* retval = nullptr; + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + if (t <= MAX_DFA_EDGE) { + auto iterator = s->edges.find(t - MIN_DFA_EDGE); +#if DEBUG_ATN == 1 + if (iterator != s->edges.end()) { + std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << iterator->second->stateNumber << std::endl; + } +#endif + + if (iterator != s->edges.end()) + retval = iterator->second; + } + return retval; +} + +dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) { + OrderedATNConfigSet *reach = new OrderedATNConfigSet(); /* mem-check: deleted on error or managed by new DFA state. 
*/ + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + getReachableConfigSet(input, s->configs.get(), reach, t); + + if (reach->isEmpty()) { // we got nowhere on t from s + if (!reach->hasSemanticContext) { + // we got nowhere on t, don't throw out this knowledge; it'd + // cause a failover from DFA later. + addDFAEdge(s, t, ERROR.get()); + } + delete reach; + + // stop when we can't match any more char + return ERROR.get(); + } + + // Add an edge from s to target DFA found/created for reach + return addDFAEdge(s, t, reach); +} + +size_t LexerATNSimulator::failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t) { + if (_prevAccept.dfaState != nullptr) { + accept(input, _prevAccept.dfaState->lexerActionExecutor, _startIndex, _prevAccept.index, _prevAccept.line, _prevAccept.charPos); + return _prevAccept.dfaState->prediction; + } else { + // if no accept and EOF is first char, return EOF + if (t == Token::EOF && input->index() == _startIndex) { + return Token::EOF; + } + + throw LexerNoViableAltException(_recog, input, _startIndex, reach); + } +} + +void LexerATNSimulator::getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, ATNConfigSet *reach, size_t t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + size_t skipAlt = ATN::INVALID_ALT_NUMBER; + + for (const auto &c : closure_->configs) { + bool currentAltReachedAcceptState = c->alt == skipAlt; + if (currentAltReachedAcceptState && (std::static_pointer_cast<LexerATNConfig>(c))->hasPassedThroughNonGreedyDecision()) { + continue; + } + +#if DEBUG_ATN == 1 + std::cout << "testing " << getTokenName((int)t) << " at " << c->toString(true) << std::endl; +#endif + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + const Transition *trans = c->state->transitions[ti].get(); + ATNState 
*target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + auto lexerActionExecutor = downCast<const LexerATNConfig&>(*c).getLexerActionExecutor(); + if (lexerActionExecutor != nullptr) { + lexerActionExecutor = lexerActionExecutor->fixOffsetBeforeMatch((int)input->index() - (int)_startIndex); + } + + bool treatEofAsEpsilon = t == Token::EOF; + Ref<LexerATNConfig> config = std::make_shared<LexerATNConfig>(downCast<const LexerATNConfig&>(*c), + target, std::move(lexerActionExecutor)); + + if (closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. + skipAlt = c->alt; + break; + } + } + } + } +} + +void LexerATNSimulator::accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t /*startIndex*/, + size_t index, size_t line, size_t charPos) { +#if DEBUG_ATN == 1 + std::cout << "ACTION "; + std::cout << toString(lexerActionExecutor) << std::endl; +#endif + + // seek to after last char in token + input->seek(index); + _line = line; + _charPositionInLine = (int)charPos; + + if (lexerActionExecutor != nullptr && _recog != nullptr) { + lexerActionExecutor->execute(_recog, input, _startIndex); + } +} + +atn::ATNState *LexerATNSimulator::getReachableTarget(const Transition *trans, size_t t) { + if (trans->matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + return trans->target; + } + + return nullptr; +} + +std::unique_ptr<ATNConfigSet> LexerATNSimulator::computeStartState(CharStream *input, ATNState *p) { + Ref<const PredictionContext> initialContext = PredictionContext::EMPTY; // ml: the purpose of this assignment is unclear + std::unique_ptr<ATNConfigSet> configs(new OrderedATNConfigSet()); + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(target, (int)(i + 1), 
// Adds to `configs` every configuration reachable from `config` without
// consuming input, recursing through rule returns and through the targets
// produced by getEpsilonTarget().
//
// input                         - character stream; forwarded to getEpsilonTarget().
// config                        - configuration to expand.
// configs                       - set receiving the resulting configurations.
// currentAltReachedAcceptState  - whether an accepting configuration was already
//                                 found for this alternative.
// speculative, treatEofAsEpsilon - forwarded unchanged to getEpsilonTarget().
//
// Returns the updated "alternative reached an accept state" flag: true as soon
// as a rule-stop configuration with a null or empty context is added, otherwise
// whatever the recursive calls report.
bool LexerATNSimulator::closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
                                bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon) {
#if DEBUG_ATN == 1
  std::cout << "closure(" << config->toString(true) << ")" << std::endl;
#endif

  if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) {
#if DEBUG_ATN == 1
    if (_recog != nullptr) {
      std::cout << "closure at " << _recog->getRuleNames()[config->state->ruleIndex] << " rule stop " << config << std::endl;
    } else {
      std::cout << "closure at rule stop " << config << std::endl;
    }
#endif

    if (config->context == nullptr || config->context->hasEmptyPath()) {
      if (config->context == nullptr || config->context->isEmpty()) {
        // Nothing left to return to: this configuration is an accept.
        configs->add(config);
        return true;
      } else {
        // The context contains the empty path among others: record the accept
        // with an empty context, then keep going to pop the remaining return states.
        configs->add(std::make_shared<LexerATNConfig>(*config, config->state, PredictionContext::EMPTY));
        currentAltReachedAcceptState = true;
      }
    }

    if (config->context != nullptr && !config->context->isEmpty()) {
      for (size_t i = 0; i < config->context->size(); i++) {
        // Entries marked EMPTY_RETURN_STATE have no state to return to.
        if (config->context->getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE) {
          Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state
          ATNState *returnState = atn.states[config->context->getReturnState(i)];
          Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(*config, returnState, newContext);
          currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
        }
      }
    }

    return currentAltReachedAcceptState;
  }

  // optimization: states with only epsilon transitions are not recorded. A
  // state is also left out when this alternative already reached an accept
  // state and the configuration passed through a non-greedy decision.
  if (!config->state->epsilonOnlyTransitions) {
    if (!currentAltReachedAcceptState || !config->hasPassedThroughNonGreedyDecision()) {
      configs->add(config);
    }
  }

  ATNState *p = config->state;
  for (size_t i = 0; i < p->transitions.size(); i++) {
    const Transition *t = p->transitions[i].get();
    // A null result means this transition cannot be followed without consuming input.
    Ref<LexerATNConfig> c = getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon);
    if (c != nullptr) {
      currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon);
    }
  }

  return currentAltReachedAcceptState;
}
Since this is when we + test them, we cannot cash the DFA state target of ID. + */ + const PredicateTransition *pt = static_cast<const PredicateTransition*>(t); + +#if DEBUG_ATN == 1 + std::cout << "EVAL rule " << pt->getRuleIndex() << ":" << pt->getPredIndex() << std::endl; +#endif + + configs->hasSemanticContext = true; + if (evaluatePredicate(input, pt->getRuleIndex(), pt->getPredIndex(), speculative)) { + c = std::make_shared<LexerATNConfig>(*config, t->target); + } + break; + } + + case TransitionType::ACTION: + if (config->context == nullptr|| config->context->hasEmptyPath()) { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty() is false. In this case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. 
+ auto lexerActionExecutor = LexerActionExecutor::append(config->getLexerActionExecutor(), + atn.lexerActions[static_cast<const ActionTransition *>(t)->actionIndex]); + c = std::make_shared<LexerATNConfig>(*config, t->target, std::move(lexerActionExecutor)); + break; + } + else { + // ignore actions in referenced rules + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + } + + case TransitionType::EPSILON: + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + + case TransitionType::ATOM: + case TransitionType::RANGE: + case TransitionType::SET: + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + } + } + + break; + + default: // To silence the compiler. Other transition types are not used here. + break; + } + + return c; +} + +bool LexerATNSimulator::evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative) { + // assume true if no recognizer was provided + if (_recog == nullptr) { + return true; + } + + if (!speculative) { + return _recog->sempred(nullptr, ruleIndex, predIndex); + } + + size_t savedCharPositionInLine = _charPositionInLine; + size_t savedLine = _line; + size_t index = input->index(); + ssize_t marker = input->mark(); + + auto onExit = finally([this, input, savedCharPositionInLine, savedLine, index, marker] { + _charPositionInLine = savedCharPositionInLine; + _line = savedLine; + input->seek(index); + input->release(marker); + }); + + consume(input); + return _recog->sempred(nullptr, ruleIndex, predIndex); +} + +void LexerATNSimulator::captureSimState(CharStream *input, dfa::DFAState *dfaState) { + _prevAccept.index = input->index(); + _prevAccept.line = _line; + _prevAccept.charPos = _charPositionInLine; + _prevAccept.dfaState = dfaState; +} + +dfa::DFAState *LexerATNSimulator::addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q) { + /* leading to this 
call, ATNConfigSet.hasSemanticContext is used as a + * marker indicating dynamic predicate evaluation makes this edge + * dependent on the specific input sequence, so the static edge in the + * DFA should be omitted. The target DFAState is still created since + * execATN has the ability to resynchronize with the DFA state cache + * following the predicate evaluation step. + * + * TJP notes: next time through the DFA, we see a pred again and eval. + * If that gets us to a previously created (but dangling) DFA + * state, we can continue in pure DFA mode from there. + */ + bool suppressEdge = q->hasSemanticContext; + q->hasSemanticContext = false; + + dfa::DFAState *to = addDFAState(q); + + if (suppressEdge) { + return to; + } + + addDFAEdge(from, t, to); + return to; +} + +void LexerATNSimulator::addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q) { + if (/*t < MIN_DFA_EDGE ||*/ t > MAX_DFA_EDGE) { // MIN_DFA_EDGE is 0 + // Only track edges within the DFA bounds + return; + } + + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + p->edges[t - MIN_DFA_EDGE] = q; // connect +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs) { + return addDFAState(configs, true); +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs, bool suppressEdge) { + /* the lexer evaluates predicates on-the-fly; by this point configs + * should not contain any configurations with unevaluated predicates. 
+ */ + assert(!configs->hasSemanticContext); + + dfa::DFAState *proposed = new dfa::DFAState(std::unique_ptr<ATNConfigSet>(configs)); /* mem-check: managed by the DFA or deleted below */ + Ref<ATNConfig> firstConfigWithRuleStopState = nullptr; + for (const auto &c : configs->configs) { + if (RuleStopState::is(c->state)) { + firstConfigWithRuleStopState = c; + break; + } + } + + if (firstConfigWithRuleStopState != nullptr) { + proposed->isAcceptState = true; + proposed->lexerActionExecutor = downCast<const LexerATNConfig&>(*firstConfigWithRuleStopState).getLexerActionExecutor(); + proposed->prediction = atn.ruleToTokenType[firstConfigWithRuleStopState->state->ruleIndex]; + } + + dfa::DFA &dfa = _decisionToDFA[_mode]; + + { + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + auto [existing, inserted] = dfa.states.insert(proposed); + if (!inserted) { + delete proposed; + proposed = *existing; + } else { + // Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`, + // since we already inserted we need to subtract one. + proposed->stateNumber = static_cast<int>(dfa.states.size() - 1); + proposed->configs->setReadonly(true); + } + if (!suppressEdge) { + dfa.s0 = proposed; + } + } + + return proposed; +} + +dfa::DFA& LexerATNSimulator::getDFA(size_t mode) { + return _decisionToDFA[mode]; +} + +std::string LexerATNSimulator::getText(CharStream *input) { + // index is first lookahead char, don't include. 
+ return input->getText(misc::Interval(_startIndex, input->index() - 1)); +} + +size_t LexerATNSimulator::getLine() const { + return _line; +} + +void LexerATNSimulator::setLine(size_t line) { + _line = line; +} + +size_t LexerATNSimulator::getCharPositionInLine() { + return _charPositionInLine; +} + +void LexerATNSimulator::setCharPositionInLine(size_t charPositionInLine) { + _charPositionInLine = charPositionInLine; +} + +void LexerATNSimulator::consume(CharStream *input) { + size_t curChar = input->LA(1); + if (curChar == '\n') { + _line++; + _charPositionInLine = 0; + } else { + _charPositionInLine++; + } + input->consume(); +} + +std::string LexerATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + return std::string("'") + static_cast<char>(t) + std::string("'"); +} + +void LexerATNSimulator::InitializeInstanceFields() { + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = antlr4::Lexer::DEFAULT_MODE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h new file mode 100644 index 0000000000..304430b04d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h @@ -0,0 +1,199 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include <atomic> + +#include "atn/ATNSimulator.h" +#include "atn/LexerATNConfig.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { +namespace atn { + + /// "dup" of ParserInterpreter + class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { + protected: + struct ANTLR4CPP_PUBLIC SimState final { + size_t index = INVALID_INDEX; + size_t line = 0; + size_t charPos = INVALID_INDEX; + dfa::DFAState *dfaState = nullptr; + + void reset(); + }; + + public: + static constexpr size_t MIN_DFA_EDGE = 0; + static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN + + protected: + /// <summary> + /// When we hit an accept state in either the DFA or the ATN, we + /// have to notify the character stream to start buffering characters + /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state + /// includes the current index into the input, the current line, + /// and current character position in that line. Note that the Lexer is + /// tracking the starting line and characterization of the token. These + /// variables track the "state" of the simulator when it hits an accept state. + /// <p/> + /// We track these variables separately for the DFA and ATN simulation + /// because the DFA simulation often has to fail over to the ATN + /// simulation. If the ATN simulation fails, we need the DFA to fall + /// back to its previously accepted state, if any. If the ATN succeeds, + /// then the ATN does the accept and the DFA simulator that invoked it + /// can simply return the predicted token type. + /// </summary> + Lexer *const _recog; + + /// The current token's starting index into the character stream. + /// Shared across DFA to ATN simulation in case the ATN fails and the + /// DFA did not have a previous accept state. In this case, we use the + /// ATN-generated exception object. + size_t _startIndex; + + /// line number 1..n within the input. 
+ size_t _line; + + /// The index of the character relative to the beginning of the line 0..n-1. + size_t _charPositionInLine; + + public: + std::vector<dfa::DFA> &_decisionToDFA; + + protected: + size_t _mode; + + /// Used during DFA/ATN exec to record the most recent accept configuration info. + SimState _prevAccept; + + public: + LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); + LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); + virtual ~LexerATNSimulator() = default; + + virtual void copyState(LexerATNSimulator *simulator); + virtual size_t match(CharStream *input, size_t mode); + virtual void reset() override; + + virtual void clearDFA() override; + + protected: + virtual size_t matchATN(CharStream *input); + virtual size_t execATN(CharStream *input, dfa::DFAState *ds0); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="s"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// </summary> + /// <param name="input"> The input stream </param> + /// <param name="s"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. 
If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. </returns> + virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t); + + virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t); + + /// <summary> + /// Given a starting configuration set, figure out all ATN configurations + /// we can reach upon input {@code t}. Parameter {@code reach} is a return + /// parameter. + /// </summary> + void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already + ATNConfigSet *reach, size_t t); + + virtual void accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index, + size_t line, size_t charPos); + + virtual ATNState *getReachableTarget(const Transition *trans, size_t t); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p); + + /// <summary> + /// Since the alternatives within any lexer decision are ordered by + /// preference, this method stops pursuing the closure as soon as an accept + /// state is reached. After the first accept state is reached by depth-first + /// search from {@code config}, all other (potentially reachable) states for + /// this rule would have a lower priority. + /// </summary> + /// <returns> {@code true} if an accept state is reached, otherwise + /// {@code false}. </returns> + virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon); + + // side-effect: can alter configs.hasSemanticContext + virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon); + + /// <summary> + /// Evaluate a predicate specified in the lexer. 
+ /// <p/> + /// If {@code speculative} is {@code true}, this method was called before + /// <seealso cref="#consume"/> for the matched character. This method should call + /// <seealso cref="#consume"/> before evaluating the predicate to ensure position + /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>, + /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current + /// lexer state. This method should restore {@code input} and the simulator + /// to the original state before returning (i.e. undo the actions made by the + /// call to <seealso cref="#consume"/>. + /// </summary> + /// <param name="input"> The input stream. </param> + /// <param name="ruleIndex"> The rule containing the predicate. </param> + /// <param name="predIndex"> The index of the predicate within the rule. </param> + /// <param name="speculative"> {@code true} if the current index in {@code input} is + /// one character before the predicate's location. + /// </param> + /// <returns> {@code true} if the specified predicate evaluates to + /// {@code true}. </returns> + virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative); + + virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState); + virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); + virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); + + /// <summary> + /// Add a new DFA state if there isn't one with this set of + /// configurations already. This method also detects the first + /// configuration containing an ATN rule stop state. Later, when + /// traversing the DFA, we will know which rule to accept. 
+ /// </summary> + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs); + + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge); + + public: + dfa::DFA& getDFA(size_t mode); + + /// Get the text matched so far for the current token. + virtual std::string getText(CharStream *input); + virtual size_t getLine() const; + virtual void setLine(size_t line); + virtual size_t getCharPositionInLine(); + virtual void setCharPositionInLine(size_t charPositionInLine); + virtual void consume(CharStream *input); + virtual std::string getTokenName(size_t t); + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp new file mode 100644 index 0000000000..a9d9a6771b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp @@ -0,0 +1,15 @@ +#include "LexerAction.h" + +using namespace antlr4::atn; + +size_t LexerAction::hashCode() const { + auto hash = cachedHashCode(); + if (hash == 0) { + hash = hashCodeImpl(); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h new file mode 100644 index 0000000000..5c30a89608 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents a single action which can be executed following the successful + /// match of a lexer rule. 
Lexer actions are used for both embedded action syntax + /// and ANTLR 4's new lexer command syntax. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerAction { + public: + virtual ~LexerAction() = default; + + /// <summary> + /// Gets the serialization type of the lexer action. + /// </summary> + /// <returns> The serialization type of the lexer action. </returns> + /// + /// IMPORTANT: Unlike Java, this returns LexerActionType::INDEXED_CUSTOM for instances of + /// LexerIndexedCustomAction. If you need the wrapped action type, use + /// LexerIndexedCustomAction::getAction()->getActionType(). + LexerActionType getActionType() const { return _actionType; } + + /// <summary> + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the <seealso cref="CharStream"/> + /// index at the time the action is executed. + /// + /// <para>Many lexer commands, including {@code type}, {@code skip}, and + /// {@code more}, do not check the input index during their execution. + /// Actions like this are position-independent, and may be stored more + /// efficiently as part of the <seealso cref="LexerATNConfig#lexerActionExecutor"/>.</para> + /// </summary> + /// <returns> {@code true} if the lexer action semantics can be affected by the + /// position of the input <seealso cref="CharStream"/> at the time it is executed; + /// otherwise, {@code false}. </returns> + bool isPositionDependent() const { return _positionDependent; } + + /// <summary> + /// Execute the lexer action in the context of the specified <seealso cref="Lexer"/>. + /// + /// <para>For position-dependent actions, the input stream must already be + /// positioned correctly prior to calling this method.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. 
</param> + virtual void execute(Lexer *lexer) const = 0; + + size_t hashCode() const; + + virtual bool equals(const LexerAction &other) const = 0; + + virtual std::string toString() const = 0; + + protected: + LexerAction(LexerActionType actionType, bool positionDependent) + : _actionType(actionType), _hashCode(0), _positionDependent(positionDependent) {} + + virtual size_t hashCodeImpl() const = 0; + + size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); } + + private: + const LexerActionType _actionType; + mutable std::atomic<size_t> _hashCode; + const bool _positionDependent; + }; + + inline bool operator==(const LexerAction &lhs, const LexerAction &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const LexerAction &lhs, const LexerAction &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::LexerAction> { + size_t operator()(const ::antlr4::atn::LexerAction &lexerAction) const { + return lexerAction.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp new file mode 100644 index 0000000000..490351b892 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp @@ -0,0 +1,111 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "atn/LexerIndexedCustomAction.h" +#include "support/CPPUtils.h" +#include "support/Arrays.h" +#include "support/Casts.h" + +#include "atn/LexerActionExecutor.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + + bool lexerActionEqual(const Ref<const LexerAction> &lhs, const Ref<const LexerAction> &rhs) { + return *lhs == *rhs; + } + +} + +LexerActionExecutor::LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions) + : _lexerActions(std::move(lexerActions)), _hashCode(0) {} + +Ref<const LexerActionExecutor> LexerActionExecutor::append(const Ref<const LexerActionExecutor> &lexerActionExecutor, + Ref<const LexerAction> lexerAction) { + if (lexerActionExecutor == nullptr) { + return std::make_shared<LexerActionExecutor>(std::vector<Ref<const LexerAction>>{ std::move(lexerAction) }); + } + std::vector<Ref<const LexerAction>> lexerActions; + lexerActions.reserve(lexerActionExecutor->_lexerActions.size() + 1); + lexerActions.insert(lexerActions.begin(), lexerActionExecutor->_lexerActions.begin(), lexerActionExecutor->_lexerActions.end()); + lexerActions.push_back(std::move(lexerAction)); + return std::make_shared<LexerActionExecutor>(std::move(lexerActions)); +} + +Ref<const LexerActionExecutor> LexerActionExecutor::fixOffsetBeforeMatch(int offset) const { + std::vector<Ref<const LexerAction>> updatedLexerActions; + for (size_t i = 0; i < _lexerActions.size(); i++) { + if (_lexerActions[i]->isPositionDependent() && !LexerIndexedCustomAction::is(*_lexerActions[i])) { + if (updatedLexerActions.empty()) { + updatedLexerActions = _lexerActions; // Make a copy. 
+ } + updatedLexerActions[i] = std::make_shared<LexerIndexedCustomAction>(offset, _lexerActions[i]); + } + } + if (updatedLexerActions.empty()) { + return shared_from_this(); + } + return std::make_shared<LexerActionExecutor>(std::move(updatedLexerActions)); +} + +const std::vector<Ref<const LexerAction>>& LexerActionExecutor::getLexerActions() const { + return _lexerActions; +} + +void LexerActionExecutor::execute(Lexer *lexer, CharStream *input, size_t startIndex) const { + bool requiresSeek = false; + size_t stopIndex = input->index(); + + auto onExit = finally([requiresSeek, input, stopIndex]() { + if (requiresSeek) { + input->seek(stopIndex); + } + }); + for (const auto &lexerAction : _lexerActions) { + if (LexerIndexedCustomAction::is(*lexerAction)) { + int offset = downCast<const LexerIndexedCustomAction&>(*lexerAction).getOffset(); + input->seek(startIndex + offset); + requiresSeek = (startIndex + offset) != stopIndex; + } else if (lexerAction->isPositionDependent()) { + input->seek(stopIndex); + requiresSeek = false; + } + lexerAction->execute(lexer); + } +} + +size_t LexerActionExecutor::hashCode() const { + auto hash = _hashCode.load(std::memory_order_relaxed); + if (hash == 0) { + hash = MurmurHash::initialize(); + for (const auto &lexerAction : _lexerActions) { + hash = MurmurHash::update(hash, lexerAction); + } + hash = MurmurHash::finish(hash, _lexerActions.size()); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} + +bool LexerActionExecutor::equals(const LexerActionExecutor &other) const { + if (this == std::addressof(other)) { + return true; + } + return cachedHashCodeEqual(_hashCode.load(std::memory_order_relaxed), other._hashCode.load(std::memory_order_relaxed)) && + _lexerActions.size() == other._lexerActions.size() && + std::equal(_lexerActions.begin(), _lexerActions.end(), other._lexerActions.begin(), lexerActionEqual); +} diff --git 
a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h new file mode 100644 index 0000000000..28bb1e28ec --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h @@ -0,0 +1,128 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Represents an executor for a sequence of lexer actions which traversed during + /// the matching operation of a lexer rule (token). + /// + /// <para>The executor tracks position information for position-dependent lexer actions + /// efficiently, ensuring that actions appearing only at the end of the rule do + /// not cause bloating of the <seealso cref="DFA"/> created for the lexer.</para> + class ANTLR4CPP_PUBLIC LexerActionExecutor final : public std::enable_shared_from_this<LexerActionExecutor> { + public: + /// <summary> + /// Constructs an executor for a sequence of <seealso cref="LexerAction"/> actions. </summary> + /// <param name="lexerActions"> The lexer actions to execute. </param> + explicit LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which executes the actions for + /// the input {@code lexerActionExecutor} followed by a specified + /// {@code lexerAction}. + /// </summary> + /// <param name="lexerActionExecutor"> The executor for actions already traversed by + /// the lexer while matching a token within a particular + /// <seealso cref="LexerATNConfig"/>. If this is {@code null}, the method behaves as + /// though it were an empty executor. 
</param> + /// <param name="lexerAction"> The lexer action to execute after the actions + /// specified in {@code lexerActionExecutor}. + /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> for executing the combine actions + /// of {@code lexerActionExecutor} and {@code lexerAction}. </returns> + static Ref<const LexerActionExecutor> append(const Ref<const LexerActionExecutor> &lexerActionExecutor, + Ref<const LexerAction> lexerAction); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which encodes the current offset + /// for position-dependent lexer actions. + /// + /// <para>Normally, when the executor encounters lexer actions where + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}, it calls + /// <seealso cref="IntStream#seek"/> on the input <seealso cref="CharStream"/> to set the input + /// position to the <em>end</em> of the current token. This behavior provides + /// for efficient DFA representation of lexer actions which appear at the end + /// of a lexer rule, even when the lexer rule matches a variable number of + /// characters.</para> + /// + /// <para>Prior to traversing a match transition in the ATN, the current offset + /// from the token start index is assigned to all position-dependent lexer + /// actions which have not already been assigned a fixed offset. By storing + /// the offsets relative to the token start index, the DFA representation of + /// lexer actions which appear in the middle of tokens remains efficient due + /// to sharing among tokens of the same length, regardless of their absolute + /// position in the input stream.</para> + /// + /// <para>If the current executor already has offsets assigned to all + /// position-dependent lexer actions, the method returns {@code this}.</para> + /// </summary> + /// <param name="offset"> The current offset to assign to all position-dependent + /// lexer actions which do not already have offsets assigned. 
+ /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> which stores input stream offsets + /// for all position-dependent lexer actions. </returns> + Ref<const LexerActionExecutor> fixOffsetBeforeMatch(int offset) const; + + /// <summary> + /// Gets the lexer actions to be executed by this executor. </summary> + /// <returns> The lexer actions to be executed by this executor. </returns> + const std::vector<Ref<const LexerAction>>& getLexerActions() const; + + /// <summary> + /// Execute the actions encapsulated by this executor within the context of a + /// particular <seealso cref="Lexer"/>. + /// + /// <para>This method calls <seealso cref="IntStream#seek"/> to set the position of the + /// {@code input} <seealso cref="CharStream"/> prior to calling + /// <seealso cref="LexerAction#execute"/> on a position-dependent action. Before the + /// method returns, the input position will be restored to the same position + /// it was in when the method was invoked.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. </param> + /// <param name="input"> The input stream which is the source for the current token. + /// When this method is called, the current <seealso cref="IntStream#index"/> for + /// {@code input} should be the start of the following token, i.e. 1 + /// character past the end of the current token. </param> + /// <param name="startIndex"> The token start index. This value may be passed to + /// <seealso cref="IntStream#seek"/> to set the {@code input} position to the beginning + /// of the token. 
</param> + void execute(Lexer *lexer, CharStream *input, size_t startIndex) const; + + size_t hashCode() const; + + bool equals(const LexerActionExecutor &other) const; + + private: + const std::vector<Ref<const LexerAction>> _lexerActions; + mutable std::atomic<size_t> _hashCode; + }; + + inline bool operator==(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::LexerActionExecutor> { + size_t operator()(const ::antlr4::atn::LexerActionExecutor &lexerActionExecutor) const { + return lexerActionExecutor.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h new file mode 100644 index 0000000000..aab4033415 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents the serialization type of a <seealso cref="LexerAction"/>. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + enum class LexerActionType : size_t { + /// <summary> + /// The type of a <seealso cref="LexerChannelAction"/> action. + /// </summary> + CHANNEL = 0, + /// <summary> + /// The type of a <seealso cref="LexerCustomAction"/> action. + /// </summary> + CUSTOM, + /// <summary> + /// The type of a <seealso cref="LexerModeAction"/> action. + /// </summary> + MODE, + /// <summary> + /// The type of a <seealso cref="LexerMoreAction"/> action. 
+ /// </summary> + MORE, + /// <summary> + /// The type of a <seealso cref="LexerPopModeAction"/> action. + /// </summary> + POP_MODE, + /// <summary> + /// The type of a <seealso cref="LexerPushModeAction"/> action. + /// </summary> + PUSH_MODE, + /// <summary> + /// The type of a <seealso cref="LexerSkipAction"/> action. + /// </summary> + SKIP, + /// <summary> + /// The type of a <seealso cref="LexerTypeAction"/> action. + /// </summary> + TYPE, + + INDEXED_CUSTOM, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp new file mode 100644 index 0000000000..b6cda6cff0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerChannelAction.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerChannelAction::LexerChannelAction(int channel) + : LexerAction(LexerActionType::CHANNEL, false), _channel(channel) {} + +void LexerChannelAction::execute(Lexer *lexer) const { + lexer->setChannel(getChannel()); +} + +size_t LexerChannelAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getChannel()); + return MurmurHash::finish(hash, 2); +} + +bool LexerChannelAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerChannelAction&>(other); + return getChannel() == lexerAction.getChannel(); +} + +std::string LexerChannelAction::toString() const { + return "channel(" + std::to_string(getChannel()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h new file mode 100644 index 0000000000..1a5c53efef --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + using antlr4::Lexer; + + /// <summary> + /// Implements the {@code channel} lexer action by calling + /// <seealso cref="Lexer#setChannel"/> with the assigned channel. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerChannelAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CHANNEL; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code channel} action with the specified channel value. </summary> + /// <param name="channel"> The channel value to pass to <seealso cref="Lexer#setChannel"/>. </param> + explicit LexerChannelAction(int channel); + + /// <summary> + /// Gets the channel to use for the <seealso cref="Token"/> created by the lexer. + /// </summary> + /// <returns> The channel to use for the <seealso cref="Token"/> created by the lexer. </returns> + int getChannel() const { return _channel; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setChannel"/> with the + /// value provided by <seealso cref="#getChannel"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _channel; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp new file mode 100644 index 0000000000..b6edd89ea1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerCustomAction::LexerCustomAction(size_t ruleIndex, size_t actionIndex) + : LexerAction(LexerActionType::CUSTOM, true), _ruleIndex(ruleIndex), _actionIndex(actionIndex) {} + +void LexerCustomAction::execute(Lexer *lexer) const { + lexer->action(nullptr, getRuleIndex(), getActionIndex()); +} + +size_t LexerCustomAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getRuleIndex()); + hash = MurmurHash::update(hash, getActionIndex()); + return MurmurHash::finish(hash, 3); +} + +bool LexerCustomAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerCustomAction&>(other); + return getRuleIndex() == lexerAction.getRuleIndex() && getActionIndex() == lexerAction.getActionIndex(); +} + +std::string LexerCustomAction::toString() const { + return "custom(" + std::to_string(getRuleIndex()) + ", " + std::to_string(getActionIndex()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h new file mode 100644 index 0000000000..7973271c62 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Executes a custom lexer action by calling <seealso cref="Recognizer#action"/> with the + /// rule and action indexes assigned to the custom action. The implementation of + /// a custom action is added to the generated code for the lexer in an override + /// of <seealso cref="Recognizer#action"/> when the grammar is compiled. + /// + /// <para>This class may represent embedded actions created with the <code>{...}</code> + /// syntax in ANTLR 4, as well as actions created for lexer commands where the + /// command argument could not be evaluated when the grammar was compiled.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerCustomAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CUSTOM; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a custom lexer action with the specified rule and action + /// indexes. + /// </summary> + /// <param name="ruleIndex"> The rule index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + /// <param name="actionIndex"> The action index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + LexerCustomAction(size_t ruleIndex, size_t actionIndex); + + /// <summary> + /// Gets the rule index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The rule index for the custom action. </returns> + size_t getRuleIndex() const { return _ruleIndex; } + + /// <summary> + /// Gets the action index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The action index for the custom action. 
</returns> + size_t getActionIndex() const { return _actionIndex; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>Custom actions are implemented by calling <seealso cref="Lexer#action"/> with the + /// appropriate rule and action indexes.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const size_t _ruleIndex; + const size_t _actionIndex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp new file mode 100644 index 0000000000..114863702c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerIndexedCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + +} + +LexerIndexedCustomAction::LexerIndexedCustomAction(int offset, Ref<const LexerAction> action) + : LexerAction(LexerActionType::INDEXED_CUSTOM, true), _action(std::move(action)), _offset(offset) {} + +void LexerIndexedCustomAction::execute(Lexer *lexer) const { + // assume the input stream position was properly set by the calling code + getAction()->execute(lexer); +} + +size_t LexerIndexedCustomAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getOffset()); + hash = MurmurHash::update(hash, getAction()); + return MurmurHash::finish(hash, 3); +} + +bool LexerIndexedCustomAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerIndexedCustomAction&>(other); + return getOffset() == lexerAction.getOffset() && + cachedHashCodeEqual(cachedHashCode(), lexerAction.cachedHashCode()) && + *getAction() == *lexerAction.getAction(); +} + +std::string LexerIndexedCustomAction::toString() const { + return "indexedCustom(" + std::to_string(getOffset()) + ", " + getAction()->toString() + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h new file mode 100644 index 0000000000..5693bac62b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h @@ -0,0 +1,76 @@ 
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This implementation of <seealso cref="LexerAction"/> is used for tracking input offsets + /// for position-dependent actions within a <seealso cref="LexerActionExecutor"/>. + /// + /// <para>This action is not serialized as part of the ATN, and is only required for + /// position-dependent lexer actions which appear at a location other than the + /// end of a rule. For more information about DFA optimizations employed for + /// lexer actions, see <seealso cref="LexerActionExecutor#append"/> and + /// <seealso cref="LexerActionExecutor#fixOffsetBeforeMatch"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerIndexedCustomAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::INDEXED_CUSTOM; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new indexed custom action by associating a character offset + /// with a <seealso cref="LexerAction"/>. + /// + /// <para>Note: This class is only required for lexer actions for which + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}.</para> + /// </summary> + /// <param name="offset"> The offset into the input <seealso cref="CharStream"/>, relative to + /// the token start index, at which the specified lexer action should be + /// executed. </param> + /// <param name="action"> The lexer action to execute at a particular offset in the + /// input <seealso cref="CharStream"/>. 
</param> + LexerIndexedCustomAction(int offset, Ref<const LexerAction> action); + + /// <summary> + /// Gets the location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. The value is interpreted as an offset relative + /// to the token start index. + /// </summary> + /// <returns> The location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. </returns> + int getOffset() const { return _offset; } + + /// <summary> + /// Gets the lexer action to execute. + /// </summary> + /// <returns> A <seealso cref="LexerAction"/> object which executes the lexer action. </returns> + const Ref<const LexerAction>& getAction() const { return _action; } + + void execute(Lexer *lexer) const override; + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const Ref<const LexerAction> _action; + const int _offset; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp new file mode 100644 index 0000000000..a4ca3b3d79 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerModeAction::LexerModeAction(int mode) : LexerAction(LexerActionType::MODE, false), _mode(mode) {} + +void LexerModeAction::execute(Lexer *lexer) const { + lexer->setMode(getMode()); +} + +size_t LexerModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getMode()); + return MurmurHash::finish(hash, 2); +} + +bool LexerModeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerModeAction&>(other); + return getMode() == lexerAction.getMode(); +} + +std::string LexerModeAction::toString() const { + return "mode(" + std::to_string(getMode()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h new file mode 100644 index 0000000000..6fa61a2e67 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code mode} lexer action by calling <seealso cref="Lexer#mode"/> with + /// the assigned mode. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code mode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#mode"/>. </param> + explicit LexerModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code mode} command. </returns> + int getMode() const { return _mode; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#mode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp new file mode 100644 index 0000000000..30df87b7b6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerMoreAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerMoreAction>& LexerMoreAction::getInstance() { + static const Ref<const LexerMoreAction> instance(new LexerMoreAction()); + return instance; +} + +void LexerMoreAction::execute(Lexer *lexer) const { + lexer->more(); +} + +size_t LexerMoreAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerMoreAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerMoreAction::toString() const { + return "more"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h new file mode 100644 index 0000000000..fc4b8fcbfc --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code more} lexer action by calling <seealso cref="Lexer#more"/>. 
+ /// + /// <para>The {@code more} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#INSTANCE"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerMoreAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MORE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<const LexerMoreAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#more"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code more} command. + LexerMoreAction() : LexerAction(LexerActionType::MORE, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp new file mode 100644 index 0000000000..5192049348 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerPopModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerPopModeAction>& LexerPopModeAction::getInstance() { + static const Ref<const LexerPopModeAction> instance(new LexerPopModeAction()); + return instance; +} + +void LexerPopModeAction::execute(Lexer *lexer) const { + lexer->popMode(); +} + +size_t LexerPopModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerPopModeAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerPopModeAction::toString() const { + return "popMode"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h new file mode 100644 index 0000000000..8d712cad8c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code popMode} lexer action by calling <seealso cref="Lexer#popMode"/>. 
+ /// + /// <para>The {@code popMode} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#INSTANCE"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPopModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::POP_MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<const LexerPopModeAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#popMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code popMode} command. + LexerPopModeAction() : LexerAction(LexerActionType::POP_MODE, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp new file mode 100644 index 0000000000..3ebd21fab2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerPushModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerPushModeAction::LexerPushModeAction(int mode) : LexerAction(LexerActionType::PUSH_MODE, false), _mode(mode) {} + +void LexerPushModeAction::execute(Lexer *lexer) const { + lexer->pushMode(getMode()); +} + +size_t LexerPushModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getMode()); + return MurmurHash::finish(hash, 2); +} + +bool LexerPushModeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerPushModeAction&>(other); + return getMode() == lexerAction.getMode(); +} + +std::string LexerPushModeAction::toString() const { + return "pushMode(" + std::to_string(getMode()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h new file mode 100644 index 0000000000..32b706b583 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code pushMode} lexer action by calling + /// <seealso cref="Lexer#pushMode"/> with the assigned mode. 
+ /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPushModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::PUSH_MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code pushMode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#pushMode"/>. </param> + explicit LexerPushModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code pushMode} command. </returns> + int getMode() const { return _mode; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#pushMode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp new file mode 100644 index 0000000000..72f9de3e1f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerSkipAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerSkipAction>& LexerSkipAction::getInstance() { + static const Ref<const LexerSkipAction> instance(new LexerSkipAction()); + return instance; +} + +void LexerSkipAction::execute(Lexer *lexer) const { + lexer->skip(); +} + +size_t LexerSkipAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerSkipAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerSkipAction::toString() const { + return "skip"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h new file mode 100644 index 0000000000..afdf4702f2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code skip} lexer action by calling <seealso cref="Lexer#skip"/>. 
+ /// + /// <para>The {@code skip} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#INSTANCE"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerSkipAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::SKIP; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// Provides a singleton instance of this parameterless lexer action. + static const Ref<const LexerSkipAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#skip"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code skip} command. + LexerSkipAction() : LexerAction(LexerActionType::SKIP, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp new file mode 100644 index 0000000000..55ccf358ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerTypeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerTypeAction::LexerTypeAction(int type) : LexerAction(LexerActionType::TYPE, false), _type(type) {} + +void LexerTypeAction::execute(Lexer *lexer) const { + lexer->setType(getType()); +} + +size_t LexerTypeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getType()); + return MurmurHash::finish(hash, 2); +} + +bool LexerTypeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerTypeAction&>(other); + return getType() == lexerAction.getType(); +} + +std::string LexerTypeAction::toString() const { + return "type(" + std::to_string(getType()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h new file mode 100644 index 0000000000..1cd7d71fd3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Implements the {@code type} lexer action by calling <seealso cref="Lexer#setType"/> + /// with the assigned type. 
+ class ANTLR4CPP_PUBLIC LexerTypeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::TYPE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code type} action with the specified token type value. </summary> + /// <param name="type"> The type to assign to the token using <seealso cref="Lexer#setType"/>. </param> + explicit LexerTypeAction(int type); + + /// <summary> + /// Gets the type to assign to a token created by the lexer. </summary> + /// <returns> The type to assign to a token created by the lexer. </returns> + int getType() const { return _type; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setType"/> with the + /// value provided by <seealso cref="#getType"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _type; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp new file mode 100644 index 0000000000..aa3f9124c7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LookaheadEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +LookaheadEventInfo::LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->predictedAlt = predictedAlt; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h new file mode 100644 index 0000000000..f5fc24fde2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// This class represents profiling event information for tracking the lookahead + /// depth required in order to make a prediction. + class ANTLR4CPP_PUBLIC LookaheadEventInfo : public DecisionEventInfo { + public: + /// The alternative chosen by adaptivePredict(), not necessarily + /// the outermost alt shown for a rule; left-recursive rules have + /// user-level alts that differ from the rewritten rule with a (...) block + /// and a (..)* loop. + size_t predictedAlt = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="LookaheadEventInfo"/> class with + /// the specified detailed lookahead information. 
+ /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the necessary + /// information to determine the result of a prediction, or {@code null} if + /// the final configuration set is not available </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the prediction was finally made </param> + /// <param name="fullCtx"> {@code true} if the current lookahead is part of an LL + /// prediction; otherwise, {@code false} if the current lookahead is part of + /// an SLL prediction </param> + LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h new file mode 100644 index 0000000000..2616b1c4b8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Mark the end of a * or + loop. 
+ class ANTLR4CPP_PUBLIC LoopEndState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::LOOP_END; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + ATNState *loopBackState = nullptr; + + LoopEndState() : ATNState(ATNStateType::LOOP_END) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp new file mode 100644 index 0000000000..ba796d7188 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/NotSetTransition.h" +#include "atn/ATNState.h" +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::atn; + +NotSetTransition::NotSetTransition(ATNState *target, misc::IntervalSet set) : SetTransition(TransitionType::NOT_SET, target, std::move(set)) {} + +bool NotSetTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol + && !SetTransition::matches(symbol, minVocabSymbol, maxVocabSymbol); +} + +std::string NotSetTransition::toString() const { + return "NOT_SET " + Transition::toString() + " { " + SetTransition::toString() + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h new file mode 100644 index 0000000000..ef937a60fe --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/SetTransition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC NotSetTransition final : public SetTransition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::NOT_SET; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + NotSetTransition(ATNState *target, misc::IntervalSet set); + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp new file mode 100644 index 0000000000..48655424d8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/OrderedATNConfigSet.h" + +using namespace antlr4::atn; + +size_t OrderedATNConfigSet::hashCode(const ATNConfig &atnConfig) const { + return atnConfig.hashCode(); +} + +bool OrderedATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const { + return lhs == rhs; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h new file mode 100644 index 0000000000..18bf6bcb21 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC OrderedATNConfigSet final : public ATNConfigSet { + public: + OrderedATNConfigSet() = default; + + private: + size_t hashCode(const ATNConfig &atnConfig) const override; + + bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp new file mode 100644 index 0000000000..95a89ac855 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ProfilingATNSimulator.h" +#include "dfa/DFA.h" + +#include "atn/ParseInfo.h" + +using namespace antlr4::atn; + +ParseInfo::ParseInfo(ProfilingATNSimulator *atnSimulator) : _atnSimulator(atnSimulator) { +} + +ParseInfo::~ParseInfo() { +} + +std::vector<DecisionInfo> ParseInfo::getDecisionInfo() { + return _atnSimulator->getDecisionInfo(); +} + +std::vector<size_t> ParseInfo::getLLDecisions() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + std::vector<size_t> LL; + for (size_t i = 0; i < decisions.size(); ++i) { + long long fallBack = decisions[i].LL_Fallback; + if (fallBack > 0) { + LL.push_back(i); + } + } + return LL; +} + +long long ParseInfo::getTotalTimeInPrediction() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long t = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + t += decisions[i].timeInPrediction; + } + return t; +} + +long long ParseInfo::getTotalSLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); i++) { + k += decisions[i].LL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalSLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalATNLookaheadOps() { + 
std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +size_t ParseInfo::getDFASize() { + size_t n = 0; + std::vector<dfa::DFA> &decisionToDFA = _atnSimulator->decisionToDFA; + for (size_t i = 0; i < decisionToDFA.size(); ++i) { + n += getDFASize(i); + } + return n; +} + +size_t ParseInfo::getDFASize(size_t decision) { + dfa::DFA &decisionToDFA = _atnSimulator->decisionToDFA[decision]; + return decisionToDFA.states.size(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h new file mode 100644 index 0000000000..7ced7de433 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ProfilingATNSimulator; + + /// This class provides access to specific and aggregate statistics gathered + /// during profiling of a parser. + class ANTLR4CPP_PUBLIC ParseInfo { + public: + ParseInfo(ProfilingATNSimulator *atnSimulator); + ParseInfo(ParseInfo const&) = default; + virtual ~ParseInfo(); + + ParseInfo& operator=(ParseInfo const&) = default; + + /// <summary> + /// Gets an array of <seealso cref="DecisionInfo"/> instances containing the profiling + /// information gathered for each decision in the ATN. + /// </summary> + /// <returns> An array of <seealso cref="DecisionInfo"/> instances, indexed by decision + /// number. 
</returns> + virtual std::vector<DecisionInfo> getDecisionInfo(); + + /// <summary> + /// Gets the decision numbers for decisions that required one or more + /// full-context predictions during parsing. These are decisions for which + /// <seealso cref="DecisionInfo#LL_Fallback"/> is non-zero. + /// </summary> + /// <returns> A list of decision numbers which required one or more + /// full-context predictions during parsing. </returns> + virtual std::vector<size_t> getLLDecisions(); + + /// <summary> + /// Gets the total time spent during prediction across all decisions made + /// during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#timeInPrediction"/> for all decisions. + /// </summary> + virtual long long getTotalTimeInPrediction(); + + /// <summary> + /// Gets the total number of SLL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#SLL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalSLLLookaheadOps(); + + /// <summary> + /// Gets the total number of LL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#LL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalLLLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL prediction + /// across all decisions made during parsing. + /// </summary> + virtual long long getTotalSLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for LL prediction + /// across all decisions made during parsing. + /// </summary> + virtual long long getTotalLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL and LL + /// prediction across all decisions made during parsing. 
+ /// + /// <para> + /// This value is the sum of <seealso cref="#getTotalSLLATNLookaheadOps"/> and + /// <seealso cref="#getTotalLLATNLookaheadOps"/>.</para> + /// </summary> + virtual long long getTotalATNLookaheadOps(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for all + /// decisions in the ATN. + /// </summary> + virtual size_t getDFASize(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for a + /// particular decision. + /// </summary> + virtual size_t getDFASize(size_t decision); + + protected: + const ProfilingATNSimulator *_atnSimulator; // non-owning, we are created by this simulator. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp new file mode 100644 index 0000000000..ad1da03570 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp @@ -0,0 +1,1387 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "dfa/DFA.h" +#include "NoViableAltException.h" +#include "atn/DecisionState.h" +#include "ParserRuleContext.h" +#include "misc/IntervalSet.h" +#include "Parser.h" +#include "CommonTokenStream.h" +#include "atn/NotSetTransition.h" +#include "atn/AtomTransition.h" +#include "atn/RuleTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/ActionTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "internal/Synchronization.h" + +#include "atn/StarLoopEntryState.h" +#include "atn/BlockStartState.h" +#include "atn/BlockEndState.h" + +#include "misc/Interval.h" +#include "ANTLRErrorListener.h" + +#include "Vocabulary.h" +#include "support/Arrays.h" +#include "support/Casts.h" + +#include "atn/ParserATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_LIST_ATN_DECISIONS 0 +#define DEBUG_DFA 0 +#define RETRY_DEBUG 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +const bool ParserATNSimulator::TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = ParserATNSimulator::getLrLoopSetting(); + +ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(parser, atn, decisionToDFA, sharedContextCache, ParserATNSimulatorOptions()) {} + +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache, + const ParserATNSimulatorOptions &options) +: ATNSimulator(atn, sharedContextCache), 
decisionToDFA(decisionToDFA), parser(parser), + mergeCache(options.getPredictionContextMergeCacheOptions()) { + InitializeInstanceFields(); +} + +void ParserATNSimulator::reset() { +} + +void ParserATNSimulator::clearDFA() { + int size = (int)decisionToDFA.size(); + decisionToDFA.clear(); + for (int d = 0; d < size; ++d) { + decisionToDFA.push_back(dfa::DFA(atn.getDecisionState(d), d)); + } +} + +size_t ParserATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "adaptivePredict decision " << decision << " exec LA(1)==" << getLookaheadName(input) << " line " + << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + _input = input; + _startIndex = input->index(); + _outerContext = outerContext; + dfa::DFA &dfa = decisionToDFA[decision]; + _dfa = &dfa; + + ssize_t m = input->mark(); + size_t index = _startIndex; + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + auto onExit = finally([this, input, index, m] { + if (mergeCache.getOptions().getClearEveryN() != 0) { + if (++_mergeCacheCounter == mergeCache.getOptions().getClearEveryN()) { + mergeCache.clear(); + _mergeCacheCounter = 0; + } + } + _dfa = nullptr; + input->seek(index); + input->release(m); + }); + + dfa::DFAState *s0; + { + SharedLock<SharedMutex> stateLock(atn._stateMutex); + if (dfa.isPrecedenceDfa()) { + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. 
+ SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + s0 = dfa.getPrecedenceStartState(parser->getPrecedence()); + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.s0; + } + } + + if (s0 == nullptr) { + auto s0_closure = computeStartState(dfa.atnStartState, &ParserRuleContext::EMPTY, false); + std::unique_ptr<dfa::DFAState> newState; + std::unique_ptr<dfa::DFAState> oldState; + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + dfa::DFAState* ds0 = dfa.s0; + if (dfa.isPrecedenceDfa()) { + /* If this is a precedence DFA, we use applyPrecedenceFilter + * to convert the computed start state to a precedence start + * state. We then use DFA.setPrecedenceStartState to set the + * appropriate start state for the precedence level rather + * than simply setting DFA.s0. + */ + ds0->configs = std::move(s0_closure); // not used for prediction but useful to know start configs anyway + newState = std::make_unique<dfa::DFAState>(applyPrecedenceFilter(ds0->configs.get())); + s0 = addDFAState(dfa, newState.get()); + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + dfa.setPrecedenceStartState(parser->getPrecedence(), s0); + } else { + newState = std::make_unique<dfa::DFAState>(std::move(s0_closure)); + s0 = addDFAState(dfa, newState.get()); + if (ds0 != s0) { + oldState.reset(ds0); + dfa.s0 = s0; + } + } + if (s0 == newState.get()) { + newState.release(); + } + } + + // We can start with an existing DFA. + size_t alt = execATN(dfa, s0, input, index, outerContext != nullptr ? 
outerContext : &ParserRuleContext::EMPTY); + + return alt; +} + +size_t ParserATNSimulator::execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "execATN decision " << dfa.decision << " exec LA(1)==" << getLookaheadName(input) << + " line " << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + dfa::DFAState *previousD = s0; + +#if DEBUG_ATN == 1 + std::cout << "s0 = " << s0 << std::endl; +#endif + + size_t t = input->LA(1); + + while (true) { // while more work + dfa::DFAState *D = getExistingTargetState(previousD, t); + if (D == nullptr) { + D = computeTargetState(dfa, previousD, t); + } + + if (D == ERROR.get()) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ NoViableAltException e = noViableAlt(input, outerContext, previousD->configs.get(), startIndex, false); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD->configs.get(), outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + + throw e; + } + + if (D->requiresFullContext && _mode != PredictionMode::SLL) { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + BitSet conflictingAlts; + if (D->predicates.size() != 0) { +#if DEBUG_ATN == 1 + std::cout << "DFA state has preds in DFA sim LL failover" << std::endl; +#endif + + size_t conflictIndex = input->index(); + if (conflictIndex != startIndex) { + input->seek(startIndex); + } + + conflictingAlts = evalSemanticContext(D->predicates, outerContext, true); + if (conflictingAlts.count() == 1) { +#if DEBUG_ATN == 1 + std::cout << "Full LL avoided" << std::endl; +#endif + + return conflictingAlts.nextSetBit(0); + } + + if (conflictIndex != startIndex) { + // restore the index so reporting the fallback to full + // context occurs with the index at the correct spot + input->seek(conflictIndex); + } + } + +#if DEBUG_DFA == 1 + std::cout << "ctx sensitive state " << outerContext << " in " << D << std::endl; +#endif + + bool fullCtx = true; + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(dfa.atnStartState, outerContext, fullCtx); + reportAttemptingFullContext(dfa, conflictingAlts, D->configs.get(), startIndex, input->index()); + size_t alt = execATNWithFullContext(dfa, D, s0_closure.get(), input, startIndex, outerContext); + return alt; + } + + if (D->isAcceptState) { + if (D->predicates.empty()) { + return D->prediction; + } + + size_t stopIndex = input->index(); + input->seek(startIndex); + BitSet alts = evalSemanticContext(D->predicates, outerContext, true); + switch (alts.count()) { + case 0: + throw noViableAlt(input, outerContext, D->configs.get(), startIndex, false); + + case 1: + return alts.nextSetBit(0); + + 
default: + // report ambiguity after predicate evaluation to make sure the correct + // set of ambig alts is reported. + reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D->configs.get()); + return alts.nextSetBit(0); + } + } + + previousD = D; + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } +} + +dfa::DFAState *ParserATNSimulator::getExistingTargetState(dfa::DFAState *previousD, size_t t) { + dfa::DFAState* retval; + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + auto iterator = previousD->edges.find(t); + retval = (iterator == previousD->edges.end()) ? nullptr : iterator->second; + return retval; +} + +dfa::DFAState *ParserATNSimulator::computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) { + std::unique_ptr<ATNConfigSet> reach = computeReachSet(previousD->configs.get(), t, false); + if (reach == nullptr) { + addDFAEdge(dfa, previousD, t, ERROR.get()); + return ERROR.get(); + } + + // create new target state; we'll add to DFA after it's complete + dfa::DFAState *D = new dfa::DFAState(std::move(reach)); /* mem-check: managed by the DFA or deleted below, "reach" is no longer valid now. 
*/ + size_t predictedAlt = getUniqueAlt(D->configs.get()); + + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D->isAcceptState = true; + D->configs->uniqueAlt = predictedAlt; + D->prediction = predictedAlt; + } else if (PredictionModeClass::hasSLLConflictTerminatingPrediction(_mode, D->configs.get())) { + // MORE THAN ONE VIABLE ALTERNATIVE + D->configs->conflictingAlts = getConflictingAlts(D->configs.get()); + D->requiresFullContext = true; + // in SLL-only mode, we will stop at this state and return the minimum alt + D->isAcceptState = true; + D->prediction = D->configs->conflictingAlts.nextSetBit(0); + } + + if (D->isAcceptState && D->configs->hasSemanticContext) { + predicateDFAState(D, atn.getDecisionState(dfa.decision)); + if (D->predicates.size() != 0) { + D->prediction = ATN::INVALID_ALT_NUMBER; + } + } + + // all adds to dfa are done after we've created full D state + dfa::DFAState *state = addDFAEdge(dfa, previousD, t, D); + if (state != D) { + delete D; // If the new state exists already we don't need it and use the existing one instead. + } + return state; +} + +void ParserATNSimulator::predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. 
+ size_t nalts = decisionState->transitions.size(); + + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState->configs.get()); + std::vector<Ref<const SemanticContext>> altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState->configs.get(), nalts); + if (!altToPred.empty()) { + dfaState->predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); + dfaState->prediction = ATN::INVALID_ALT_NUMBER; // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState->prediction = altsToCollectPredsFrom.nextSetBit(0); + } +} + +size_t ParserATNSimulator::execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext) { + + bool fullCtx = true; + bool foundExactAmbig = false; + + std::unique_ptr<ATNConfigSet> reach; + ATNConfigSet *previous = s0; + input->seek(startIndex); + size_t t = input->LA(1); + size_t predictedAlt; + + while (true) { + reach = computeReachSet(previous, t, fullCtx); + if (reach == nullptr) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex, previous != s0); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + if (previous != s0) // Don't delete the start set. + delete previous; + previous = nullptr; + + std::vector<BitSet> altSubSets = PredictionModeClass::getConflictingAltSubsets(reach.get()); + reach->uniqueAlt = getUniqueAlt(reach.get()); + // unique prediction? + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + predictedAlt = reach->uniqueAlt; + break; + } + if (_mode != PredictionMode::LL_EXACT_AMBIG_DETECTION) { + predictedAlt = PredictionModeClass::resolvesToJustOneViableAlt(altSubSets); + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + break; + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if (PredictionModeClass::allSubsetsConflict(altSubSets) && PredictionModeClass::allSubsetsEqual(altSubSets)) { + foundExactAmbig = true; + predictedAlt = PredictionModeClass::getSingleViableAlt(altSubSets); + break; + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + previous = reach.release(); + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } + + if (previous != s0) // Don't delete the start set + delete previous; + + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + reportContextSensitivity(dfa, predictedAlt, reach.get(), startIndex, input->index()); + return predictedAlt; + } + + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. 
+ + /* + In non-exact ambiguity detection mode, we might actually be able to + detect an exact ambiguity, but I'm not going to spend the cycles + needed to check. We only emit ambiguity warnings in exact ambiguity + mode. + + For example, we might know that we have conflicting configurations. + But, that does not mean that there is no way forward without a + conflict. It's possible to have nonconflicting alt subsets as in: + + LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + from + + [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + + In this case, (17,1,[5 $]) indicates there is some next sequence that + would resolve this without conflict to alternative 1. Any other viable + next sequence, however, is associated with a conflict. We stop + looking for input because no amount of further lookahead will alter + the fact that we should predict alternative 1. We just can't say for + sure that there is an ambiguity without looking further. + */ + reportAmbiguity(dfa, D, startIndex, input->index(), foundExactAmbig, reach->getAlts(), reach.get()); + + return predictedAlt; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeReachSet(ATNConfigSet *closure_, size_t t, bool fullCtx) { + + std::unique_ptr<ATNConfigSet> intermediate(new ATNConfigSet(fullCtx)); + + /* Configurations already in a rule stop state indicate reaching the end + * of the decision rule (local context) or end of the start rule (full + * context). Once reached, these configurations are never updated by a + * closure operation, so they are handled separately for the performance + * advantage of having a smaller intermediate set when calling closure. + * + * For full-context reach operations, separate handling is required to + * ensure that the alternative matching the longest overall sequence is + * chosen when multiple such configurations can match the input. 
+ */ + std::vector<Ref<ATNConfig>> skippedStopStates; + + // First figure out where we can reach on input t + for (const auto &c : closure_->configs) { + if (RuleStopState::is(c->state)) { + assert(c->context->isEmpty()); + + if (fullCtx || t == Token::EOF) { + skippedStopStates.push_back(c); + } + + continue; + } + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + const Transition *trans = c->state->transitions[ti].get(); + ATNState *target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + intermediate->add(std::make_shared<ATNConfig>(*c, target), &mergeCache); + } + } + } + + // Now figure out where the reach operation can take us... + std::unique_ptr<ATNConfigSet> reach; + + /* This block optimizes the reach operation for intermediate sets which + * trivially indicate a termination state for the overall + * adaptivePredict operation. + * + * The conditions assume that intermediate + * contains all configurations relevant to the reach set, but this + * condition is not true when one or more configurations have been + * withheld in skippedStopStates, or when the current symbol is EOF. + */ + if (skippedStopStates.empty() && t != Token::EOF) { + if (intermediate->size() == 1) { + // Don't pursue the closure if there is just one state. + // It can only have one alternative; just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } else if (getUniqueAlt(intermediate.get()) != ATN::INVALID_ALT_NUMBER) { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } + } + + /* If the reach set could not be trivially determined, perform a closure + * operation on the intermediate set to compute its initial value. 
+ */ + if (reach == nullptr) { + reach.reset(new ATNConfigSet(fullCtx)); + ATNConfig::Set closureBusy; + + bool treatEofAsEpsilon = t == Token::EOF; + for (const auto &c : intermediate->configs) { + closure(c, reach.get(), closureBusy, false, fullCtx, treatEofAsEpsilon); + } + } + + if (t == IntStream::EOF) { + /* After consuming EOF no additional input is possible, so we are + * only interested in configurations which reached the end of the + * decision rule (local context) or end of the start rule (full + * context). Update reach to contain only these configurations. This + * handles both explicit EOF transitions in the grammar and implicit + * EOF transitions following the end of the decision or start rule. + * + * When reach==intermediate, no closure operation was performed. In + * this case, removeAllConfigsNotInRuleStopState needs to check for + * reachable rule stop states as well as configurations already in + * a rule stop state. + * + * This is handled before the configurations in skippedStopStates, + * because any configurations potentially added from that list are + * already guaranteed to meet this condition whether or not it's + * required. + */ + ATNConfigSet *temp = removeAllConfigsNotInRuleStopState(reach.get(), *reach == *intermediate); + if (temp != reach.get()) + reach.reset(temp); // We got a new set, so use that. + } + + /* If skippedStopStates is not null, then it contains at least one + * configuration. For full-context reach operations, these + * configurations reached the end of the start rule, in which case we + * only add them back to reach if no configuration during the current + * closure operation reached such a state. This ensures adaptivePredict + * chooses an alternative matching the longest overall sequence when + * multiple alternatives are viable. 
+ */ + if (skippedStopStates.size() > 0 && (!fullCtx || !PredictionModeClass::hasConfigInRuleStopState(reach.get()))) { + assert(!skippedStopStates.empty()); + + for (const auto &c : skippedStopStates) { + reach->add(c, &mergeCache); + } + } + + if (reach->isEmpty()) { + return nullptr; + } + return reach; +} + +ATNConfigSet* ParserATNSimulator::removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, + bool lookToEndOfRule) { + if (PredictionModeClass::allConfigsInRuleStopStates(configs)) { + return configs; + } + + ATNConfigSet *result = new ATNConfigSet(configs->fullCtx); /* mem-check: released by caller */ + + for (const auto &config : configs->configs) { + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + result->add(config, &mergeCache); + continue; + } + + if (lookToEndOfRule && config->state->epsilonOnlyTransitions) { + misc::IntervalSet nextTokens = atn.nextTokens(config->state); + if (nextTokens.contains(Token::EPSILON)) { + ATNState *endOfRuleState = atn.ruleToStopState[config->state->ruleIndex]; + result->add(std::make_shared<ATNConfig>(*config, endOfRuleState), &mergeCache); + } + } + } + + return result; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx) { + // always at least the implicit call to start rule + Ref<const PredictionContext> initialContext = PredictionContext::fromRuleContext(atn, ctx); + std::unique_ptr<ATNConfigSet> configs(new ATNConfigSet(fullCtx)); + + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<ATNConfig> c = std::make_shared<ATNConfig>(target, (int)i + 1, initialContext); + ATNConfig::Set closureBusy; + closure(c, configs.get(), closureBusy, true, fullCtx, false); + } + + return configs; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::applyPrecedenceFilter(ATNConfigSet *configs) { + std::map<size_t, Ref<const PredictionContext>> statesFromAlt1; + 
std::unique_ptr<ATNConfigSet> configSet(new ATNConfigSet(configs->fullCtx)); + for (const auto &config : configs->configs) { + // handle alt 1 first + if (config->alt != 1) { + continue; + } + + Ref<const SemanticContext> updatedContext = config->semanticContext->evalPrecedence(parser, _outerContext); + if (updatedContext == nullptr) { + // the configuration was eliminated + continue; + } + + statesFromAlt1[config->state->stateNumber] = config->context; + if (updatedContext != config->semanticContext) { + configSet->add(std::make_shared<ATNConfig>(*config, updatedContext), &mergeCache); + } + else { + configSet->add(config, &mergeCache); + } + } + + for (const auto &config : configs->configs) { + if (config->alt == 1) { + // already handled + continue; + } + + if (!config->isPrecedenceFilterSuppressed()) { + /* In the future, this elimination step could be updated to also + * filter the prediction context for alternatives predicting alt>1 + * (basically a graph subtraction algorithm). + */ + auto iterator = statesFromAlt1.find(config->state->stateNumber); + if (iterator != statesFromAlt1.end() && *iterator->second == *config->context) { + // eliminated + continue; + } + } + + configSet->add(config, &mergeCache); + } + + return configSet; +} + +atn::ATNState* ParserATNSimulator::getReachableTarget(const Transition *trans, size_t ttype) { + if (trans->matches(ttype, 0, atn.maxTokenType)) { + return trans->target; + } + + return nullptr; +} + +// Note that caller must memory manage the returned value from this function +std::vector<Ref<const SemanticContext>> ParserATNSimulator::getPredsForAmbigAlts(const BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts) { + // REACH=[1|1|[]|0:0, 1|2|[]|0:1] + /* altToPred starts as an array of all null contexts. The entry at index i + * corresponds to alternative i. altToPred[i] may have one of three values: + * 1. null: no ATNConfig c is found such that c.alt==i + * 2. 
SemanticContext.NONE: At least one ATNConfig c exists such that + * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, + * alt i has at least one un-predicated config. + * 3. Non-NONE Semantic Context: There exists at least one, and for all + * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + * + * From this, it is clear that NONE||anything==NONE. + */ + std::vector<Ref<const SemanticContext>> altToPred(nalts + 1); + + for (const auto &c : configs->configs) { + if (ambigAlts.test(c->alt)) { + altToPred[c->alt] = SemanticContext::Or(altToPred[c->alt], c->semanticContext); + } + } + + size_t nPredAlts = 0; + for (size_t i = 1; i <= nalts; i++) { + if (altToPred[i] == nullptr) { + altToPred[i] = SemanticContext::Empty::Instance; + } else if (altToPred[i] != SemanticContext::Empty::Instance) { + nPredAlts++; + } + } + + // nonambig alts are null in altToPred + if (nPredAlts == 0) { + altToPred.clear(); + } +#if DEBUG_ATN == 1 + std::cout << "getPredsForAmbigAlts result " << Arrays::toString(altToPred) << std::endl; +#endif + + return altToPred; +} + +std::vector<dfa::DFAState::PredPrediction> ParserATNSimulator::getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + const std::vector<Ref<const SemanticContext>> &altToPred) { + bool containsPredicate = std::find_if(altToPred.begin(), altToPred.end(), [](const Ref<const SemanticContext> &context) { + return context != SemanticContext::Empty::Instance; + }) != altToPred.end(); + std::vector<dfa::DFAState::PredPrediction> pairs; + if (containsPredicate) { + for (size_t i = 1; i < altToPred.size(); i++) { + const auto &pred = altToPred[i]; + assert(pred != nullptr); // unpredicted is indicated by SemanticContext.NONE + if (ambigAlts.test(i)) { + pairs.emplace_back(pred, static_cast<int>(i)); + } + } + } + return pairs; +} + +size_t ParserATNSimulator::getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext) +{ + 
std::pair<ATNConfigSet *, ATNConfigSet *> sets = splitAccordingToSemanticValidity(configs, outerContext); + std::unique_ptr<ATNConfigSet> semValidConfigs(sets.first); + std::unique_ptr<ATNConfigSet> semInvalidConfigs(sets.second); + size_t alt = getAltThatFinishedDecisionEntryRule(semValidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // semantically/syntactically viable path exists + return alt; + } + // Is there a syntactically valid path with a failed pred? + if (!semInvalidConfigs->configs.empty()) { + alt = getAltThatFinishedDecisionEntryRule(semInvalidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // syntactically viable path exists + return alt; + } + } + return ATN::INVALID_ALT_NUMBER; +} + +size_t ParserATNSimulator::getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs) { + misc::IntervalSet alts; + for (const auto &c : configs->configs) { + if (c->getOuterContextDepth() > 0 || (c->state != nullptr && c->state->getStateType() == ATNStateType::RULE_STOP && c->context->hasEmptyPath())) { + alts.add(c->alt); + } + } + if (alts.size() == 0) { + return ATN::INVALID_ALT_NUMBER; + } + return alts.getMinElement(); +} + +std::pair<ATNConfigSet *, ATNConfigSet *> ParserATNSimulator::splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext) { + + // mem-check: both pointers must be freed by the caller. 
+ ATNConfigSet *succeeded(new ATNConfigSet(configs->fullCtx)); + ATNConfigSet *failed(new ATNConfigSet(configs->fullCtx)); + for (const auto &c : configs->configs) { + if (c->semanticContext != SemanticContext::Empty::Instance) { + bool predicateEvaluationResult = evalSemanticContext(c->semanticContext, outerContext, c->alt, configs->fullCtx); + if (predicateEvaluationResult) { + succeeded->add(c); + } else { + failed->add(c); + } + } else { + succeeded->add(c); + } + } + return { succeeded, failed }; +} + +BitSet ParserATNSimulator::evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions, + ParserRuleContext *outerContext, bool complete) { + BitSet predictions; + for (const auto &prediction : predPredictions) { + if (prediction.pred == SemanticContext::Empty::Instance) { + predictions.set(prediction.alt); + if (!complete) { + break; + } + continue; + } + + bool fullCtx = false; // in dfa + bool predicateEvaluationResult = evalSemanticContext(prediction.pred, outerContext, prediction.alt, fullCtx); +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "eval pred " << prediction.toString() << " = " << predicateEvaluationResult << std::endl; +#endif + + if (predicateEvaluationResult) { +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "PREDICT " << prediction.alt << std::endl; +#endif + + predictions.set(prediction.alt); + if (!complete) { + break; + } + } + } + + return predictions; +} + +bool ParserATNSimulator::evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t /*alt*/, bool /*fullCtx*/) { + return pred->eval(parser, parserCallStack); +} + +void ParserATNSimulator::closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) { + const int initialDepth = 0; + closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, initialDepth, treatEofAsEpsilon); + + 
assert(!fullCtx || !configs->dipsIntoOuterContext); +} + +void ParserATNSimulator::closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, + ATNConfig::Set &closureBusy, bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + +#if DEBUG_ATN == 1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if (!config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) == PredictionContext::EMPTY_RETURN_STATE) { + if (fullCtx) { + configs->add(std::make_shared<ATNConfig>(*config, config->state, PredictionContext::EMPTY), &mergeCache); + continue; + } else { + // we have no context info, just chase follow links (if greedy) +#if DEBUG_ATN == 1 + std::cout << "FALLING off rule " << getRuleName(config->state->ruleIndex) << std::endl; +#endif + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); + } + continue; + } + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state + Ref<ATNConfig> c = std::make_shared<ATNConfig>(returnState, config->alt, newContext, config->semanticContext); + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + // + // This assignment also propagates the + // isPrecedenceFilterSuppressed() value to the new + // configuration. 
+ c->reachesIntoOuterContext = config->reachesIntoOuterContext; + assert(depth > INT_MIN); + + closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth - 1, treatEofAsEpsilon); + } + return; + } else if (fullCtx) { + // reached end of start rule + configs->add(config, &mergeCache); + return; + } else { + // else if we have no context info, just chase follow links (if greedy) + } + } + + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); +} + +void ParserATNSimulator::closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + ATNState *p = config->state; + // optimization + if (!p->epsilonOnlyTransitions) { + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. + configs->add(config, &mergeCache); + } + + for (size_t i = 0; i < p->transitions.size(); i++) { + if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config.get())) + continue; + + const Transition *t = p->transitions[i].get(); + bool continueCollecting = !(t != nullptr && t->getTransitionType() == TransitionType::ACTION) && collectPredicates; + Ref<ATNConfig> c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon); + if (c != nullptr) { + int newDepth = depth; + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + assert(!fullCtx); + + // target fell off end of rule; mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if this is > 0. 
+ + if (closureBusy.count(c) > 0) { + // avoid infinite recursion for right-recursive rules + continue; + } + closureBusy.insert(c); + + if (_dfa != nullptr && _dfa->isPrecedenceDfa()) { + size_t outermostPrecedenceReturn = downCast<const EpsilonTransition *>(t)->outermostPrecedenceReturn(); + if (outermostPrecedenceReturn == _dfa->atnStartState->ruleIndex) { + c->setPrecedenceFilterSuppressed(true); + } + } + + c->reachesIntoOuterContext++; + + if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + configs->dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method + assert(newDepth > INT_MIN); + + newDepth--; +#if DEBUG_DFA == 1 + std::cout << "dips into outer ctx: " << c << std::endl; +#endif + + } else if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + if (t != nullptr && t->getTransitionType() == TransitionType::RULE) { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if (newDepth >= 0) { + newDepth++; + } + } + + closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon); + } + } +} + +bool ParserATNSimulator::canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const { + if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) + return false; + + ATNState *p = config->state; + + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if (p->getStateType() != ATNStateType::STAR_LOOP_ENTRY || + !((StarLoopEntryState *)p)->isPrecedenceDecision || // Are we the special loop entry/exit state? 
+ config->context->isEmpty() || // If SLL wildcard + config->context->hasEmptyPath()) + { + return false; + } + + // Require all return states to return back to the same rule + // that p is in. + size_t numCtxs = config->context->size(); + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + if (returnState->ruleIndex != p->ruleIndex) + return false; + } + + BlockStartState *decisionStartState = (BlockStartState *)p->transitions[0]->target; + size_t blockEndStateNum = decisionStartState->endState->stateNumber; + BlockEndState *blockEndState = (BlockEndState *)atn.states[blockEndStateNum]; + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + size_t returnStateNumber = config->context->getReturnState(i); + ATNState *returnState = atn.states[returnStateNumber]; + // All states must have single outgoing epsilon edge. + if (returnState->transitions.size() != 1 || !returnState->transitions[0]->isEpsilon()) + { + return false; + } + + // Look for prefix op case like 'not expr', (' type ')' expr + ATNState *returnStateTarget = returnState->transitions[0]->target; + if (returnState->getStateType() == ATNStateType::BLOCK_END && returnStateTarget == p) { + continue; + } + + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if (returnState == blockEndState) { + continue; + } + + // Look for ternary expr ? expr : expr. 
The return state points at block end, + // which points at loop entry state + if (returnStateTarget == blockEndState) { + continue; + } + + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if (returnStateTarget->getStateType() == ATNStateType::BLOCK_END && + returnStateTarget->transitions.size() == 1 && + returnStateTarget->transitions[0]->isEpsilon() && + returnStateTarget->transitions[0]->target == p) + { + continue; + } + + // Anything else ain't conforming. + return false; + } + + return true; +} + +std::string ParserATNSimulator::getRuleName(size_t index) { + if (parser != nullptr) { + return parser->getRuleNames()[index]; + } + return "<rule " + std::to_string(index) + ">"; +} + +Ref<ATNConfig> ParserATNSimulator::getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon) { + switch (t->getTransitionType()) { + case TransitionType::RULE: + return ruleTransition(config, static_cast<const RuleTransition*>(t)); + + case TransitionType::PRECEDENCE: + return precedenceTransition(config, static_cast<const PrecedencePredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case TransitionType::PREDICATE: + return predTransition(config, static_cast<const PredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case TransitionType::ACTION: + return actionTransition(config, static_cast<const ActionTransition*>(t)); + + case TransitionType::EPSILON: + return std::make_shared<ATNConfig>(*config, t->target); + + case TransitionType::ATOM: + case TransitionType::RANGE: + case TransitionType::SET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, 0, 1)) { + return std::make_shared<ATNConfig>(*config, t->target); + } + } + + return nullptr; + + default: + return 
nullptr; + } +} + +Ref<ATNConfig> ParserATNSimulator::actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "ACTION edge " << t->ruleIndex << ":" << t->actionIndex << std::endl; +#endif + + return std::make_shared<ATNConfig>(*config, t->target); +} + +Ref<ATNConfig> ParserATNSimulator::precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->getPrecedence() << ">=_p" << ", ctx dependent=true" << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c; + if (collectPredicates && inContext) { + const auto &predicate = pt->getPredicate(); + + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(predicate, _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(*config, pt->target); // no pred context + } + } else { + Ref<const SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(*config, pt->target, std::move(newSemCtx)); + } + } else { + c = std::make_shared<ATNConfig>(*config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->getRuleIndex() << ":" << pt->getPredIndex() << ", ctx dependent=" << pt->isCtxDependent() << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c = nullptr; + if (collectPredicates && (!pt->isCtxDependent() || (pt->isCtxDependent() && inContext))) { + const auto &predicate = pt->getPredicate(); + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(predicate, _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(*config, pt->target); // no pred context + } + } else { + Ref<const SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(*config, pt->target, std::move(newSemCtx)); + } + } else { + c = std::make_shared<ATNConfig>(*config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "CALL rule " << getRuleName(t->target->ruleIndex) << ", ctx=" << config->context << std::endl; +#endif + + atn::ATNState *returnState = t->followState; + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(config->context, returnState->stateNumber); + return std::make_shared<ATNConfig>(*config, t->target, newContext); +} + +BitSet ParserATNSimulator::getConflictingAlts(ATNConfigSet *configs) { + std::vector<BitSet> altsets = PredictionModeClass::getConflictingAltSubsets(configs); + return PredictionModeClass::getAlts(altsets); +} + +BitSet ParserATNSimulator::getConflictingAltsOrUniqueAlt(ATNConfigSet *configs) { + BitSet conflictingAlts; + if (configs->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + conflictingAlts.set(configs->uniqueAlt); + } else { + conflictingAlts = configs->conflictingAlts; + } + return conflictingAlts; +} + +std::string ParserATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + + const dfa::Vocabulary &vocabulary = parser != nullptr ? 
parser->getVocabulary() : dfa::Vocabulary(); + std::string displayName = vocabulary.getDisplayName(t); + if (displayName == std::to_string(t)) { + return displayName; + } + + return displayName + "<" + std::to_string(t) + ">"; +} + +std::string ParserATNSimulator::getLookaheadName(TokenStream *input) { + return getTokenName(input->LA(1)); +} + +void ParserATNSimulator::dumpDeadEndConfigs(NoViableAltException &nvae) { + std::cerr << "dead end configs: "; + for (const auto &c : nvae.getDeadEndConfigs()->configs) { + std::string trans = "no edges"; + if (c->state->transitions.size() > 0) { + const Transition *t = c->state->transitions[0].get(); + if (t != nullptr && t->getTransitionType() == TransitionType::ATOM) { + const AtomTransition *at = static_cast<const AtomTransition*>(t); + trans = "Atom " + getTokenName(at->_label); + } else if (t != nullptr && t->getTransitionType() == TransitionType::SET) { + const SetTransition *st = static_cast<const SetTransition*>(t); + trans = "Set "; + trans += st->set.toString(); + } else if (t != nullptr && t->getTransitionType() == TransitionType::NOT_SET) { + const SetTransition *st = static_cast<const NotSetTransition*>(t); + trans = "~Set "; + trans += st->set.toString(); + } + } + std::cerr << c->toString(true) + ":" + trans; + } +} + +NoViableAltException ParserATNSimulator::noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs) { + return NoViableAltException(parser, input, input->get(startIndex), input->LT(1), configs, outerContext, deleteConfigs); +} + +size_t ParserATNSimulator::getUniqueAlt(ATNConfigSet *configs) { + size_t alt = ATN::INVALID_ALT_NUMBER; + for (const auto &c : configs->configs) { + if (alt == ATN::INVALID_ALT_NUMBER) { + alt = c->alt; // found first alt + } else if (c->alt != alt) { + return ATN::INVALID_ALT_NUMBER; + } + } + return alt; +} + +dfa::DFAState *ParserATNSimulator::addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, 
ssize_t t, dfa::DFAState *to) { +#if DEBUG_DFA == 1 + std::cout << "EDGE " << from << " -> " << to << " upon " << getTokenName(t) << std::endl; +#endif + + if (to == nullptr) { + return nullptr; + } + + { + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + to = addDFAState(dfa, to); // used existing if possible not incoming + } + if (from == nullptr || t > (int)atn.maxTokenType) { + return to; + } + + { + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + from->edges[t] = to; // connect + } + +#if DEBUG_DFA == 1 + std::string dfaText; + if (parser != nullptr) { + dfaText = dfa.toString(parser->getVocabulary()); + } else { + dfaText = dfa.toString(dfa::Vocabulary()); + } + std::cout << "DFA=\n" << dfaText << std::endl; +#endif + + return to; +} + +dfa::DFAState *ParserATNSimulator::addDFAState(dfa::DFA &dfa, dfa::DFAState *D) { + if (D == ERROR.get()) { + return D; + } + + // Optimizing the configs below should not alter the hash code. Thus we can just do an insert + // which will only succeed if an equivalent DFAState does not already exist. + auto [existing, inserted] = dfa.states.insert(D); + if (!inserted) { + return *existing; + } + + // Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`, since + // we already inserted we need to subtract one. 
+ D->stateNumber = static_cast<int>(dfa.states.size() - 1); + if (!D->configs->isReadonly()) { + D->configs->optimizeConfigs(this); + D->configs->setReadonly(true); + } + +#if DEBUG_DFA == 1 + std::cout << "adding new DFA state: " << D << std::endl; +#endif + + return D; +} + +void ParserATNSimulator::reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAttemptingFullContext decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportAttemptingFullContext(parser, dfa, startIndex, stopIndex, conflictingAlts, configs); + } +} + +void ParserATNSimulator::reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval(startIndex, stopIndex); + std::cout << "reportContextSensitivity decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); + } +} + +void ParserATNSimulator::reportAmbiguity(dfa::DFA &dfa, dfa::DFAState * /*D*/, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAmbiguity " << ambigAlts << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + 
parser->getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +void ParserATNSimulator::setPredictionMode(PredictionMode newMode) { + _mode = newMode; +} + +atn::PredictionMode ParserATNSimulator::getPredictionMode() { + return _mode; +} + +Parser* ParserATNSimulator::getParser() { + return parser; +} + +#ifdef _MSC_VER +#pragma warning (disable:4996) // 'getenv': This function or variable may be unsafe. Consider using _dupenv_s instead. +#endif + +bool ParserATNSimulator::getLrLoopSetting() { + char *var = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"); + if (var == nullptr) + return false; + std::string value(var); + return value == "true" || value == "1"; +} + +#ifdef _MSC_VER +#pragma warning (default:4996) +#endif + +void ParserATNSimulator::InitializeInstanceFields() { + _mode = PredictionMode::LL; + _startIndex = 0; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h new file mode 100644 index 0000000000..28fd059dd2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h @@ -0,0 +1,911 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "PredictionMode.h" +#include "dfa/DFAState.h" +#include "atn/ATNSimulator.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextMergeCache.h" +#include "atn/ParserATNSimulatorOptions.h" +#include "SemanticContext.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /** + * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. + * + * <p> + * The basic complexity of the adaptive strategy makes it harder to understand. + * We begin with ATN simulation to build paths in a DFA. 
Subsequent prediction + * requests go through the DFA first. If they reach a state without an edge for + * the current symbol, the algorithm fails over to the ATN simulation to + * complete the DFA path for the current input (until it finds a conflict state + * or uniquely predicting state).</p> + * + * <p> + * All of that is done without using the outer context because we want to create + * a DFA that is not dependent upon the rule invocation stack when we do a + * prediction. One DFA works in all contexts. We avoid using context not + * necessarily because it's slower, although it can be, but because of the DFA + * caching problem. The closure routine only considers the rule invocation stack + * created during prediction beginning in the decision rule. For example, if + * prediction occurs without invoking another rule's ATN, there are no context + * stacks in the configurations. When lack of context leads to a conflict, we + * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing + * strategy (versus full LL(*)).</p> + * + * <p> + * When SLL yields a configuration set with conflict, we rewind the input and + * retry the ATN simulation, this time using full outer context without adding + * to the DFA. Configuration context stacks will be the full invocation stacks + * from the start rule. If we get a conflict using full context, then we can + * definitively say we have a true ambiguity for that input sequence. If we + * don't get a conflict, it implies that the decision is sensitive to the outer + * context. (It is not context-sensitive in the sense of context-sensitive + * grammars.)</p> + * + * <p> + * The next time we reach this DFA state with an SLL conflict, through DFA + * simulation, we will again retry the ATN simulation using full context mode. 
+ * This is slow because we can't save the results and have to "interpret" the + * ATN each time we get that input.</p> + * + * <p> + * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> + * + * <p> + * We could cache results from full context to predicted alternative easily and + * that saves a lot of time but doesn't work in presence of predicates. The set + * of visible predicates from the ATN start state changes depending on the + * context, because closure can fall off the end of a rule. I tried to cache + * tuples (stack context, semantic context, predicted alt) but it was slower + * than interpreting and much more complicated. Also required a huge amount of + * memory. The goal is not to create the world's fastest parser anyway. I'd like + * to keep this algorithm simple. By launching multiple threads, we can improve + * the speed of parsing across a large number of files.</p> + * + * <p> + * There is no strict ordering between the amount of input used by SLL vs LL, + * which makes it really hard to build a cache for full context. Let's say that + * we have input A B C that leads to an SLL conflict with full context X. That + * implies that using X we might only use A B but we could also use A B C D to + * resolve conflict. Input A B C D could predict alternative 1 in one position + * in the input and A B C E could predict alternative 2 in another position in + * input. The conflicting SLL configurations could still be non-unique in the + * full context prediction, which would lead us to requiring more input than the + * original A B C. To make a prediction cache work, we have to track the exact + * input used during the previous prediction. That amounts to a cache that maps + * X to a specific DFA for that context.</p> + * + * <p> + * Something should be done for left-recursive expression predictions. They are + * likely LL(1) + pred eval. 
Easier to do the whole SLL unless error and retry + * with full LL thing Sam does.</p> + * + * <p> + * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> + * + * <p> + * We avoid doing full context retry when the outer context is empty, we did not + * dip into the outer context by falling off the end of the decision state rule, + * or when we force SLL mode.</p> + * + * <p> + * As an example of the not dip into outer context case, consider super + * constructor calls versus function calls. One grammar might look like + * this:</p> + * + * <pre> + * ctorBody + * : '{' superCall? stat* '}' + * ; + * </pre> + * + * <p> + * Or, you might see something like</p> + * + * <pre> + * stat + * : superCall ';' + * | expression ';' + * | ... + * ; + * </pre> + * + * <p> + * In both cases I believe that no closure operations will dip into the outer + * context. In the first case ctorBody in the worst case will stop at the '}'. + * In the 2nd case it should stop at the ';'. Both cases should stay within the + * entry rule and not dip into the outer context.</p> + * + * <p> + * <strong>PREDICATES</strong></p> + * + * <p> + * Predicates are always evaluated if present, in both SLL and LL. SLL and + * LL simulation deal with predicates differently. SLL collects predicates as + * it performs closure operations like ANTLR v3 did. It delays predicate + * evaluation until it reaches an accept state. This allows us to cache the SLL + * ATN simulation whereas, if we had evaluated predicates on-the-fly during + * closure, the DFA state configuration sets would be different and we couldn't + * build up a suitable DFA.</p> + * + * <p> + * When building a DFA accept state during ATN simulation, we evaluate any + * predicates and return the sole semantically valid alternative. If there is + * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, + * we throw an exception. Alternatives without predicates act like they have + * true predicates. 
The simple way to think about it is to strip away all + * alternatives with false predicates and choose the minimum alternative that + * remains.</p> + * + * <p> + * When we start in the DFA and reach an accept state that's predicated, we test + * those and return the minimum semantically viable alternative. If no + * alternatives are viable, we throw an exception.</p> + * + * <p> + * During full LL ATN simulation, closure always evaluates predicates + * on-the-fly. This is crucial to reducing the configuration set size during + * closure. It hits a landmine when parsing with the Java grammar, for example, + * without this on-the-fly evaluation.</p> + * + * <p> + * <strong>SHARING DFA</strong></p> + * + * <p> + * All instances of the same parser share the same decision DFAs through a + * static field. Each instance gets its own ATN simulator but they share the + * same {@link #decisionToDFA} field. They also share a + * {@link PredictionContextCache} object that makes sure that all + * {@link PredictionContext} objects are shared among the DFA states. This makes + * a big size difference.</p> + * + * <p> + * <strong>THREAD SAFETY</strong></p> + * + * <p> + * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when + * it adds a new DFA object to that array. {@link #addDFAEdge} + * locks on the DFA for the current decision when setting the + * {@link DFAState#edges} field. {@link #addDFAState} locks on + * the DFA for the current decision when looking up a DFA state to see if it + * already exists. We must make sure that all requests to add DFA states that + * are equivalent result in the same shared DFA object. This is because lots of + * threads will be trying to update the DFA at once. The + * {@link #addDFAState} method also locks inside the DFA lock + * but this time on the shared context cache when it rebuilds the + * configurations' {@link PredictionContext} objects using cached + * subgraphs/nodes. 
No other locking occurs, even during DFA simulation. This is + * safe as long as we can guarantee that all threads referencing + * {@code s.edge[t]} get the same physical target {@link DFAState}, or + * {@code null}. Once into the DFA, the DFA simulation does not reference the + * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new + * targets. The DFA simulator will either find {@link DFAState#edges} to be + * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or + * {@code dfa.edges[t]} to be non-null. The + * {@link #addDFAEdge} method could be racing to set the field + * but in either case the DFA simulator works; if it finds {@code null}, it requests ATN + * simulation. It could also race trying to get {@code dfa.edges[t]}, but either + * way it will work because it's not doing a test and set operation.</p> + * + * <p> + * <strong>Starting with SLL then failing over to combined SLL/LL (Two-Stage + * Parsing)</strong></p> + * + * <p> + * Sam pointed out that if SLL does not give a syntax error, then there is no + * point in doing full LL, which is slower. We only have to try LL if we get a + * syntax error. For maximum speed, Sam starts the parser set to pure SLL + * mode with the {@link BailErrorStrategy}:</p> + * + * <pre> + * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; + * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); + * </pre> + * + * <p> + * If it does not get a syntax error, then we're done. If it does get a syntax + * error, we need to retry with the combined SLL/LL strategy.</p> + * + * <p> + * The reason this works is as follows. If there are no SLL conflicts, then the + * grammar is SLL (at least for that input set). If there is an SLL conflict, + * the full LL analysis must yield a set of viable alternatives which is a + * subset of the alternatives reported by SLL. 
If the LL set is a singleton, + * then the grammar is LL but not SLL. If the LL set is the same size as the SLL + * set, the decision is SLL. If the LL set has size > 1, then that decision + * is truly ambiguous on the current input. If the LL set is smaller, then the + * SLL conflict resolution might choose an alternative that the full LL would + * rule out as a possibility based upon better context information. If that's + * the case, then the SLL parse will definitely get an error because the full LL + * analysis says it's not viable. If SLL conflict resolution chooses an + * alternative within the LL set, then both SLL and LL would choose the same + * alternative because they both choose the minimum of multiple conflicting + * alternatives.</p> + * + * <p> + * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and + * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL + * parsing will get an error because SLL will pursue alternative 1. If + * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will + * choose the same alternative because alternative one is the minimum of either + * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax + * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> + * + * <p> + * Of course, if the input is invalid, then we will get an error for sure in + * both SLL and LL parsing. Erroneous input will therefore require 2 passes over + * the input.</p> + */ + class ANTLR4CPP_PUBLIC ParserATNSimulator : public ATNSimulator { + public: + /// Testing only! 
+ ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache, + const ParserATNSimulatorOptions &options); + + virtual void reset() override; + virtual void clearDFA() override; + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext); + + static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT; + + std::vector<dfa::DFA> &decisionToDFA; + + /** Implements first-edge (loop entry) elimination as an optimization + * during closure operations. See antlr/antlr4#1398. + * + * The optimization is to avoid adding the loop entry config when + * the exit path can only lead back to the same + * StarLoopEntryState after popping context at the rule end state + * (traversing only epsilon edges, so we're still in closure, in + * this same rule). + * + * We need to detect any state that can reach loop entry on + * epsilon w/o exiting rule. We don't have to look at FOLLOW + * links, just ensure that all stack tops for config refer to key + * states in LR rule. + * + * To verify we are in the right situation we must first check + * closure is at a StarLoopEntryState generated during LR removal. + * Then we check that each stack top of context is a return state + * from one of these cases: + * + * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + * 2. expr op expr. The return state is the block end of internal block of (...)* + * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + * That state points at block end of internal block of (...)*. + * 4. expr '?' expr ':' expr. The return state points at block end, + * which points at loop entry state. 
+ * + * If any is true for each stack top, then closure does not add a + * config to the current config set for edge[0], the loop entry branch. + * + * Conditions fail if any context for the current config is: + * + * a. empty (we'd fall out of expr to do a global FOLLOW which could + * even be to some weird spot in expr) or, + * b. lies outside of expr or, + * c. lies within expr but at a state not the BlockEndState + * generated during LR removal + * + * Do we need to evaluate predicates ever in closure for this case? + * + * No. Predicates, including precedence predicates, are only + * evaluated when computing a DFA start state. I.e., only before + * the lookahead (but not parser) consumes a token. + * + * There are no epsilon edges allowed in LR rule alt blocks or in + * the "primary" part (ID here). If closure is in + * StarLoopEntryState any lookahead operation will have consumed a + * token as there are no epsilon-paths that lead to + * StarLoopEntryState. We do not have to evaluate predicates + * therefore if we are in the generated StarLoopEntryState of a LR + * rule. Note that when making a prediction starting at that + * decision point, decision d=2, compute-start-state performs + * closure starting at edges[0], edges[1] emanating from + * StarLoopEntryState. That means it is not performing closure on + * StarLoopEntryState during compute-start-state. + * + * How do we know this always gives same prediction answer? + * + * Without predicates, loop entry and exit paths are ambiguous + * upon remaining input +b (in, say, a+b). Either paths lead to + * valid parses. Closure can lead to consuming + immediately or by + * falling out of this call to expr back into expr and loop back + * again to StarLoopEntryState to match +b. In this special case, + * we choose the more efficient path, which is to take the bypass + * path. + * + * The lookahead language has not changed because closure chooses + * one path over the other. 
Both paths lead to consuming the same + * remaining input during a lookahead operation. If the next token + * is an operator, lookahead will enter the choice block with + * operators. If it is not, lookahead will exit expr. Same as if + * closure had chosen to enter the choice block immediately. + * + * Closure is examining one config (some loopentrystate, some alt, + * context) which means it is considering exactly one alt. Closure + * always copies the same alt to any derived configs. + * + * How do we know this optimization doesn't mess up precedence in + * our parse trees? + * + * Looking through expr from left edge of stat only has to confirm + * that an input, say, a+b+c; begins with any valid interpretation + * of an expression. The precedence actually doesn't matter when + * making a decision in stat seeing through expr. It is only when + * parsing rule expr that we must use the precedence to get the + * right interpretation and, hence, parse tree. + */ + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const; + virtual std::string getRuleName(size_t index); + + virtual Ref<ATNConfig> precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx); + + void setPredictionMode(PredictionMode newMode); + PredictionMode getPredictionMode(); + + Parser* getParser(); + + virtual std::string getTokenName(size_t t); + + virtual std::string getLookaheadName(TokenStream *input); + + /// <summary> + /// Used for debugging in adaptivePredict around execATN but I cut + /// it out for clarity now that alg. works well. We can leave this + /// "dead" code for a bit. + /// </summary> + virtual void dumpDeadEndConfigs(NoViableAltException &nvae); + + protected: + Parser *const parser; + + /// <summary> + /// Each prediction operation uses a cache for merge of prediction contexts. + /// Don't keep around as it wastes huge amounts of memory. 
The merge cache + /// isn't synchronized but we're ok since two threads shouldn't reuse same + /// parser/atnsim object because it can only handle one input at a time. + /// This maps graphs a and b to merged result c. (a,b)->c. We can avoid + /// the merge if we ever see a and b again. Note that (b,a)->c should + /// also be examined during cache lookup. + /// </summary> + PredictionContextMergeCache mergeCache; + size_t _mergeCacheCounter = 0; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream *_input; + size_t _startIndex; + ParserRuleContext *_outerContext; + dfa::DFA *_dfa; // Reference into the decisionToDFA vector. + + /// <summary> + /// Performs ATN simulation to compute a predicted alternative based + /// upon the remaining input, but also updates the DFA cache to avoid + /// having to traverse the ATN again for the same input sequence. + /// + /// There are some key conditions we're looking for after computing a new + /// set of ATN configs (proposed DFA state): + /// if the set is empty, there is no viable alternative for current symbol + /// does the state uniquely predict an alternative? + /// does the state have a conflict that would prevent us from + /// putting it on the work list? + /// + /// We also have some key operations to do: + /// add an edge from previous DFA state to potentially new DFA state, D, + /// upon current symbol but only if adding to work list, which means in all + /// cases except no viable alternative (and possibly non-greedy decisions?) 
+ /// collecting predicates and adding semantic context to DFA accept states + /// adding rule context to context-sensitive DFA accept states + /// consuming an input symbol + /// reporting a conflict + /// reporting an ambiguity + /// reporting a context sensitivity + /// reporting insufficient predicates + /// + /// cover these cases: + /// dead end + /// single alt + /// single alt + preds + /// conflict + /// conflict + preds + /// </summary> + virtual size_t execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. 
</returns> + virtual dfa::DFAState *computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t); + + virtual void predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState); + + // comes back with reach.uniqueAlt set to a valid alt + virtual size_t execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext); // how far we got before failing over + + virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx); + + /// <summary> + /// Return a configuration set containing only the configurations from + /// {@code configs} which are in a <seealso cref="RuleStopState"/>. If all + /// configurations in {@code configs} are already in a rule stop state, this + /// method simply returns {@code configs}. + /// <p/> + /// When {@code lookToEndOfRule} is true, this method uses + /// <seealso cref="ATN#nextTokens"/> for each configuration in {@code configs} which is + /// not already in a rule stop state to see if a rule stop state is reachable + /// from the configuration via epsilon-only transitions. + /// </summary> + /// <param name="configs"> the configuration set to update </param> + /// <param name="lookToEndOfRule"> when true, this method checks for rule stop states + /// reachable by epsilon-only transitions from each configuration in + /// {@code configs}. + /// </param> + /// <returns> {@code configs} if all configurations in {@code configs} are in a + /// rule stop state, otherwise return a new configuration set containing only + /// the configurations from {@code configs} which are in a rule stop state </returns> + virtual ATNConfigSet* removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, bool lookToEndOfRule); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx); + + /* parrt internal source braindump that doesn't mess up + * external API spec. 
+ + applyPrecedenceFilter is an optimization to avoid highly + nonlinear prediction of expressions and other left recursive + rules. The precedence predicates such as {3>=prec}? Are highly + context-sensitive in that they can only be properly evaluated + in the context of the proper prec argument. Without pruning, + these predicates are normal predicates evaluated when we reach + conflict state (or unique prediction). As we cannot evaluate + these predicates out of context, the resulting conflict leads + to full LL evaluation and nonlinear prediction which shows up + very clearly with fairly large expressions. + + Example grammar: + + e : e '*' e + | e '+' e + | INT + ; + + We convert that to the following: + + e[int prec] + : INT + ( {3>=prec}? '*' e[4] + | {2>=prec}? '+' e[3] + )* + ; + + The (..)* loop has a decision for the inner block as well as + an enter or exit decision, which is what concerns us here. At + the 1st + of input 1+2+3, the loop entry sees both predicates + and the loop exit also sees both predicates by falling off the + edge of e. This is because we have no stack information with + SLL and find the follow of e, which will hit the return states + inside the loop after e[4] and e[3], which brings it back to + the enter or exit decision. In this case, we know that we + cannot evaluate those predicates because we have fallen off + the edge of the stack and will in general not know which prec + parameter is the right one to use in the predicate. + + Because we have special information, that these are precedence + predicates, we can resolve them without failing over to full + LL despite their context sensitive nature. We make an + assumption that prec[-1] <= prec[0], meaning that the current + precedence level is greater than or equal to the precedence + level of recursive invocations above us in the stack. For + example, if predicate {3>=prec}? is true of the current prec, + then one option is to enter the loop to match it now. 
The + other option is to exit the loop and the left recursive rule + to match the current operator in rule invocation further up + the stack. But, we know that all of those prec are lower or + the same value and so we can decide to enter the loop instead + of matching it later. That means we can strip out the other + configuration for the exit branch. + + So imagine we have (14,1,$,{2>=prec}?) and then + (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization + allows us to collapse these two configurations. We know that + if {2>=prec}? is true for the current prec parameter, it will + also be true for any prec from an invoking e call, indicated + by dipsIntoOuterContext. As the predicates are both true, we + have the option to evaluate them early in the decision start + state. We do this by stripping both predicates and choosing to + enter the loop as it is consistent with the notion of operator + precedence. It's also how the full LL conflict resolution + would work. + + The solution requires a different DFA start state for each + precedence level. + + The basic filter mechanism is to remove configurations of the + form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. In + other words, for the same ATN state and predicate context, + remove any configuration associated with an exit branch if + there is a configuration associated with the enter branch. + + It's also the case that the filter evaluates precedence + predicates and resolves conflicts according to precedence + levels. For example, for input 1+2+3 at the first +, we see + prediction filtering + + [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), + (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext + + to + + [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext + + This filters because {3>=prec}? evals to true and collapses + (11,1,[$],{3>=prec}?) 
and (11,2,[$],up=1) since early conflict + resolution based upon rules of operator precedence fits with + our usual match first alt upon conflict. + + We noticed a problem where a recursive call resets precedence + to 0. Sam's fix: each config has flag indicating if it has + returned from an expr[0] call. then just don't filter any + config with that flag set. flag is carried along in + closure(). so to avoid adding field, set bit just under sign + bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). + With the change you filter "unless (p, 2, pi) was reached + after leaving the rule stop state of the LR rule containing + state p, corresponding to a rule invocation with precedence + level 0" + */ + + /** + * This method transforms the start state computed by + * {@link #computeStartState} to the special start state used by a + * precedence DFA for a particular precedence value. The transformation + * process applies the following changes to the start state's configuration + * set. + * + * <ol> + * <li>Evaluate the precedence predicates for each configuration using + * {@link SemanticContext#evalPrecedence}.</li> + * <li>When {@link ATNConfig#isPrecedenceFilterSuppressed} is {@code false}, + * remove all configurations which predict an alternative greater than 1, + * for which another configuration that predicts alternative 1 is in the + * same ATN state with the same prediction context. 
This transformation is + * valid for the following reasons: + * <ul> + * <li>The closure block cannot contain any epsilon transitions which bypass + * the body of the closure, so all states reachable via alternative 1 are + * part of the precedence alternatives of the transformed left-recursive + * rule.</li> + * <li>The "primary" portion of a left recursive rule cannot contain an + * epsilon transition, so the only way an alternative other than 1 can exist + * in a state that is also reachable via alternative 1 is by nesting calls + * to the left-recursive rule, with the outer calls not being at the + * preferred precedence level. The + * {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN + * configurations which do not meet this condition, and therefore are not + * eligible for elimination during the filtering process.</li> + * </ul> + * </li> + * </ol> + * + * <p> + * The prediction context must be considered by this filter to address + * situations like the following. + * </p> + * <code> + * <pre> + * grammar TA; + * prog: statement* EOF; + * statement: letterA | statement letterA 'b' ; + * letterA: 'a'; + * </pre> + * </code> + * <p> + * In the above grammar, the ATN state immediately before the token + * reference {@code 'a'} in {@code letterA} is reachable from the left edge + * of both the primary and closure blocks of the left-recursive rule + * {@code statement}. The prediction context associated with each of these + * configurations distinguishes between them, and prevents the alternative + * which stepped out to {@code prog} (and then back in to {@code statement}) + * from being eliminated by the filter. + * </p> + * + * @param configs The configuration set computed by + * {@link #computeStartState} as the start state for the DFA. + * @return The transformed configuration set representing the start state + * for a precedence DFA at a particular precedence level (determined by + * calling {@link Parser#getPrecedence}). 
+ */ + std::unique_ptr<ATNConfigSet> applyPrecedenceFilter(ATNConfigSet *configs); + + virtual ATNState *getReachableTarget(const Transition *trans, size_t ttype); + + virtual std::vector<Ref<const SemanticContext>> getPredsForAmbigAlts(const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts); + + std::vector<dfa::DFAState::PredPrediction> getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + const std::vector<Ref<const SemanticContext>> &altToPred); + + /** + * This method is used to improve the localization of error messages by + * choosing an alternative rather than throwing a + * {@link NoViableAltException} in particular prediction scenarios where the + * {@link #ERROR} state was reached during ATN simulation. + * + * <p> + * The default implementation of this method uses the following + * algorithm to identify an ATN configuration which successfully parsed the + * decision entry rule. Choosing such an alternative ensures that the + * {@link ParserRuleContext} returned by the calling rule will be complete + * and valid, and the syntax error will be reported later at a more + * localized location.</p> + * + * <ul> + * <li>If a syntactically valid path or paths reach the end of the decision rule and + * they are semantically valid if predicated, return the min associated alt.</li> + * <li>Else, if a semantically invalid but syntactically valid path exist + * or paths exist, return the minimum associated alt. + * </li> + * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> + * </ul> + * + * <p> + * In some scenarios, the algorithm described above could predict an + * alternative which will result in a {@link FailedPredicateException} in + * the parser. Specifically, this could occur if the <em>only</em> configuration + * capable of successfully parsing to the end of the decision rule is + * blocked by a semantic predicate. 
By choosing this alternative within + * {@link #adaptivePredict} instead of throwing a + * {@link NoViableAltException}, the resulting + * {@link FailedPredicateException} in the parser will identify the specific + * predicate which is preventing the parser from successfully parsing the + * decision rule, which helps developers identify and correct logic errors + * in semantic predicates. + * </p> + * + * @param configs The ATN configurations which were valid immediately before + * the {@link #ERROR} state was reached + * @param outerContext This is the \gamma_0 initial parser context from the paper + * or the parser stack at the instant before prediction commences. + * + * @return The value to return from {@link #adaptivePredict}, or + * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + * identified and {@link #adaptivePredict} should report an error instead. + */ + size_t getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + virtual size_t getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs); + + /** Walk the list of configurations and split them according to + * those that have preds evaluating to true/false. If no pred, assume + * true pred and include in succeeded set. Returns Pair of sets. + * + * Create a new set so as not to alter the incoming parameter. + * + * Assumption: the input stream has been restored to the starting point of + * prediction, which is where predicates need to evaluate. + */ + std::pair<ATNConfigSet *, ATNConfigSet *> splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + /// <summary> + /// Look through a list of predicate/alt pairs, returning alts for the + /// pairs that win. A {@code NONE} predicate indicates an alt containing an + /// unpredicated config which behaves as "always true." If !complete + /// then we stop at the first predicate that evaluates to true. 
This + /// includes pairs with null predicates. + /// </summary> + antlrcpp::BitSet evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions, + ParserRuleContext *outerContext, bool complete); + + /** + * Evaluate a semantic context within a specific parser context. + * + * <p> + * This method might not be called for every semantic context evaluated + * during the prediction process. In particular, we currently do not + * evaluate the following but it may change in the future:</p> + * + * <ul> + * <li>Precedence predicates (represented by + * {@link SemanticContext.PrecedencePredicate}) are not currently evaluated + * through this method.</li> + * <li>Operator predicates (represented by {@link SemanticContext.AND} and + * {@link SemanticContext.OR}) are evaluated as a single semantic + * context, rather than evaluating the operands individually. + * Implementations which require evaluation results from individual + * predicates should override this method to explicitly handle evaluation of + * the operands within operator predicates.</li> + * </ul> + * + * @param pred The semantic context to evaluate + * @param parserCallStack The parser context in which to evaluate the + * semantic context + * @param alt The alternative which is guarded by {@code pred} + * @param fullCtx {@code true} if the evaluation is occurring during LL + * prediction; otherwise, {@code false} if the evaluation is occurring + * during SLL prediction + * + * @since 4.3 + */ + virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx); + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. 
Might have to advance when we do + ambig detection though :( + */ + virtual void closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon); + + virtual void closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + /// Do the actual work of walking epsilon edges. + virtual void closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + virtual Ref<ATNConfig> getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon); + virtual Ref<ATNConfig> actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t); + + virtual Ref<ATNConfig> predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, bool collectPredicates, + bool inContext, bool fullCtx); + + virtual Ref<ATNConfig> ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t); + + /** + * Gets a {@link BitSet} containing the alternatives in {@code configs} + * which are part of one or more conflicting alternative subsets. + * + * @param configs The {@link ATNConfigSet} to analyze. + * @return The alternatives in {@code configs} which are part of one or more + * conflicting alternative subsets. If {@code configs} does not contain any + * conflicting subsets, this method returns an empty {@link BitSet}. + */ + virtual antlrcpp::BitSet getConflictingAlts(ATNConfigSet *configs); + + /// <summary> + /// Sam pointed out a problem with the previous definition, v3, of + /// ambiguous states. If we have another state associated with conflicting + /// alternatives, we should keep going. For example, the following grammar + /// + /// s : (ID | ID ID?)
';' ; + /// + /// When the ATN simulation reaches the state before ';', it has a DFA + /// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + /// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + /// because alternative 2 has another way to continue, via [6|2|[]]. + /// The key is that we have a single state that has configs only associated + /// with a single alternative, 2, and crucially the state transitions + /// among the configurations are all non-epsilon transitions. That means + /// we don't consider any conflicts that include alternative 2. So, we + /// ignore the conflict between alts 1 and 2. We ignore a set of + /// conflicting alts when there is an intersection with an alternative + /// associated with a single alt state in the state->config-list map. + /// + /// It's also the case that we might have two conflicting configurations but + /// also a 3rd nonconflicting configuration for a different alternative: + /// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + /// + /// a : A | A | A B ; + /// + /// After matching input A, we reach the stop state for rule a, state 1. + /// State 8 is the state right before B. Clearly alternatives 1 and 2 + /// conflict and no amount of further lookahead will separate the two. + /// However, alternative 3 will be able to continue and so we do not + /// stop working on this state. In the previous example, we're concerned + /// with states associated with the conflicting alternatives. Here alt + /// 3 is not associated with the conflicting configs, but since we can continue + /// looking for input reasonably, I don't declare the state done. We + /// ignore a set of conflicting alts when we have an alternative + /// that we still need to pursue.
+ /// </summary> + + virtual antlrcpp::BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet *configs); + + virtual NoViableAltException noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs); + + static size_t getUniqueAlt(ATNConfigSet *configs); + + /// <summary> + /// Add an edge to the DFA, if possible. This method calls + /// <seealso cref="#addDFAState"/> to ensure the {@code to} state is present in the + /// DFA. If {@code from} is {@code null}, or if {@code t} is outside the + /// range of edges that can be represented in the DFA tables, this method + /// returns without adding the edge to the DFA. + /// <p/> + /// If {@code to} is {@code null}, this method returns {@code null}. + /// Otherwise, this method returns the <seealso cref="DFAState"/> returned by calling + /// <seealso cref="#addDFAState"/> for the {@code to} state. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="from"> The source state for the edge </param> + /// <param name="t"> The input symbol </param> + /// <param name="to"> The target state for the edge + /// </param> + /// <returns> If {@code to} is {@code null}, this method returns {@code null}; + /// otherwise this method returns the result of calling <seealso cref="#addDFAState"/> + /// on {@code to} </returns> + virtual dfa::DFAState *addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to); + + /// <summary> + /// Add state {@code D} to the DFA if it is not already present, and return + /// the actual instance stored in the DFA. If a state equivalent to {@code D} + /// is already in the DFA, the existing state is returned. Otherwise this + /// method returns {@code D} after adding it to the DFA. + /// <p/> + /// If {@code D} is <seealso cref="#ERROR"/>, this method returns <seealso cref="#ERROR"/> and + /// does not change the DFA. 
+ /// </summary> + /// <param name="dfa"> The dfa </param> + /// <param name="D"> The DFA state to add </param> + /// <returns> The state stored in the DFA. This will be either the existing + /// state if {@code D} is already in the DFA, or {@code D} itself if the + /// state was not already present. </returns> + virtual dfa::DFAState *addDFAState(dfa::DFA &dfa, dfa::DFAState *D); + + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex); + + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex); + + /// If context sensitive parsing, we know it's ambiguity not conflict. + virtual void reportAmbiguity(dfa::DFA &dfa, + dfa::DFAState *D, // the DFA state from execATN() that had SLL conflicts + size_t startIndex, size_t stopIndex, + bool exact, + const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs); // configs that LL not SLL considered conflicting + + private: + // SLL, LL, or LL + exact ambig detection? + PredictionMode _mode; + + static bool getLrLoopSetting(); + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h new file mode 100644 index 0000000000..ea31226d25 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h @@ -0,0 +1,50 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "atn/PredictionContextMergeCacheOptions.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ParserATNSimulatorOptions final { + public: + ParserATNSimulatorOptions& setPredictionContextMergeCacheOptions( + PredictionContextMergeCacheOptions predictionContextMergeCacheOptions) { + _predictionContextMergeCacheOptions = std::move(predictionContextMergeCacheOptions); + return *this; + } + + const PredictionContextMergeCacheOptions& getPredictionContextMergeCacheOptions() const { + return _predictionContextMergeCacheOptions; + } + + private: + PredictionContextMergeCacheOptions _predictionContextMergeCacheOptions; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h new file mode 100644 index 0000000000..b6103dc4d0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// Start of {@code (A|B|...)+} loop. Technically a decision state, but + /// we don't use for code generation; somebody might need it, so I'm defining + /// it for completeness. In reality, the <seealso cref="PlusLoopbackState"/> node is the + /// real decision-making node for {@code A+}.
+ class ANTLR4CPP_PUBLIC PlusBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + PlusLoopbackState *loopBackState = nullptr; + + PlusBlockStartState() : BlockStartState(ATNStateType::PLUS_BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h new file mode 100644 index 0000000000..07f25aa0c9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: + /// one to the loop back to start of the block and one to exit. + class ANTLR4CPP_PUBLIC PlusLoopbackState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_LOOP_BACK; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + PlusLoopbackState() : DecisionState(ATNStateType::PLUS_LOOP_BACK) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp new file mode 100644 index 0000000000..b8685e9516 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PrecedencePredicateTransition.h" + +using namespace antlr4::atn; + +PrecedencePredicateTransition::PrecedencePredicateTransition(ATNState *target, int precedence) + : Transition(TransitionType::PRECEDENCE, target), _predicate(std::make_shared<SemanticContext::PrecedencePredicate>(precedence)) {} + +bool PrecedencePredicateTransition::isEpsilon() const { + return true; +} + +bool PrecedencePredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string PrecedencePredicateTransition::toString() const { + return "PRECEDENCE " + Transition::toString() + " { precedence: " + std::to_string(getPrecedence()) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h new file mode 100644 index 0000000000..3db79a9b73 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/Transition.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PrecedencePredicateTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PRECEDENCE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + PrecedencePredicateTransition(ATNState *target, int precedence); + + int getPrecedence() const { return _predicate->precedence; } + + bool isEpsilon() const override; + bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + std::string toString() const override; + + const Ref<const SemanticContext::PrecedencePredicate>& getPredicate() const { return _predicate; } + + private: + const std::shared_ptr<const SemanticContext::PrecedencePredicate> _predicate; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp new file mode 100644 index 0000000000..73ee2a2b97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "SemanticContext.h" + +#include "atn/PredicateEvalInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +PredicateEvalInfo::PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx) + : DecisionEventInfo(decision, nullptr, input, startIndex, stopIndex, fullCtx), + semctx(std::move(semctx)), predictedAlt(predictedAlt), evalResult(evalResult) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h new file mode 100644 index 0000000000..f343f541cb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for semantic predicate + /// evaluations which occur during prediction. + /// </summary> + /// <seealso cref= ParserATNSimulator#evalSemanticContext + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC PredicateEvalInfo : public DecisionEventInfo { + public: + /// The semantic context which was evaluated. + const Ref<const SemanticContext> semctx; + + /// <summary> + /// The alternative number for the decision which is guarded by the semantic + /// context <seealso cref="#semctx"/>. Note that other ATN + /// configurations may predict the same alternative which are guarded by + /// other semantic contexts and/or <seealso cref="SemanticContext#NONE"/>. + /// </summary> + const size_t predictedAlt; + + /// The result of evaluating the semantic context <seealso cref="#semctx"/>. 
+ const bool evalResult; + + /// <summary> + /// Constructs a new instance of the <seealso cref="PredicateEvalInfo"/> class with the + /// specified detailed predicate evaluation information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the predicate evaluation was + /// triggered. Note that the input stream may be reset to other positions for + /// the actual evaluation of individual predicates. </param> + /// <param name="semctx"> The semantic context which was evaluated </param> + /// <param name="evalResult"> The results of evaluating the semantic context </param> + /// <param name="predictedAlt"> The alternative number for the decision which is + /// guarded by the semantic context {@code semctx}. See <seealso cref="#predictedAlt"/> + /// for more information. </param> + /// <param name="fullCtx"> {@code true} if the semantic context was + /// evaluated during LL prediction; otherwise, {@code false} if the semantic + /// context was evaluated during SLL prediction + /// </param> + /// <seealso cref= ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean) </seealso> + /// <seealso cref= SemanticContext#eval(Recognizer, RuleContext) </seealso> + PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp new file mode 100644 index 0000000000..d76dbd203a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PredicateTransition.h" + +using namespace antlr4::atn; + +PredicateTransition::PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent) + : Transition(TransitionType::PREDICATE, target), _predicate(std::make_shared<SemanticContext::Predicate>(ruleIndex, predIndex, isCtxDependent)) {} + +bool PredicateTransition::isEpsilon() const { + return true; +} + +bool PredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string PredicateTransition::toString() const { + return "PREDICATE " + Transition::toString() + " { ruleIndex: " + std::to_string(getRuleIndex()) + + ", predIndex: " + std::to_string(getPredIndex()) + ", isCtxDependent: " + std::to_string(isCtxDependent()) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h new file mode 100644 index 0000000000..e889b1c198 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// TODO: this is old comment: + /// A tree of semantic predicates from the grammar AST if label==SEMPRED. + /// In the ATN, labels will always be exactly one predicate, but the DFA + /// may have to combine a bunch of them as it collects predicates from + /// multiple ATN configurations into a single DFA state. 
+ class ANTLR4CPP_PUBLIC PredicateTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PREDICATE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + size_t getRuleIndex() const { + return _predicate->ruleIndex; + } + + size_t getPredIndex() const { + return _predicate->predIndex; + } + + bool isCtxDependent() const { + return _predicate->isCtxDependent; + } + + bool isEpsilon() const override; + bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + std::string toString() const override; + + const Ref<const SemanticContext::Predicate>& getPredicate() const { return _predicate; } + + private: + const std::shared_ptr<const SemanticContext::Predicate> _predicate; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp new file mode 100644 index 0000000000..704408f04d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp @@ -0,0 +1,579 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/SingletonPredictionContext.h" +#include "misc/MurmurHash.h" +#include "atn/ArrayPredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "atn/PredictionContextMergeCache.h" +#include "RuleContext.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "support/Arrays.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/PredictionContext.h" + +using namespace antlr4; +using namespace antlr4::misc; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + void combineCommonParents(std::vector<Ref<const PredictionContext>> &parents) { + std::unordered_set<Ref<const PredictionContext>> uniqueParents; + uniqueParents.reserve(parents.size()); + for (const auto &parent : parents) { + uniqueParents.insert(parent); + } + for (auto &parent : parents) { + parent = *uniqueParents.find(parent); + } + } + + Ref<const PredictionContext> getCachedContextImpl(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache, + std::unordered_map<Ref<const PredictionContext>, + Ref<const PredictionContext>> &visited) { + if (context->isEmpty()) { + return context; + } + + { + auto iterator = visited.find(context); + if (iterator != visited.end()) { + return iterator->second; // Not necessarily the same as context.
+ } + } + + auto cached = contextCache.get(context); + if (cached) { + visited[context] = cached; + return cached; + } + + bool changed = false; + + std::vector<Ref<const PredictionContext>> parents(context->size()); + for (size_t i = 0; i < parents.size(); i++) { + auto parent = getCachedContextImpl(context->getParent(i), contextCache, visited); + if (changed || parent != context->getParent(i)) { + if (!changed) { + parents.clear(); + for (size_t j = 0; j < context->size(); j++) { + parents.push_back(context->getParent(j)); + } + + changed = true; + } + + parents[i] = std::move(parent); + } + } + + if (!changed) { + visited[context] = context; + contextCache.put(context); + return context; + } + + Ref<const PredictionContext> updated; + if (parents.empty()) { + updated = PredictionContext::EMPTY; + } else if (parents.size() == 1) { + updated = SingletonPredictionContext::create(std::move(parents[0]), context->getReturnState(0)); + contextCache.put(updated); + } else { + updated = std::make_shared<ArrayPredictionContext>(std::move(parents), downCast<const ArrayPredictionContext*>(context.get())->returnStates); + contextCache.put(updated); + } + + visited[updated] = updated; + visited[context] = updated; + + return updated; + } + + void getAllContextNodesImpl(const Ref<const PredictionContext> &context, + std::vector<Ref<const PredictionContext>> &nodes, + std::unordered_set<const PredictionContext*> &visited) { + + if (visited.find(context.get()) != visited.end()) { + return; // Already done. 
+ } + + visited.insert(context.get()); + nodes.push_back(context); + + for (size_t i = 0; i < context->size(); i++) { + getAllContextNodesImpl(context->getParent(i), nodes, visited); + } + } + + size_t insertOrAssignNodeId(std::unordered_map<const PredictionContext*, size_t> &nodeIds, size_t &nodeId, const PredictionContext *node) { + auto existing = nodeIds.find(node); + if (existing != nodeIds.end()) { + return existing->second; + } + return nodeIds.insert({node, nodeId++}).first->second; + } + +} + +const Ref<const PredictionContext> PredictionContext::EMPTY = std::make_shared<SingletonPredictionContext>(nullptr, PredictionContext::EMPTY_RETURN_STATE); + +//----------------- PredictionContext ---------------------------------------------------------------------------------- + +PredictionContext::PredictionContext(PredictionContextType contextType) : _contextType(contextType), _hashCode(0) {} + +PredictionContext::PredictionContext(PredictionContext&& other) : _contextType(other._contextType), _hashCode(other._hashCode.exchange(0, std::memory_order_relaxed)) {} + +Ref<const PredictionContext> PredictionContext::fromRuleContext(const ATN &atn, RuleContext *outerContext) { + if (outerContext == nullptr) { + return PredictionContext::EMPTY; + } + + // if we are in RuleContext of start rule, s, then PredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if (outerContext->parent == nullptr || outerContext == &ParserRuleContext::EMPTY) { + return PredictionContext::EMPTY; + } + + // If we have a parent, convert it to a PredictionContext graph + auto parent = PredictionContext::fromRuleContext(atn, RuleContext::is(outerContext->parent) ? 
downCast<RuleContext*>(outerContext->parent) : nullptr); + const auto *transition = downCast<const RuleTransition*>(atn.states[outerContext->invokingState]->transitions[0].get()); + return SingletonPredictionContext::create(std::move(parent), transition->followState->stateNumber); +} + +bool PredictionContext::hasEmptyPath() const { + // since EMPTY_RETURN_STATE can only appear in the last position, we check last one + return getReturnState(size() - 1) == EMPTY_RETURN_STATE; +} + +size_t PredictionContext::hashCode() const { + auto hash = cachedHashCode(); + if (hash == 0) { // 0 is the constructor's initial _hashCode value; presumably it means "nothing cached yet" (confirm in cachedHashCode()) + hash = hashCodeImpl(); + if (hash == 0) { // never store 0: a computed hash of 0 is replaced by a non-zero stand-in + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} + +Ref<const PredictionContext> PredictionContext::merge(Ref<const PredictionContext> a, Ref<const PredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + assert(a && b); + + // share same graph if both same + if (a == b || *a == *b) { + return a; + } + + const auto aType = a->getContextType(); + const auto bType = b->getContextType(); + + if (aType == PredictionContextType::SINGLETON && bType == PredictionContextType::SINGLETON) { + return mergeSingletons(std::static_pointer_cast<const SingletonPredictionContext>(std::move(a)), + std::static_pointer_cast<const SingletonPredictionContext>(std::move(b)), rootIsWildcard, mergeCache); + } + + // At least one of a or b is array. + // If one is $ and rootIsWildcard, return $ as * wildcard.
+ if (rootIsWildcard) { + if (a == PredictionContext::EMPTY) { + return a; + } + if (b == PredictionContext::EMPTY) { + return b; + } + } + + // convert singleton so both are arrays to normalize + Ref<const ArrayPredictionContext> left; + if (aType == PredictionContextType::SINGLETON) { + left = std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*a)); + } else { + left = std::static_pointer_cast<const ArrayPredictionContext>(std::move(a)); + } + Ref<const ArrayPredictionContext> right; + if (bType == PredictionContextType::SINGLETON) { + right = std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*b)); + } else { + right = std::static_pointer_cast<const ArrayPredictionContext>(std::move(b)); + } + return mergeArrays(std::move(left), std::move(right), rootIsWildcard, mergeCache); +} + +Ref<const PredictionContext> PredictionContext::mergeSingletons(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache) { + auto existing = mergeCache->get(a, b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + auto rootMerge = mergeRoot(a, b, rootIsWildcard); + if (rootMerge) { + if (mergeCache) { + return mergeCache->put(a, b, std::move(rootMerge)); + } + return rootMerge; + } + + const auto& parentA = a->parent; + const auto& parentB = b->parent; + if (a->returnState == b->returnState) { // a == b + auto parent = merge(parentA, parentB, rootIsWildcard, mergeCache); + + // If parent is same as existing a or b parent or reduced to a parent, return it. + if (parent == parentA) { // ax + bx = ax, if a=b + return a; + } + if (parent == parentB) { // ax + bx = bx, if a=b + return b; + } + + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. 
dup a, a' points at merged array + // new joined parent so create new singleton pointing to it, a' + auto c = SingletonPredictionContext::create(std::move(parent), a->returnState); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + Ref<const PredictionContext> singleParent; + if (a == b || (*parentA == *parentB)) { // ax + bx = [a,b]x + singleParent = parentA; + } + if (singleParent) { // parents are same, sort payloads and use same parent + std::vector<size_t> payloads = { a->returnState, b->returnState }; + if (a->returnState > b->returnState) { + payloads[0] = b->returnState; + payloads[1] = a->returnState; + } + std::vector<Ref<const PredictionContext>> parents = { singleParent, singleParent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + + // parents differ and can't merge them. Just pack together + // into array; can't merge. 
+ // ax + by = [ax,by] + if (a->returnState > b->returnState) { // sort by payload + std::vector<size_t> payloads = { b->returnState, a->returnState }; + std::vector<Ref<const PredictionContext>> parents = { b->parent, a->parent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + std::vector<size_t> payloads = {a->returnState, b->returnState}; + std::vector<Ref<const PredictionContext>> parents = { a->parent, b->parent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; +} + +Ref<const PredictionContext> PredictionContext::mergeRoot(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b, + bool rootIsWildcard) { + if (rootIsWildcard) { + if (a == EMPTY) { // * + b = * + return EMPTY; + } + if (b == EMPTY) { // a + * = * + return EMPTY; + } + } else { + if (a == EMPTY && b == EMPTY) { // $ + $ = $ + return EMPTY; + } + if (a == EMPTY) { // $ + x = [x,$] + std::vector<size_t> payloads = { b->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<const PredictionContext>> parents = { b->parent, nullptr }; + return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + } + if (b == EMPTY) { // x + $ = [x,$] ($ is always last if present) + std::vector<size_t> payloads = { a->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<const PredictionContext>> parents = { a->parent, nullptr }; + return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + } + } + return nullptr; +} + +Ref<const PredictionContext> PredictionContext::mergeArrays(Ref<const ArrayPredictionContext> a, Ref<const ArrayPredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache) { + auto existing = mergeCache->get(a,
b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + // merge sorted payloads a + b => M + size_t i = 0; // walks a + size_t j = 0; // walks b + size_t k = 0; // walks target M array + + std::vector<size_t> mergedReturnStates(a->returnStates.size() + b->returnStates.size()); + std::vector<Ref<const PredictionContext>> mergedParents(a->returnStates.size() + b->returnStates.size()); + + // walk and merge to yield mergedParents, mergedReturnStates + while (i < a->returnStates.size() && j < b->returnStates.size()) { + const auto& parentA = a->parents[i]; + const auto& parentB = b->parents[j]; + if (a->returnStates[i] == b->returnStates[j]) { + // same payload (stack tops are equal), must yield merged singleton + size_t payload = a->returnStates[i]; + // $+$ = $ + bool both$ = payload == EMPTY_RETURN_STATE && !parentA && !parentB; + bool ax_ax = (parentA && parentB) && *parentA == *parentB; // ax+ax -> ax + if (both$ || ax_ax) { + mergedParents[k] = parentA; // choose left + mergedReturnStates[k] = payload; + } else { // ax+ay -> a'[x,y] + mergedParents[k] = merge(parentA, parentB, rootIsWildcard, mergeCache); + mergedReturnStates[k] = payload; + } + i++; // hop over left one as usual + j++; // but also skip one in right side since we merge + } else if (a->returnStates[i] < b->returnStates[j]) { // copy a[i] to M + mergedParents[k] = parentA; + mergedReturnStates[k] = a->returnStates[i]; + i++; + } else { // b > a, copy b[j] to M + mergedParents[k] = parentB; + mergedReturnStates[k] = b->returnStates[j]; + j++; + } + k++; + } + + // copy over any payloads remaining in either array + if (i < a->returnStates.size()) { + for (auto p = i; p < a->returnStates.size(); p++) { + mergedParents[k] = a->parents[p]; + mergedReturnStates[k] = a->returnStates[p]; + k++; + } + } else { + for (auto p = j; p < b->returnStates.size(); p++) { + mergedParents[k] = b->parents[p]; + mergedReturnStates[k] = 
b->returnStates[p]; + k++; + } + } + + // trim merged if we combined a few that had same stack tops + if (k < mergedParents.size()) { // write index < last position; trim + if (k == 1) { // for just one merged element, return singleton top + auto c = SingletonPredictionContext::create(std::move(mergedParents[0]), mergedReturnStates[0]); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + mergedParents.resize(k); + mergedReturnStates.resize(k); + } + + ArrayPredictionContext m(std::move(mergedParents), std::move(mergedReturnStates)); + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if (m == *a) { + if (mergeCache) { + return mergeCache->put(a, b, a); + } + return a; + } + if (m == *b) { + if (mergeCache) { + return mergeCache->put(a, b, b); + } + return b; + } + + combineCommonParents(m.parents); + auto c = std::make_shared<ArrayPredictionContext>(std::move(m)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; +} + +std::string PredictionContext::toDOTString(const Ref<const PredictionContext> &context) { + if (context == nullptr) { + return ""; + } + + std::stringstream ss; + ss << "digraph G {\n" << "rankdir=LR;\n"; + + std::vector<Ref<const PredictionContext>> nodes = getAllContextNodes(context); + std::unordered_map<const PredictionContext*, size_t> nodeIds; + size_t nodeId = 0; + + for (const auto ¤t : nodes) { + if (current->getContextType() == PredictionContextType::SINGLETON) { + std::string s = std::to_string(insertOrAssignNodeId(nodeIds, nodeId, current.get())); + ss << " s" << s; + std::string returnState = std::to_string(current->getReturnState(0)); + if (current == PredictionContext::EMPTY) { + returnState = "$"; + } + ss << " [label=\"" << returnState << "\"];\n"; + continue; + } + Ref<const ArrayPredictionContext> arr = std::static_pointer_cast<const ArrayPredictionContext>(current); + ss << " 
s" << insertOrAssignNodeId(nodeIds, nodeId, arr.get()) << " [shape=box, label=\"" << "["; + bool first = true; + for (auto inv : arr->returnStates) { + if (!first) { + ss << ", "; + } + if (inv == EMPTY_RETURN_STATE) { + ss << "$"; + } else { + ss << inv; + } + first = false; + } + ss << "]"; + ss << "\"];\n"; + } + + for (const auto ¤t : nodes) { + if (current == EMPTY) { + continue; + } + for (size_t i = 0; i < current->size(); i++) { + if (!current->getParent(i)) { + continue; + } + ss << " s" << insertOrAssignNodeId(nodeIds, nodeId, current.get()) << "->" << "s" << insertOrAssignNodeId(nodeIds, nodeId, current->getParent(i).get()); + if (current->size() > 1) { + ss << " [label=\"parent[" << i << "]\"];\n"; + } else { + ss << ";\n"; + } + } + } + + ss << "}\n"; + return ss.str(); +} + +// The "visited" map is just a temporary structure to control the retrieval process (which is recursive). +Ref<const PredictionContext> PredictionContext::getCachedContext(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache) { + std::unordered_map<Ref<const PredictionContext>, Ref<const PredictionContext>> visited; + return getCachedContextImpl(context, contextCache, visited); +} + +std::vector<Ref<const PredictionContext>> PredictionContext::getAllContextNodes(const Ref<const PredictionContext> &context) { + std::vector<Ref<const PredictionContext>> nodes; + std::unordered_set<const PredictionContext*> visited; + getAllContextNodesImpl(context, nodes, visited); + return nodes; +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, int currentState) const { + return toStrings(recognizer, EMPTY, currentState); +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, const Ref<const PredictionContext> &stop, int currentState) const { + + std::vector<std::string> result; + + for (size_t perm = 0; ; perm++) { + size_t offset = 0; + bool last = true; + const PredictionContext *p = this; + size_t 
stateNumber = currentState; + + std::stringstream ss; + ss << "["; + bool outerContinue = false; + while (!p->isEmpty() && p != stop.get()) { + size_t index = 0; + if (p->size() > 0) { + size_t bits = 1; + while ((1ULL << bits) < p->size()) { + bits++; + } + + size_t mask = (1 << bits) - 1; + index = (perm >> offset) & mask; + last &= index >= p->size() - 1; + if (index >= p->size()) { + outerContinue = true; + break; + } + offset += bits; + } + + if (recognizer != nullptr) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + const ATN &atn = recognizer->getATN(); + ATNState *s = atn.states[stateNumber]; + std::string ruleName = recognizer->getRuleNames()[s->ruleIndex]; + ss << ruleName; + } else if (p->getReturnState(index) != EMPTY_RETURN_STATE) { + if (!p->isEmpty()) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + ss << p->getReturnState(index); + } + } + stateNumber = p->getReturnState(index); + p = p->getParent(index).get(); + } + + if (outerContinue) + continue; + + ss << "]"; + result.push_back(ss.str()); + + if (last) { + break; + } + } + + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h new file mode 100644 index 0000000000..967355af17 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h @@ -0,0 +1,225 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
namespace antlr4 {

  class RuleContext;

namespace atn {

  class ATN;
  class ArrayPredictionContext;
  class SingletonPredictionContext;
  class PredictionContextCache;
  class PredictionContextMergeCache;

  /// Abstract base of the prediction-context graph nodes. Copying is disabled; the concrete
  /// kind of a node is reported by getContextType().
  class ANTLR4CPP_PUBLIC PredictionContext {
  public:
    /// Represents $ in local context prediction, which means wildcard.
    /// *+x = *.
    static const Ref<const PredictionContext> EMPTY;

    /// Represents $ in an array in full context mode, when $
    /// doesn't mean wildcard: $ + x = [$,x]. Here,
    /// $ = EMPTY_RETURN_STATE.
    // ml: originally Integer.MAX_VALUE, which would be -1 for us, but this is already used in places where
    //     -1 is converted to unsigned, so we use a different value here. Any value does the job provided it doesn't
    //     conflict with real return states.
    // NOTE(review): std::numeric_limits is used here although <limits> is not among this
    // header's direct includes; presumably it arrives transitively. Confirm.
    static constexpr size_t EMPTY_RETURN_STATE = std::numeric_limits<size_t>::max() - 9;

    // dispatch
    /// Entry point of the merge family declared below.
    /// NOTE(review): presumably selects among mergeSingletons / mergeArrays based on the
    /// operand kinds; the definition is not part of this header. Confirm.
    static Ref<const PredictionContext> merge(Ref<const PredictionContext> a,
                                              Ref<const PredictionContext> b,
                                              bool rootIsWildcard,
                                              PredictionContextMergeCache *mergeCache);

    /// <summary>
    /// Merge two <seealso cref="SingletonPredictionContext"/> instances.
    ///
    /// <p/>
    ///
    /// Stack tops equal, parents merge is same; return left graph.<br/>
    /// <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/>
    ///
    /// <p/>
    ///
    /// Same stack top, parents differ; merge parents giving array node, then
    /// remainders of those graphs. A new root node is created to point to the
    /// merged parents.<br/>
    /// <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/>
    ///
    /// <p/>
    ///
    /// Different stack tops pointing to same parent. Make array node for the
    /// root where both elements in the root point to the same (original)
    /// parent.<br/>
    /// <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/>
    ///
    /// <p/>
    ///
    /// Different stack tops pointing to different parents. Make array node for
    /// the root where each element points to the corresponding original
    /// parent.<br/>
    /// <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/>
    /// </summary>
    /// <param name="a"> the first <seealso cref="SingletonPredictionContext"/> </param>
    /// <param name="b"> the second <seealso cref="SingletonPredictionContext"/> </param>
    /// <param name="rootIsWildcard"> {@code true} if this is a local-context merge,
    /// otherwise false to indicate a full-context merge </param>
    /// <param name="mergeCache"> optional cache of earlier merge results; may be null </param>
    static Ref<const PredictionContext> mergeSingletons(Ref<const SingletonPredictionContext> a,
                                                        Ref<const SingletonPredictionContext> b,
                                                        bool rootIsWildcard,
                                                        PredictionContextMergeCache *mergeCache);

    /**
     * Handle case where at least one of {@code a} or {@code b} is
     * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used
     * to represent {@link #EMPTY}.
     *
     * <h2>Local-Context Merges</h2>
     *
     * <p>These local-context merge operations are used when {@code rootIsWildcard}
     * is true.</p>
     *
     * <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br>
     * <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
     *
     * <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
     * {@code #EMPTY}; return left graph.<br>
     * <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p>
     *
     * <p>Special case of last merge if local context.<br>
     * <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p>
     *
     * <h2>Full-Context Merges</h2>
     *
     * <p>These full-context merge operations are used when {@code rootIsWildcard}
     * is false.</p>
     *
     * <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p>
     *
     * <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and
     * null parent).<br>
     * <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p>
     *
     * <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p>
     *
     * @param a the first {@link SingletonPredictionContext}
     * @param b the second {@link SingletonPredictionContext}
     * @param rootIsWildcard {@code true} if this is a local-context merge,
     * otherwise false to indicate a full-context merge
     */
    static Ref<const PredictionContext> mergeRoot(Ref<const SingletonPredictionContext> a,
                                                  Ref<const SingletonPredictionContext> b,
                                                  bool rootIsWildcard);

    /**
     * Merge two {@link ArrayPredictionContext} instances.
     *
     * <p>Different tops, different parents.<br>
     * <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p>
     *
     * <p>Shared top, same parents.<br>
     * <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p>
     *
     * <p>Shared top, different parents.<br>
     * <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p>
     *
     * <p>Shared top, all shared parents.<br>
     * <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p>
     *
     * <p>Equal tops, merge parents and reduce top to
     * {@link SingletonPredictionContext}.<br>
     * <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p>
     */
    static Ref<const PredictionContext> mergeArrays(Ref<const ArrayPredictionContext> a,
                                                    Ref<const ArrayPredictionContext> b,
                                                    bool rootIsWildcard,
                                                    PredictionContextMergeCache *mergeCache);

    /// Renders the context graph reachable from `context` as Graphviz DOT text.
    static std::string toDOTString(const Ref<const PredictionContext> &context);

    /// Looks `context` up through `contextCache`.
    /// NOTE(review): presumably returns the canonical cached instance for an equal
    /// context; the traversal itself is implemented outside this header. Confirm.
    static Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context,
                                                         PredictionContextCache &contextCache);

    /// Returns the context nodes collected starting from `context`.
    static std::vector<Ref<const PredictionContext>> getAllContextNodes(const Ref<const PredictionContext> &context);

    /// Convert a RuleContext tree to a PredictionContext graph.
    /// Return EMPTY if outerContext is empty.
    static Ref<const PredictionContext> fromRuleContext(const ATN &atn, RuleContext *outerContext);

    // Contexts are not copyable.
    PredictionContext(const PredictionContext&) = delete;

    virtual ~PredictionContext() = default;

    // Contexts are neither copy- nor move-assignable.
    PredictionContext& operator=(const PredictionContext&) = delete;
    PredictionContext& operator=(PredictionContext&&) = delete;

    /// Returns the kind tag passed to the constructor.
    PredictionContextType getContextType() const { return _contextType; }

    /// Number of (parent, return state) entries held by this node.
    virtual size_t size() const = 0;
    /// Parent of entry `index`; may be null (array entries for $ carry a null parent).
    virtual const Ref<const PredictionContext>& getParent(size_t index) const = 0;
    /// Return state of entry `index`.
    virtual size_t getReturnState(size_t index) const = 0;

    /// Presumably true only when this node stands for the EMPTY context alone, i.e. nothing
    /// but $ is in the set. The original note was itself unsure about the wildcard
    /// reading; confirm against the concrete implementations.
    virtual bool isEmpty() const = 0;
    /// NOTE(review): presumably reports whether some entry leads to $; defined elsewhere.
    bool hasEmptyPath() const;

    /// Hash of this context.
    /// NOTE(review): presumably memoized in _hashCode via hashCodeImpl(); confirm.
    size_t hashCode() const;

    /// Structural equality; used by operator== and operator!= below.
    virtual bool equals(const PredictionContext &other) const = 0;

    virtual std::string toString() const = 0;

    /// Lists the paths from this context as bracketed strings. The two-argument overload
    /// stops at EMPTY; the three-argument overload stops at `stop`.
    std::vector<std::string> toStrings(Recognizer *recognizer, int currentState) const;
    std::vector<std::string> toStrings(Recognizer *recognizer,
                                       const Ref<const PredictionContext> &stop,
                                       int currentState) const;

  protected:
    explicit PredictionContext(PredictionContextType contextType);

    PredictionContext(PredictionContext&& other);

    /// Computes the hash for the concrete node type.
    virtual size_t hashCodeImpl() const = 0;

    /// Reads the stored hash value with relaxed memory ordering.
    size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); }

  private:
    const PredictionContextType _contextType;
    // mutable + atomic: written from const member functions.
    mutable std::atomic<size_t> _hashCode;
  };

  /// Equality of contexts is delegated to the virtual equals().
  inline bool operator==(const PredictionContext &lhs, const PredictionContext &rhs) {
    return lhs.equals(rhs);
  }

  inline bool operator!=(const PredictionContext &lhs, const PredictionContext &rhs) {
    return !operator==(lhs, rhs);
  }

} // namespace atn
} // namespace antlr4

namespace std {

  /// std::hash support: forwards to PredictionContext::hashCode().
  template <>
  struct hash<::antlr4::atn::PredictionContext> {
    size_t operator()(const ::antlr4::atn::PredictionContext &predictionContext) const {
      return predictionContext.hashCode();
    }
  };

} // namespace std
Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "atn/PredictionContextCache.h" + +using namespace antlr4::atn; + +void PredictionContextCache::put(const Ref<const PredictionContext> &value) { + assert(value); + + _data.insert(value); +} + +Ref<const PredictionContext> PredictionContextCache::get( + const Ref<const PredictionContext> &value) const { + assert(value); + + auto iterator = _data.find(value); + if (iterator == _data.end()) { + return nullptr; + } + return *iterator; +} + +size_t PredictionContextCache::PredictionContextHasher::operator()( + const Ref<const PredictionContext> &predictionContext) const { + return predictionContext->hashCode(); +} + +bool PredictionContextCache::PredictionContextComparer::operator()( + const Ref<const PredictionContext> &lhs, + const Ref<const PredictionContext> &rhs) const { + return *lhs == *rhs; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h new file mode 100644 index 0000000000..78c8210d97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h @@ -0,0 +1,63 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. 
namespace antlr4 {
namespace atn {

  /// Set of prediction contexts keyed by the contexts' own hash and equality (see the
  /// hasher and comparer below), so that an equal, already stored context can be found
  /// again. Neither copyable nor movable.
  class ANTLR4CPP_PUBLIC PredictionContextCache final {
  public:
    PredictionContextCache() = default;

    PredictionContextCache(const PredictionContextCache&) = delete;
    PredictionContextCache(PredictionContextCache&&) = delete;

    PredictionContextCache& operator=(const PredictionContextCache&) = delete;
    PredictionContextCache& operator=(PredictionContextCache&&) = delete;

    /// Inserts `value` into the set. `value` must not be null.
    void put(const Ref<const PredictionContext> &value);

    /// Returns the stored element equal to `value`, or nullptr when there is none.
    /// `value` must not be null.
    Ref<const PredictionContext> get(const Ref<const PredictionContext> &value) const;

  private:
    /// Hash functor for the set: hashes the pointed-to context.
    struct ANTLR4CPP_PUBLIC PredictionContextHasher final {
      size_t operator()(const Ref<const PredictionContext> &predictionContext) const;
    };

    /// Equality functor for the set: compares the pointed-to contexts.
    struct ANTLR4CPP_PUBLIC PredictionContextComparer final {
      bool operator()(const Ref<const PredictionContext> &lhs,
                      const Ref<const PredictionContext> &rhs) const;
    };

    FlatHashSet<Ref<const PredictionContext>,
                PredictionContextHasher, PredictionContextComparer> _data;
  };

} // namespace atn
} // namespace antlr4
a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp new file mode 100644 index 0000000000..7160b59998 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp @@ -0,0 +1,167 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "atn/PredictionContextMergeCache.h" + +#include "misc/MurmurHash.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; + +PredictionContextMergeCache::PredictionContextMergeCache( + const PredictionContextMergeCacheOptions &options) : _options(options) {} + +Ref<const PredictionContext> PredictionContextMergeCache::put( + const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2, + Ref<const PredictionContext> value) { + assert(key1); + assert(key2); + + if (getOptions().getMaxSize() == 0) { + // Cache is effectively disabled. + return value; + } + + auto [existing, inserted] = _entries.try_emplace(std::make_pair(key1.get(), key2.get())); + if (inserted) { + try { + existing->second.reset(new Entry()); + } catch (...) { + _entries.erase(existing); + throw; + } + existing->second->key = std::make_pair(key1, key2); + existing->second->value = std::move(value); + pushToFront(existing->second.get()); + } else { + if (existing->second->value != value) { + existing->second->value = std::move(value); + } + moveToFront(existing->second.get()); + } + compact(existing->second.get()); + return existing->second->value; +} + +Ref<const PredictionContext> PredictionContextMergeCache::get( + const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2) const { + assert(key1); + assert(key2); + + if (getOptions().getMaxSize() == 0) { + // Cache is effectively disabled. 
+ return nullptr; + } + + auto iterator = _entries.find(std::make_pair(key1.get(), key2.get())); + if (iterator == _entries.end()) { + return nullptr; + } + moveToFront(iterator->second.get()); + return iterator->second->value; +} + +void PredictionContextMergeCache::clear() { + Container().swap(_entries); + _head = _tail = nullptr; + _size = 0; +} + +void PredictionContextMergeCache::moveToFront(Entry *entry) const { + if (entry->prev == nullptr) { + assert(entry == _head); + return; + } + entry->prev->next = entry->next; + if (entry->next != nullptr) { + entry->next->prev = entry->prev; + } else { + assert(entry == _tail); + _tail = entry->prev; + } + entry->prev = nullptr; + entry->next = _head; + _head->prev = entry; + _head = entry; + assert(entry->prev == nullptr); +} + +void PredictionContextMergeCache::pushToFront(Entry *entry) { + ++_size; + entry->prev = nullptr; + entry->next = _head; + if (_head != nullptr) { + _head->prev = entry; + _head = entry; + } else { + assert(entry->next == nullptr); + _head = entry; + _tail = entry; + } + assert(entry->prev == nullptr); +} + +void PredictionContextMergeCache::remove(Entry *entry) { + if (entry->prev != nullptr) { + entry->prev->next = entry->next; + } else { + assert(entry == _head); + _head = entry->next; + } + if (entry->next != nullptr) { + entry->next->prev = entry->prev; + } else { + assert(entry == _tail); + _tail = entry->prev; + } + --_size; + _entries.erase(std::make_pair(entry->key.first.get(), entry->key.second.get())); +} + +void PredictionContextMergeCache::compact(const Entry *preserve) { + Entry *entry = _tail; + while (entry != nullptr && _size > getOptions().getMaxSize()) { + Entry *next = entry->prev; + if (entry != preserve) { + remove(entry); + } + entry = next; + } +} + +size_t PredictionContextMergeCache::PredictionContextHasher::operator()( + const PredictionContextPair &value) const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, value.first->hashCode()); + 
hash = MurmurHash::update(hash, value.second->hashCode()); + return MurmurHash::finish(hash, 2); +} + +bool PredictionContextMergeCache::PredictionContextComparer::operator()( + const PredictionContextPair &lhs, const PredictionContextPair &rhs) const { + return *lhs.first == *rhs.first && *lhs.second == *rhs.second; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h new file mode 100644 index 0000000000..efaeaef578 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h @@ -0,0 +1,101 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
namespace antlr4 {
namespace atn {

  /// Cache of merge results keyed by the pair of merged contexts. Entries are additionally
  /// chained in a doubly linked list ordered by recency of use (head = most recent), which
  /// put() uses to evict from the tail once the configured maximum size is exceeded.
  /// Neither copyable nor movable.
  class ANTLR4CPP_PUBLIC PredictionContextMergeCache final {
  public:
    /// Creates a cache with default-constructed options.
    PredictionContextMergeCache()
        : PredictionContextMergeCache(PredictionContextMergeCacheOptions()) {}

    /// Creates a cache that keeps its own copy of `options`.
    explicit PredictionContextMergeCache(const PredictionContextMergeCacheOptions &options);

    PredictionContextMergeCache(const PredictionContextMergeCache&) = delete;
    PredictionContextMergeCache(PredictionContextMergeCache&&) = delete;

    PredictionContextMergeCache& operator=(const PredictionContextMergeCache&) = delete;
    PredictionContextMergeCache& operator=(PredictionContextMergeCache&&) = delete;

    /// Stores `value` for (key1, key2) and returns the value now held by that entry.
    /// With a maximum size of 0 nothing is stored and `value` is returned unchanged.
    /// Both keys must be non-null.
    Ref<const PredictionContext> put(const Ref<const PredictionContext> &key1,
                                     const Ref<const PredictionContext> &key2,
                                     Ref<const PredictionContext> value);

    /// Returns the value stored for (key1, key2), or nullptr when there is none or the
    /// maximum size is 0. A hit moves the entry to the front of the recency list, which
    /// is why the list pointers below are mutable. Both keys must be non-null.
    Ref<const PredictionContext> get(const Ref<const PredictionContext> &key1,
                                     const Ref<const PredictionContext> &key2) const;

    const PredictionContextMergeCacheOptions& getOptions() const { return _options; }

    /// Removes all entries.
    void clear();

  private:
    /// Map key: the raw pointers of the two merged contexts. The owning references are
    /// kept in Entry::key.
    using PredictionContextPair = std::pair<const PredictionContext*, const PredictionContext*>;

    /// Hash functor for the map: combines the hash codes of both pointed-to contexts.
    struct ANTLR4CPP_PUBLIC PredictionContextHasher final {
      size_t operator()(const PredictionContextPair &value) const;
    };

    /// Equality functor for the map: compares the pointed-to contexts pairwise.
    struct ANTLR4CPP_PUBLIC PredictionContextComparer final {
      bool operator()(const PredictionContextPair &lhs, const PredictionContextPair &rhs) const;
    };

    /// One cached merge result plus its links in the recency list.
    struct ANTLR4CPP_PUBLIC Entry final {
      std::pair<Ref<const PredictionContext>, Ref<const PredictionContext>> key;
      Ref<const PredictionContext> value;
      Entry *prev = nullptr;
      Entry *next = nullptr;
    };

    /// Relinks an already listed entry as the head of the recency list.
    void moveToFront(Entry *entry) const;

    /// Links a new entry in as the head of the recency list and increments the size.
    void pushToFront(Entry *entry);

    /// Unlinks `entry`, decrements the size and erases the entry from the map.
    void remove(Entry *entry);

    /// Evicts from the tail until the size fits the maximum, never removing `preserve`.
    void compact(const Entry *preserve);

    using Container = FlatHashMap<PredictionContextPair, std::unique_ptr<Entry>,
                                  PredictionContextHasher, PredictionContextComparer>;

    const PredictionContextMergeCacheOptions _options;

    Container _entries;

    // Ends of the recency list; mutable because get() const reorders it.
    mutable Entry *_head = nullptr;
    mutable Entry *_tail = nullptr;

    // Number of entries currently linked into the recency list.
    size_t _size = 0;
  };

} // namespace atn
} // namespace antlr4
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <limits> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextMergeCacheOptions final { + public: + PredictionContextMergeCacheOptions() = default; + + size_t getMaxSize() const { return _maxSize; } + + bool hasMaxSize() const { return getMaxSize() != std::numeric_limits<size_t>::max(); } + + PredictionContextMergeCacheOptions& setMaxSize(size_t maxSize) { + _maxSize = maxSize; + return *this; + } + + size_t getClearEveryN() const { + return _clearEveryN; + } + + bool hasClearEveryN() const { return getClearEveryN() != 0; } + + PredictionContextMergeCacheOptions& setClearEveryN(uint64_t clearEveryN) { + _clearEveryN = clearEveryN; + return *this; + } + + PredictionContextMergeCacheOptions& neverClear() { + return setClearEveryN(0); + } + + private: + size_t _maxSize = std::numeric_limits<size_t>::max(); + uint64_t _clearEveryN = 1; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h new file mode 100644 index 
0000000000..c8c4473e13 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + enum class PredictionContextType : size_t { + SINGLETON = 1, + ARRAY = 2, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp new file mode 100644 index 0000000000..9db0b8bdb9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp @@ -0,0 +1,202 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" +#include "SemanticContext.h" + +#include "PredictionMode.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +struct AltAndContextConfigHasher +{ + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. 
+ */ + size_t operator () (ATNConfig *o) const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, o->state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, o->context); + return misc::MurmurHash::finish(hashCode, 2); + } +}; + +struct AltAndContextConfigComparer { + bool operator()(ATNConfig *a, ATNConfig *b) const + { + if (a == b) { + return true; + } + return a->state->stateNumber == b->state->stateNumber && *a->context == *b->context; + } +}; + +bool PredictionModeClass::hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs) { + /* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { + return true; + } + + bool heuristic; + + // Pure SLL mode parsing or SLL+LL if: + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL; costs more time + // since we'll often fail over anyway. 
+ if (mode == PredictionMode::SLL || !configs->hasSemanticContext) { + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(configs); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + } else { + // dup configs, tossing out semantic predicates + ATNConfigSet dup(true); + for (auto &config : configs->configs) { + Ref<ATNConfig> c = std::make_shared<ATNConfig>(*config, SemanticContext::Empty::Instance); + dup.add(c); + } + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(&dup); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(&dup); + } + + return heuristic; +} + +bool PredictionModeClass::hasConfigInRuleStopState(ATNConfigSet *configs) { + for (const auto &config : configs->configs) { + if (RuleStopState::is(config->state)) { + return true; + } + } + + return false; +} + +bool PredictionModeClass::allConfigsInRuleStopStates(ATNConfigSet *configs) { + for (const auto &config : configs->configs) { + if (!RuleStopState::is(config->state)) { + return false; + } + } + + return true; +} + +size_t PredictionModeClass::resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + return getSingleViableAlt(altsets); +} + +bool PredictionModeClass::allSubsetsConflict(const std::vector<antlrcpp::BitSet>& altsets) { + return !hasNonConflictingAltSet(altsets); +} + +bool PredictionModeClass::hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() == 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::hasConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() > 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::allSubsetsEqual(const std::vector<antlrcpp::BitSet>& altsets) { + if (altsets.empty()) { + return true; + } + + const antlrcpp::BitSet& first = *altsets.begin(); + for 
(const antlrcpp::BitSet& alts : altsets) { + if (alts != first) { + return false; + } + } + return true; +} + +size_t PredictionModeClass::getUniqueAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all = getAlts(altsets); + if (all.count() == 1) { + return all.nextSetBit(0); + } + return ATN::INVALID_ALT_NUMBER; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all; + for (const auto &alts : altsets) { + all |= alts; + } + + return all; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(ATNConfigSet *configs) { + antlrcpp::BitSet alts; + for (const auto &config : configs->configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<antlrcpp::BitSet> PredictionModeClass::getConflictingAltSubsets(ATNConfigSet *configs) { + std::unordered_map<ATNConfig*, antlrcpp::BitSet, AltAndContextConfigHasher, AltAndContextConfigComparer> configToAlts; + for (auto &config : configs->configs) { + configToAlts[config.get()].set(config->alt); + } + std::vector<antlrcpp::BitSet> values; + values.reserve(configToAlts.size()); + for (const auto &pair : configToAlts) { + values.push_back(pair.second); + } + return values; +} + +std::unordered_map<ATNState*, antlrcpp::BitSet> PredictionModeClass::getStateToAltMap(ATNConfigSet *configs) { + std::unordered_map<ATNState*, antlrcpp::BitSet> m; + for (const auto &c : configs->configs) { + m[c->state].set(c->alt); + } + return m; +} + +bool PredictionModeClass::hasStateAssociatedWithOneAlt(ATNConfigSet *configs) { + auto x = getStateToAltMap(configs); + for (const auto &pair : x){ + if (pair.second.count() == 1) return true; + } + return false; +} + +size_t PredictionModeClass::getSingleViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet viableAlts; + for (const auto &alts : altsets) { + size_t minAlt = alts.nextSetBit(0); + + viableAlts.set(minAlt); + if (viableAlts.count() > 1) // more than 1 viable alt + { + return 
ATN::INVALID_ALT_NUMBER; + } + } + + return viableAlts.nextSetBit(0); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h new file mode 100644 index 0000000000..4868ea2ff2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h @@ -0,0 +1,436 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ + enum class PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + * <p> + * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link #LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link #SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link #LL} prediction abilities to complete successfully.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + SLL, + + /** + * The LL(*) prediction mode. 
This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + * <p> + * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for <em>exactly which</em> alternatives are + * ambiguous.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL, + + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + * <p> + * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL_EXACT_AMBIG_DETECTION + }; + + class ANTLR4CPP_PUBLIC PredictionModeClass { + public: + /** + * Computes the SLL prediction termination condition. 
+ * + * <p> + * This method computes the SLL prediction termination condition for both of + * the following cases.</p> + * + * <ul> + * <li>The usual SLL+LL fallback upon SLL conflict</li> + * <li>Pure SLL without LL fallback</li> + * </ul> + * + * <p><strong>COMBINED SLL+LL PARSING</strong></p> + * + * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are + * ensured regardless of how the termination condition is computed by this + * method. Due to the substantially higher cost of LL prediction, the + * prediction should only fall back to LL when the additional lookahead + * cannot lead to a unique SLL prediction.</p> + * + * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only + * conflicting subsets should fall back to full LL, even if the + * configuration sets don't resolve to the same alternative (e.g. + * {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting + * configuration, SLL could continue with the hopes that more lookahead will + * resolve via one of those non-conflicting configurations.</p> + * + * <p>Here's the prediction termination rule then: SLL (for SLL+LL parsing) + * stops when it sees only conflicting configuration subsets. In contrast, + * full LL keeps going when there is uncertainty.</p> + * + * <p><strong>HEURISTIC</strong></p> + * + * <p>As a heuristic, we stop prediction when we see any conflicting subset + * unless we see a state that only has one alternative associated with it. + * The single-alt-state thing lets prediction continue upon rules like + * (otherwise, it would admit defeat too soon):</p> + * + * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p> + * + * <p>When the ATN simulation reaches the state before {@code ';'}, it has a + * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}.
Naturally + * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + * processing this node because alternative two has another way to continue, + * via {@code [6|2|[]]}.</p> + * + * <p>It also lets us continue for this rule:</p> + * + * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p> + * + * <p>After matching input A, we reach the stop state for rule A, state 1. + * State 8 is the state right before B. Clearly alternatives 1 and 2 + * conflict and no amount of further lookahead will separate the two. + * However, alternative 3 will be able to continue and so we do not stop + * working on this state. In the previous example, we're concerned with + * states associated with the conflicting alternatives. Here alt 3 is not + * associated with the conflicting configs, but since we can continue + * looking for input reasonably, don't declare the state done.</p> + * + * <p><strong>PURE SLL PARSING</strong></p> + * + * <p>To handle pure SLL parsing, all we have to do is make sure that we + * combine stack contexts for configurations that differ only by semantic + * predicate. From there, we can do the usual SLL termination heuristic.</p> + * + * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p> + * + * <p>SLL decisions don't evaluate predicates until after they reach DFA stop + * states because they need to create the DFA cache that works in all + * semantic situations. In contrast, full LL evaluates predicates collected + * during start state computation so it can ignore predicates thereafter.
+ * This means that SLL termination detection can totally ignore semantic + * predicates.</p> + * + * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + * semantic predicate contexts so we might see two configurations like the + * following.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p> + * + * <p>Before testing these configurations against others, we have to merge + * {@code x} and {@code x'} (without modifying the existing configurations). + * For example, we test {@code (x+x')==x''} when looking for conflicts in + * the following configurations.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p> + * + * <p>If the configuration set has predicates (as indicated by + * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + * the configurations to strip out all of the predicates so that a standard + * {@link ATNConfigSet} will merge everything ignoring predicates.</p> + */ + static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs); + + /// <summary> + /// Checks if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool hasConfigInRuleStopState(ATNConfigSet *configs); + + /// <summary> + /// Checks if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). 
+ /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool allConfigsInRuleStopStates(ATNConfigSet *configs); + + /** + * Full LL prediction termination. + * + * <p>Can we stop looking ahead during ATN simulation or is there some + * uncertainty as to which alternative we will ultimately pick, after + * consuming more input? Even if there are partial conflicts, we might know + * that everything is going to resolve to the same minimum alternative. That + * means we can stop since no more lookahead will change that fact. On the + * other hand, there might be multiple conflicts that resolve to different + * minimums. That means we need more look ahead to decide which of those + * alternatives we should predict.</p> + * + * <p>The basic idea is to split the set of configurations {@code C}, into + * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + * non-conflicting configurations. Two configurations conflict if they have + * identical {@link ATNConfig#state} and {@link ATNConfig#context} values + * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} + * and {@code (s, j, ctx, _)} for {@code i!=j}.</p> + * + * <p>Reduce these configuration subsets to the set of possible alternatives. 
+ * You can compute the alternative subsets in one pass as follows:</p> + * + * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + * {@code C} holding {@code s} and {@code ctx} fixed.</p> + * + * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p> + * + * <pre> + * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not + * alt and not pred + * </pre> + * + * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p> + * + * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with + * {@code s} and {@code ctx}.</p> + * + * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If + * the union of these alternative subsets is a singleton, then no amount of + * more lookahead will help us. We will always pick that alternative. If, + * however, there is more than one alternative, then we are uncertain which + * alternative to predict and must continue looking for resolution. We may + * or may not discover an ambiguity in the future, even if there are no + * conflicting subsets this round.</p> + * + * <p>The biggest sin is to terminate early because it means we've made a + * decision but were uncertain as to the eventual outcome. We haven't used + * enough lookahead. On the other hand, announcing a conflict too late is no + * big deal; you will still have the conflict. It's just inefficient. It + * might even look until the end of file.</p> + * + * <p>No special consideration for semantic predicates is required because + * predicates are evaluated on-the-fly for full LL prediction, ensuring that + * no configuration contains a semantic context during the termination + * check.</p> + * + * <p><strong>CONFLICTING CONFIGS</strong></p> + * + * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + * when {@code i!=j} but {@code x=x'}. 
Because we merge all + * {@code (s, i, _)} configurations together, that means that there are at + * most {@code n} configurations associated with state {@code s} for + * {@code n} possible alternatives in the decision. The merged stacks + * complicate the comparison of configuration contexts {@code x} and + * {@code x'}. Sam checks to see if one is a subset of the other by calling + * merge and checking to see if the merged result is either {@code x} or + * {@code x'}. If the {@code x} associated with lowest alternative {@code i} + * is the superset, then {@code i} is the only possible prediction since the + * others resolve to {@code min(i)} as well. However, if {@code x} is + * associated with {@code j>i} then at least one stack configuration for + * {@code j} is not in conflict with alternative {@code i}. The algorithm + * should keep going, looking for more lookahead due to the uncertainty.</p> + * + * <p>For simplicity, I'm doing an equality check between {@code x} and + * {@code x'} that lets the algorithm continue to consume lookahead longer + * than necessary. The reason I like the equality is of course the + * simplicity but also because that is the test you need to detect the + * alternatives that are actually in conflict.</p> + * + * <p><strong>CONTINUE/STOP RULE</strong></p> + * + * <p>Continue if union of resolved alternative sets from non-conflicting and + * conflicting alternative subsets has more than one alternative. We are + * uncertain about which alternative to predict.</p> + * + * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + * alternatives are still in the running for the amount of input we've + * consumed at this point.
The conflicting sets let us strip away + * configurations that won't lead to more states because we resolve + * conflicts to the configuration with a minimum alternate for the + * conflicting set.</p> + * + * <p><strong>CASES</strong></p> + * + * <ul> + * + * <li>no conflicts and more than 1 alternative in set => continue</li> + * + * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, + * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set + * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1,3}} => continue + * </li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set + * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1}} => stop and predict 1</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {1}} = {@code {1}} => stop and predict 1, can announce + * ambiguity {@code {1,2}}</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, + * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {2}} = {@code {1,2}} => continue</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, + * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {3}} = {@code {1,3}} => continue</li> + * + * </ul> + * + * <p><strong>EXACT AMBIGUITY DETECTION</strong></p> + * + * <p>If all states report the same conflicting set of alternatives, then we + * know we have the exact ambiguity set.</p> + * + * <p><code>|A_<em>i</em>|>1</code> and + * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p> + * + * <p>In other words, we continue examining lookahead until all {@code A_i} + * have more than one alternative and all {@code A_i} are the same.
If + * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + * because the resolved set is {@code {1}}. To determine what the real + * ambiguity is, we have to know whether the ambiguity is between one and + * two or one and three so we keep going. We can only stop prediction when + * we need exact ambiguity detection when the sets look like + * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p> + */ + static size_t resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} contains more + /// than one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every <seealso cref="BitSet"/> in {@code altsets} + /// has + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool allSubsetsConflict(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// exactly one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> 1, otherwise {@code false} + /// </returns> + static bool hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// more than one alternative. 
+ /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool hasConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} is equivalent. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every member of {@code altsets} is equal to the + /// others, otherwise {@code false} </returns> + static bool allSubsetsEqual(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Returns the unique alternative predicted by all alternative subsets in + /// {@code altsets}. If no such alternative exists, this method returns + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + static size_t getUniqueAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Gets the complete set of represented alternatives for a collection of + /// alternative subsets. This method returns the union of each <seealso + /// cref="BitSet"/> + /// in {@code altsets}. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> the set of represented alternatives in {@code altsets} </returns> + static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets); + + /** Get union of all alts from configs. @since 4.5.1 */ + static antlrcpp::BitSet getAlts(ATNConfigSet *configs); + + /// <summary> + /// This function gets the conflicting alt subsets from a configuration set. 
+ /// For each configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and + /// x, not + /// alt and not pred + /// </pre> + /// </summary> + static std::vector<antlrcpp::BitSet> getConflictingAltSubsets(ATNConfigSet *configs); + + /// <summary> + /// Get a map from state to alt subset from a configuration set. For each + /// configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c.<seealso cref="ATNConfig#state state"/>] U= c.<seealso + /// cref="ATNConfig#alt alt"/> + /// </pre> + /// </summary> + static std::unordered_map<ATNState*, antlrcpp::BitSet> getStateToAltMap(ATNConfigSet *configs); + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs); + + static size_t getSingleViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp new file mode 100644 index 0000000000..9fd86d67d4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp @@ -0,0 +1,179 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+#include "atn/PredicateEvalInfo.h"
+#include "atn/LookaheadEventInfo.h"
+#include "Parser.h"
+#include "atn/ATNConfigSet.h"
+#include "support/CPPUtils.h"
+
+#include "atn/ProfilingATNSimulator.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+using namespace antlr4::dfa;
+using namespace antlrcpp;
+
+using namespace std::chrono;
+// Reuses the ATN, DFA table and shared context cache of the parser's current interpreter; one DecisionInfo per decision.
+ProfilingATNSimulator::ProfilingATNSimulator(Parser *parser)
+  : ParserATNSimulator(parser, parser->getInterpreter<ParserATNSimulator>()->atn,
+                       parser->getInterpreter<ParserATNSimulator>()->decisionToDFA,
+                       parser->getInterpreter<ParserATNSimulator>()->getSharedContextCache()) {
+  for (size_t i = 0; i < atn.decisionToState.size(); i++) {
+    _decisions.emplace_back(i);
+  }
+}
+// Runs the base prediction for `decision` and records elapsed time, invocation count and SLL/LL lookahead depth.
+size_t ProfilingATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) {
+  auto onExit = finally([this](){
+    _currentDecision = 0; // Originally -1, but that makes no sense (index into a vector and init value is also 0).
+  });
+
+  _sllStopIndex = -1;
+  _llStopIndex = -1;
+  _currentDecision = decision;
+  const steady_clock::time_point start = steady_clock::now(); // monotonic clock: interval measurement must not go backwards
+  size_t alt = ParserATNSimulator::adaptivePredict(input, decision, outerContext);
+  const steady_clock::time_point stop = steady_clock::now();
+  _decisions[decision].timeInPrediction += duration_cast<nanoseconds>(stop - start).count();
+  _decisions[decision].invocations++;
+
+  long long SLL_k = static_cast<long long>(_sllStopIndex) - static_cast<long long>(_startIndex) + 1; // signed math: stop index may still be -1
+  _decisions[decision].SLL_TotalLook += SLL_k;
+  _decisions[decision].SLL_MinLook = _decisions[decision].SLL_MinLook == 0 ? SLL_k : std::min(_decisions[decision].SLL_MinLook, SLL_k);
+  if (SLL_k > _decisions[decision].SLL_MaxLook) {
+    _decisions[decision].SLL_MaxLook = SLL_k;
+    _decisions[decision].SLL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _sllStopIndex, false);
+  }
+
+  if (_llStopIndex >= 0) { // only set when computeReachSet() ran in full-context mode
+    long long LL_k = static_cast<long long>(_llStopIndex) - static_cast<long long>(_startIndex) + 1;
+    _decisions[decision].LL_TotalLook += LL_k;
+    _decisions[decision].LL_MinLook = _decisions[decision].LL_MinLook == 0 ? LL_k : std::min(_decisions[decision].LL_MinLook, LL_k);
+    if (LL_k > _decisions[decision].LL_MaxLook) {
+      _decisions[decision].LL_MaxLook = LL_k;
+      _decisions[decision].LL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _llStopIndex, true);
+    }
+  }
+
+  return alt;
+}
+// Counts SLL DFA transitions and records an ErrorInfo when the existing target is the ERROR state.
+DFAState* ProfilingATNSimulator::getExistingTargetState(DFAState *previousD, size_t t) {
+  // this method is called after each time the input position advances
+  // during SLL prediction
+  _sllStopIndex = static_cast<int>(_input->index());
+
+  DFAState *existingTargetState = ParserATNSimulator::getExistingTargetState(previousD, t);
+  if (existingTargetState != nullptr) {
+    _decisions[_currentDecision].SLL_DFATransitions++; // count only if we transition over a DFA state
+    if (existingTargetState == ERROR.get()) {
+      _decisions[_currentDecision].errors.push_back(
+        ErrorInfo(_currentDecision, previousD->configs.get(), _input, _startIndex, _sllStopIndex, false)
+      );
+    }
+  }
+
+  _currentState = existingTargetState;
+  return existingTargetState;
+}
+// Delegates to the base implementation and remembers the returned state as the current one.
+DFAState* ProfilingATNSimulator::computeTargetState(DFA &dfa, DFAState *previousD, size_t t) {
+  DFAState *state = ParserATNSimulator::computeTargetState(dfa, previousD, t);
+  _currentState = state;
+  return state;
+}
+// Counts ATN transitions (LL or SLL, depending on fullCtx) and records an ErrorInfo when no configuration is reachable.
+std::unique_ptr<ATNConfigSet> ProfilingATNSimulator::computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) {
+  if (fullCtx) {
+    // this method is called after each time the input position advances
+    // during full context prediction
+    _llStopIndex = static_cast<int>(_input->index());
+  }
+
+  std::unique_ptr<ATNConfigSet> reachConfigs = ParserATNSimulator::computeReachSet(closure, t, fullCtx);
+  if (fullCtx) {
+    _decisions[_currentDecision].LL_ATNTransitions++; // count computation even if error
+    // Only the failure case needs bookkeeping; a non-null reach set is not recorded here.
+    if (reachConfigs == nullptr) { // no reach on current lookahead symbol. ERROR.
+      // TODO: does not handle delayed errors per getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule()
+      _decisions[_currentDecision].errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _llStopIndex, true));
+    }
+  } else {
+    ++_decisions[_currentDecision].SLL_ATNTransitions;
+    // Same as the full-context branch, but the error is tagged with the SLL stop index.
+    if (reachConfigs == nullptr) { // no reach on current lookahead symbol. ERROR.
+      _decisions[_currentDecision].errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _sllStopIndex, false));
+    }
+  }
+  return reachConfigs;
+}
+// Evaluates `pred` through the base class and logs a PredicateEvalInfo for everything except precedence predicates.
+bool ProfilingATNSimulator::evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack,
+                                                size_t alt, bool fullCtx) {
+  bool result = ParserATNSimulator::evalSemanticContext(pred, parserCallStack, alt, fullCtx);
+  if (!SemanticContext::PrecedencePredicate::is(pred.get())) { // type-tag check; PrecedencePredicate is final, so no RTTI cast is needed
+    bool fullContext = _llStopIndex >= 0;
+    int stopIndex = fullContext ? _llStopIndex : _sllStopIndex;
+    _decisions[_currentDecision].predicateEvals.push_back(
+      PredicateEvalInfo(_currentDecision, _input, _startIndex, stopIndex, pred, result, alt, fullCtx));
+  }
+
+  return result;
+}
+// Remembers the alternative SLL would have picked (lowest set bit), counts the LL fallback, then forwards the report.
+void ProfilingATNSimulator::reportAttemptingFullContext(DFA &dfa, const BitSet &conflictingAlts, ATNConfigSet *configs,
+                                                        size_t startIndex, size_t stopIndex) {
+  if (conflictingAlts.count() > 0) {
+    conflictingAltResolvedBySLL = conflictingAlts.nextSetBit(0);
+  } else {
+    conflictingAltResolvedBySLL = configs->getAlts().nextSetBit(0);
+  }
+  _decisions[_currentDecision].LL_Fallback++;
+  ParserATNSimulator::reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex);
+}
+// Records a context sensitivity only when the LL prediction differs from the alternative SLL would have chosen.
+void ProfilingATNSimulator::reportContextSensitivity(DFA &dfa, size_t prediction, ATNConfigSet *configs,
+                                                     size_t startIndex, size_t stopIndex) {
+  if (prediction != conflictingAltResolvedBySLL) {
+    _decisions[_currentDecision].contextSensitivities.push_back(
+      ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex)
+    );
+  }
+  ParserATNSimulator::reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex);
+}
+// Records every ambiguity; additionally records a context sensitivity when the full-context minimum alt differs from SLL's.
+void ProfilingATNSimulator::reportAmbiguity(DFA &dfa, DFAState *D, size_t startIndex, size_t stopIndex, bool exact,
+                                            const BitSet &ambigAlts, ATNConfigSet *configs) {
+  size_t prediction;
+  if (ambigAlts.count() > 0) {
+    prediction = ambigAlts.nextSetBit(0);
+  } else {
+    prediction = configs->getAlts().nextSetBit(0);
+  }
+  if (configs->fullCtx && prediction != conflictingAltResolvedBySLL) {
+    // Even though this is an ambiguity we are reporting, we can
+    // still detect some context sensitivities. Both SLL and LL
+    // are showing a conflict, hence an ambiguity, but if they resolve
+    // to different minimum alternatives we have also identified a
+    // context sensitivity.
+    _decisions[_currentDecision].contextSensitivities.push_back(
+      ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex)
+    );
+  }
+  _decisions[_currentDecision].ambiguities.push_back(
+    AmbiguityInfo(_currentDecision, configs, ambigAlts, _input, startIndex, stopIndex, configs->fullCtx)
+  );
+  ParserATNSimulator::reportAmbiguity(dfa, D, startIndex, stopIndex, exact, ambigAlts, configs);
+}
+// Returns a copy of the per-decision profiling records.
+std::vector<DecisionInfo> ProfilingATNSimulator::getDecisionInfo() const {
+  return _decisions;
+}
+// Returns the state last stored by getExistingTargetState()/computeTargetState(); nullptr before either has run.
+DFAState* ProfilingATNSimulator::getCurrentState() const {
+  return _currentState;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h
new file mode 100644
index 0000000000..551efb8556
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ParserATNSimulator.h"
+#include "atn/DecisionInfo.h"
+
+namespace antlr4 {
+namespace atn {
+  /// ParserATNSimulator that additionally collects one DecisionInfo record per ATN decision while predicting.
+  class ANTLR4CPP_PUBLIC ProfilingATNSimulator : public ParserATNSimulator {
+  public:
+    explicit ProfilingATNSimulator(Parser *parser);
+
+    virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) override;
+
+    virtual std::vector<DecisionInfo> getDecisionInfo() const;
+    virtual dfa::DFAState* getCurrentState() const;
+
+  protected:
+    std::vector<DecisionInfo> _decisions; // indexed by decision number
+
+    int _sllStopIndex = 0; // input index last seen during SLL prediction; reset to -1 by adaptivePredict()
+    int _llStopIndex = 0;  // input index last seen during full-context prediction; stays -1 if LL never ran
+
+    size_t _currentDecision = 0; // decision being predicted; reset to 0 when adaptivePredict() exits
+    dfa::DFAState *_currentState = nullptr; // was uninitialized: getCurrentState() must not return an indeterminate pointer
+
+    /// <summary>
+    /// At the point of LL failover, we record how SLL would resolve the conflict so that
+    /// we can determine whether or not a decision / input pair is context-sensitive.
+    /// If LL gives a different result than SLL's predicted alternative, we have a
+    /// context sensitivity for sure. The converse is not necessarily true, however.
+    /// It's possible that after conflict resolution chooses minimum alternatives,
+    /// SLL could get the same answer as LL. Regardless of whether or not the result indicates
+    /// an ambiguity, it is not treated as a context sensitivity because LL prediction
+    /// was not required in order to produce a correct prediction for this decision and input sequence.
+    /// It may in fact still be a context sensitivity but we don't know by looking at the
+    /// minimum alternatives for the current input.
+    /// </summary>
+    size_t conflictingAltResolvedBySLL = 0;
+
+    virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t) override;
+    virtual dfa::DFAState* computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) override;
+    virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) override;
+    virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack,
+                                     size_t alt, bool fullCtx) override;
+    virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, ATNConfigSet *configs,
+                                             size_t startIndex, size_t stopIndex) override;
+    virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs,
+                                          size_t startIndex, size_t stopIndex) override;
+    virtual void reportAmbiguity(dfa::DFA &dfa, dfa::DFAState *D, size_t startIndex, size_t stopIndex, bool exact,
+                                 const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp
new file mode 100644
index 0000000000..342e550de9
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp
@@ -0,0 +1,26 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "misc/IntervalSet.h"
+
+#include "atn/RangeTransition.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+// Creates a RANGE transition to `target` covering the inclusive symbol range [from, to].
+RangeTransition::RangeTransition(ATNState *target, size_t from, size_t to) : Transition(TransitionType::RANGE, target), from(from), to(to) {
+}
+// Returns the range as an interval set; the bounds are narrowed from size_t to int for IntervalSet::of.
+misc::IntervalSet RangeTransition::label() const {
+  return misc::IntervalSet::of(static_cast<int>(from), static_cast<int>(to));
+}
+// True when `symbol` lies inside [from, to]; the vocabulary bounds are unused for range transitions.
+bool RangeTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
+  return symbol >= from && symbol <= to;
+}
+// Debug representation: the base transition text followed by the range bounds.
+std::string RangeTransition::toString() const {
+  return "RANGE " + Transition::toString() + " { from: " + std::to_string(from) + ", to: " + std::to_string(to) + " }";
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h
new file mode 100644
index 0000000000..b75c60e247
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/Transition.h"
+
+namespace antlr4 {
+namespace atn {
+  /// Transition of type RANGE that matches any symbol in the inclusive range [from, to].
+  class ANTLR4CPP_PUBLIC RangeTransition final : public Transition {
+  public:
+    static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::RANGE; }
+    // Pointer overload: a null pointer is never a RangeTransition.
+    static bool is(const Transition *transition) { return transition != nullptr && is(*transition); }
+
+    const size_t from; // lower bound of the range (inclusive)
+    const size_t to;   // upper bound of the range (inclusive)
+
+    RangeTransition(ATNState *target, size_t from, size_t to);
+
+    virtual misc::IntervalSet label() const override;
+    virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
+
+    virtual std::string toString() const override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h
new file mode 100644
index 0000000000..549491514b
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNState.h"
+
+namespace antlr4 {
+namespace atn {
+  /// ATN state whose state type is RULE_START; carries a pointer to the matching stop state.
+  class ANTLR4CPP_PUBLIC RuleStartState final : public ATNState {
+  public:
+    static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::RULE_START; }
+    // Pointer overload: a null pointer is never a RuleStartState.
+    static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); }
+
+    RuleStopState *stopState = nullptr; // non-owning as far as this header shows; null until assigned by the caller
+    bool isLeftRecursiveRule = false;
+
+    RuleStartState() : ATNState(ATNStateType::RULE_START) {}
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h
new file mode 100644
index 0000000000..7792a1265c
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/ATNState.h"
+
+namespace antlr4 {
+namespace atn {
+
+  /// The last node in the ATN for a rule, unless that rule is the start symbol.
+  /// In that case, there is one transition to EOF. Later, we might encode
+  /// references to all calls to this rule to compute FOLLOW sets for
+  /// error handling.
+  class ANTLR4CPP_PUBLIC RuleStopState final : public ATNState {
+  public:
+    static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::RULE_STOP; }
+    // Pointer overload: a null pointer is never a RuleStopState.
+    static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); }
+
+    RuleStopState() : ATNState(ATNStateType::RULE_STOP) {}
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp
new file mode 100644
index 0000000000..ba50dd03dd
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp
@@ -0,0 +1,33 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/RuleStartState.h"
+#include "atn/RuleTransition.h"
+
+using namespace antlr4::atn;
+// Deprecated convenience overload: delegates to the four-argument constructor with precedence 0.
+RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState)
+  : RuleTransition(ruleStart, ruleIndex, 0, followState) {
+}
+// Creates a RULE transition targeting `ruleStart`; every member is set in the initializer list (declaration order).
+RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState)
+  : Transition(TransitionType::RULE, ruleStart), ruleIndex(ruleIndex), precedence(precedence),
+    followState(followState) {
+}
+// A rule transition consumes no input symbol.
+bool RuleTransition::isEpsilon() const {
+  return true;
+}
+// Never matches a concrete symbol; all three arguments are ignored.
+bool RuleTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const {
+  return false;
+}
+// Debug representation: base transition text plus rule index, precedence and the follow-state pointer value.
+std::string RuleTransition::toString() const {
+  std::stringstream ss;
+  ss << "RULE " << Transition::toString() << " { ruleIndex: " << ruleIndex << ", precedence: " << precedence <<
+    ", followState: " << std::hex << followState << " }";
+  return ss.str();
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h
new file mode 100644
index
0000000000..396ef700f2
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "atn/Transition.h"
+
+namespace antlr4 {
+namespace atn {
+  /// Epsilon transition of type RULE: targets a rule's start state and remembers the state to continue from.
+  class ANTLR4CPP_PUBLIC RuleTransition final : public Transition {
+  public:
+    static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::RULE; }
+    // Pointer overload: a null pointer is never a RuleTransition.
+    static bool is(const Transition *transition) { return transition != nullptr && is(*transition); }
+
+    /// Index of the rule this transition refers to (an index, not a pointer).
+    const size_t ruleIndex; // no Rule object at runtime
+
+    const int precedence; // 0 when built through the deprecated three-argument constructor
+
+    /// What node to begin computations following ref to rule.
+    ATNState *followState;
+
+    /// @deprecated Delegates with precedence 0. Use
+    /// <seealso cref="#RuleTransition(RuleStartState, size_t, int, ATNState)"/> instead.
+    RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState);
+
+    RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState);
+    RuleTransition(RuleTransition const&) = delete;
+    RuleTransition& operator=(RuleTransition const&) = delete;
+
+    virtual bool isEpsilon() const override;
+    virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override;
+
+    virtual std::string toString() const override;
+  };
+
+} // namespace atn
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp
new file mode 100644
index 0000000000..7d7fe068df
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp
@@ -0,0 +1,418 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include <functional>
+#include <unordered_set>
+
+#include "misc/MurmurHash.h"
+#include "support/Casts.h"
+#include "support/CPPUtils.h"
+#include "support/Arrays.h"
+
+#include "SemanticContext.h"
+
+using namespace antlr4;
+using namespace antlr4::atn;
+using namespace antlrcpp;
+
+namespace {
+  // Hash/equality functors so the operand set compares pointed-to contexts by value, not by address.
+  struct SemanticContextHasher final {
+    size_t operator()(const SemanticContext *semanticContext) const {
+      return semanticContext->hashCode();
+    }
+  };
+
+  struct SemanticContextComparer final {
+    bool operator()(const SemanticContext *lhs, const SemanticContext *rhs) const {
+      return *lhs == *rhs;
+    }
+  };
+  // Adds a non-null operand: precedence predicates collapse to the one preferred by `comparer`, others are de-duplicated.
+  template <typename Comparer>
+  void insertSemanticContext(const Ref<const SemanticContext> &semanticContext,
+                             std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> &operandSet,
+                             std::vector<Ref<const SemanticContext>> &operandList,
+                             Ref<const SemanticContext::PrecedencePredicate> &precedencePredicate,
+                             Comparer comparer) {
+    if (semanticContext != nullptr) {
+      if (semanticContext->getContextType() == SemanticContextType::PRECEDENCE) {
+        if (precedencePredicate == nullptr || comparer(downCast<const SemanticContext::PrecedencePredicate*>(semanticContext.get())->precedence, precedencePredicate->precedence)) {
+          precedencePredicate = std::static_pointer_cast<const SemanticContext::PrecedencePredicate>(semanticContext);
+        }
+      } else {
+        auto [existing, inserted] = operandSet.insert(semanticContext.get());
+        if (inserted) {
+          operandList.push_back(semanticContext);
+        }
+      }
+    }
+  }
+  // Rvalue overload of the helper above: identical logic, but the operand is moved instead of copied.
+  template <typename Comparer>
+  void insertSemanticContext(Ref<const SemanticContext> &&semanticContext,
+                             std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> &operandSet,
+                             std::vector<Ref<const SemanticContext>> &operandList,
+                             Ref<const SemanticContext::PrecedencePredicate> &precedencePredicate,
+                             Comparer comparer) {
+    if (semanticContext != nullptr) {
+      if (semanticContext->getContextType() == SemanticContextType::PRECEDENCE) {
+        if (precedencePredicate == nullptr || comparer(downCast<const SemanticContext::PrecedencePredicate*>(semanticContext.get())->precedence, precedencePredicate->precedence)) {
+          precedencePredicate = std::static_pointer_cast<const SemanticContext::PrecedencePredicate>(std::move(semanticContext));
+        }
+      } else {
+        auto [existing, inserted] = operandSet.insert(semanticContext.get());
+        if (inserted) {
+          operandList.push_back(std::move(semanticContext));
+        }
+      }
+    }
+  }
+  // Upper bound on the operands `x` can contribute: all operands of an AND/OR, otherwise one. `x` must be non-null.
+  size_t predictOperandCapacity(const Ref<const SemanticContext> &x) {
+    switch (x->getContextType()) {
+      case SemanticContextType::AND:
+        return downCast<const SemanticContext::AND&>(*x).getOperands().size();
+      case SemanticContextType::OR:
+        return downCast<const SemanticContext::OR&>(*x).getOperands().size();
+      default:
+        return 1;
+    }
+  }
+
+  size_t predictOperandCapacity(const Ref<const SemanticContext> &a, const Ref<const SemanticContext> &b) {
+    return predictOperandCapacity(a) + predictOperandCapacity(b);
+  }
+
+}
+
+//------------------ Predicate -----------------------------------------------------------------------------------------
+
+SemanticContext::Predicate::Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent)
+    : SemanticContext(SemanticContextType::PREDICATE), ruleIndex(ruleIndex), predIndex(predIndex), isCtxDependent(isCtxDependent) {}
+// Evaluates via Recognizer::sempred; the call stack is only passed on for context-dependent predicates.
+bool SemanticContext::Predicate::eval(Recognizer *parser, RuleContext *parserCallStack) const {
+  RuleContext *localctx = nullptr;
+  if (isCtxDependent) {
+    localctx = parserCallStack;
+  }
+  return parser->sempred(localctx, ruleIndex, predIndex);
+}
+
+size_t SemanticContext::Predicate::hashCode() const {
+  size_t hashCode = misc::MurmurHash::initialize();
+  hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(getContextType()));
+  hashCode = misc::MurmurHash::update(hashCode, ruleIndex);
+  hashCode = misc::MurmurHash::update(hashCode, predIndex);
+  hashCode = misc::MurmurHash::update(hashCode, isCtxDependent ? 1 : 0);
+  hashCode = misc::MurmurHash::finish(hashCode, 4);
+  return hashCode;
+}
+
+bool SemanticContext::Predicate::equals(const SemanticContext &other) const {
+  if (this == &other) {
+    return true;
+  }
+  if (getContextType() != other.getContextType()) {
+    return false;
+  }
+  const Predicate &p = downCast<const Predicate&>(other);
+  return ruleIndex == p.ruleIndex && predIndex == p.predIndex && isCtxDependent == p.isCtxDependent;
+}
+
+std::string SemanticContext::Predicate::toString() const {
+  return std::string("{") + std::to_string(ruleIndex) + std::string(":") + std::to_string(predIndex) + std::string("}?");
+}
+
+//------------------ PrecedencePredicate -------------------------------------------------------------------------------
+
+SemanticContext::PrecedencePredicate::PrecedencePredicate(int precedence) : SemanticContext(SemanticContextType::PRECEDENCE), precedence(precedence) {}
+
+bool SemanticContext::PrecedencePredicate::eval(Recognizer *parser, RuleContext *parserCallStack) const {
+  return parser->precpred(parserCallStack, precedence);
+}
+// Reduces to Empty::Instance (true) when the precedence test passes and to nullptr (false) otherwise.
+Ref<const SemanticContext> SemanticContext::PrecedencePredicate::evalPrecedence(Recognizer *parser,
+  RuleContext *parserCallStack) const {
+  if (parser->precpred(parserCallStack, precedence)) {
+    return SemanticContext::Empty::Instance;
+  }
+  return nullptr;
+}
+
+size_t SemanticContext::PrecedencePredicate::hashCode() const {
+  size_t hashCode = misc::MurmurHash::initialize();
+  hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(getContextType()));
+  hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(precedence));
+  return misc::MurmurHash::finish(hashCode, 2);
+}
+
+bool SemanticContext::PrecedencePredicate::equals(const SemanticContext &other) const {
+  if (this == &other) {
+    return true;
+  }
+  if (getContextType() != other.getContextType()) {
+    return false;
+  }
+  const PrecedencePredicate &predicate = downCast<const PrecedencePredicate&>(other);
+  return precedence == predicate.precedence;
+}
+
+std::string SemanticContext::PrecedencePredicate::toString() const {
+  return "{" + std::to_string(precedence) + ">=prec}?";
+}
+
+//------------------ AND -----------------------------------------------------------------------------------------------
+// Flattens nested AND operands, drops duplicates, keeps one precedence predicate. a and b must be non-null (dereferenced unchecked).
+SemanticContext::AND::AND(Ref<const SemanticContext> a, Ref<const SemanticContext> b) : Operator(SemanticContextType::AND) {
+  std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> operands;
+  Ref<const SemanticContext::PrecedencePredicate> precedencePredicate;
+
+  _opnds.reserve(predictOperandCapacity(a, b) + 1);
+
+  if (a->getContextType() == SemanticContextType::AND) {
+    for (const auto &operand : downCast<const AND*>(a.get())->getOperands()) {
+      insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::less<int>{});
+    }
+  } else {
+    insertSemanticContext(std::move(a), operands, _opnds, precedencePredicate, std::less<int>{});
+  }
+
+  if (b->getContextType() == SemanticContextType::AND) {
+    for (const auto &operand : downCast<const AND*>(b.get())->getOperands()) {
+      insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::less<int>{});
+    }
+  } else {
+    insertSemanticContext(std::move(b), operands, _opnds, precedencePredicate, std::less<int>{});
+  }
+
+  if (precedencePredicate != nullptr) {
+    // interested in the transition with the lowest precedence
+    auto [existing, inserted] = operands.insert(precedencePredicate.get());
+    if (inserted) {
+      _opnds.push_back(std::move(precedencePredicate));
+    }
+  }
+}
+
+const std::vector<Ref<const SemanticContext>>& SemanticContext::AND::getOperands() const {
+  return _opnds;
+}
+
+bool SemanticContext::AND::equals(const SemanticContext &other) const {
+  if (this == &other) {
+    return true;
+  }
+  if (getContextType() != other.getContextType()) {
+    return false;
+  }
+  const AND &context = downCast<const AND&>(other);
+  return Arrays::equals(getOperands(), context.getOperands());
+}
+
+size_t SemanticContext::AND::hashCode() const {
+  size_t hash = misc::MurmurHash::initialize();
+  hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType()));
+  return misc::MurmurHash::hashCode(getOperands(), hash);
+}
+// Short-circuits on the first operand that evaluates to false.
+bool SemanticContext::AND::eval(Recognizer *parser, RuleContext *parserCallStack) const {
+  for (const auto &opnd : getOperands()) {
+    if (!opnd->eval(parser, parserCallStack)) {
+      return false;
+    }
+  }
+  return true;
+}
+// Returns this context unchanged, nullptr (false), Empty::Instance (true), or a rebuilt conjunction of the remaining operands.
+Ref<const SemanticContext> SemanticContext::AND::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const {
+  bool differs = false;
+  std::vector<Ref<const SemanticContext>> operands;
+  for (const auto &context : getOperands()) {
+    auto evaluated = context->evalPrecedence(parser, parserCallStack);
+    differs |= (evaluated != context);
+    if (evaluated == nullptr) {
+      // The AND context is false if any element is false.
+      return nullptr;
+    }
+    if (evaluated != Empty::Instance) {
+      // Reduce the result by skipping true elements.
+      operands.push_back(std::move(evaluated));
+    }
+  }
+
+  if (!differs) {
+    return shared_from_this();
+  }
+
+  if (operands.empty()) {
+    // All elements were true, so the AND context is true.
+    return Empty::Instance;
+  }
+
+  Ref<const SemanticContext> result = std::move(operands[0]);
+  for (size_t i = 1; i < operands.size(); ++i) {
+    result = SemanticContext::And(std::move(result), std::move(operands[i]));
+  }
+
+  return result;
+}
+// Joins the operands with " && "; the separator goes only between operands (no dangling trailing " && ").
+std::string SemanticContext::AND::toString() const {
+  std::string tmp;
+  for (const auto &var : getOperands()) {
+    tmp += (tmp.empty() ? "" : " && ") + var->toString(); // operand strings are never empty, so empty() means "first operand"
+  }
+  return tmp;
+}
+
+//------------------ OR ------------------------------------------------------------------------------------------------
+// Flattens nested OR operands, drops duplicates, keeps one precedence predicate. a and b must be non-null (dereferenced unchecked).
+SemanticContext::OR::OR(Ref<const SemanticContext> a, Ref<const SemanticContext> b) : Operator(SemanticContextType::OR) {
+  std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> operands;
+  Ref<const SemanticContext::PrecedencePredicate> precedencePredicate;
+
+  _opnds.reserve(predictOperandCapacity(a, b) + 1);
+
+  if (a->getContextType() == SemanticContextType::OR) {
+    for (const auto &operand : downCast<const OR*>(a.get())->getOperands()) {
+      insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::greater<int>{});
+    }
+  } else {
+    insertSemanticContext(std::move(a), operands, _opnds, precedencePredicate, std::greater<int>{});
+  }
+
+  if (b->getContextType() == SemanticContextType::OR) {
+    for (const auto &operand : downCast<const OR*>(b.get())->getOperands()) {
+      insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::greater<int>{});
+    }
+  } else {
+    insertSemanticContext(std::move(b), operands, _opnds, precedencePredicate, std::greater<int>{});
+  }
+
+  if (precedencePredicate != nullptr) {
+    // interested in the transition with the highest precedence
+    auto [existing, inserted] = operands.insert(precedencePredicate.get());
+    if (inserted) {
+      _opnds.push_back(std::move(precedencePredicate));
+    }
+  }
+}
+
+const std::vector<Ref<const SemanticContext>>& SemanticContext::OR::getOperands() const {
+  return _opnds;
+}
+
+bool SemanticContext::OR::equals(const SemanticContext &other) const {
+  if (this == &other) {
+    return true;
+  }
+  if (getContextType() != other.getContextType()) {
+    return false;
+  }
+  const OR &context = downCast<const OR&>(other);
+  return Arrays::equals(getOperands(), context.getOperands());
+}
+
+size_t SemanticContext::OR::hashCode() const {
+  size_t hash = misc::MurmurHash::initialize();
+  hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType()));
+  return misc::MurmurHash::hashCode(getOperands(), hash);
+}
+// Short-circuits on the first operand that evaluates to true.
+bool SemanticContext::OR::eval(Recognizer *parser, RuleContext *parserCallStack) const {
+  for (const auto &opnd : getOperands()) {
+    if (opnd->eval(parser, parserCallStack)) {
+      return true;
+    }
+  }
+  return false;
+}
+// Returns this context unchanged, Empty::Instance (true), nullptr (false), or a rebuilt disjunction of the remaining operands.
+Ref<const SemanticContext> SemanticContext::OR::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const {
+  bool differs = false;
+  std::vector<Ref<const SemanticContext>> operands;
+  for (const auto &context : getOperands()) {
+    auto evaluated = context->evalPrecedence(parser, parserCallStack);
+    differs |= (evaluated != context);
+    if (evaluated == Empty::Instance) {
+      // The OR context is true if any element is true.
+      return Empty::Instance;
+    }
+    if (evaluated != nullptr) {
+      // Reduce the result by skipping false elements.
+      operands.push_back(std::move(evaluated));
+    }
+  }
+
+  if (!differs) {
+    return shared_from_this();
+  }
+
+  if (operands.empty()) {
+    // All elements were false, so the OR context is false.
+    return nullptr;
+  }
+
+  Ref<const SemanticContext> result = std::move(operands[0]);
+  for (size_t i = 1; i < operands.size(); ++i) {
+    result = SemanticContext::Or(std::move(result), std::move(operands[i]));
+  }
+
+  return result;
+}
+// Joins the operands with " || "; the separator goes only between operands (no dangling trailing " || ").
+std::string SemanticContext::OR::toString() const {
+  std::string tmp;
+  for (const auto &var : getOperands()) {
+    tmp += (tmp.empty() ? "" : " || ") + var->toString(); // operand strings are never empty, so empty() means "first operand"
+  }
+  return tmp;
+}
+
+//------------------ SemanticContext -----------------------------------------------------------------------------------
+// The "always true" context is a Predicate built from INVALID_INDEX values; callers recognise it by pointer identity.
+const Ref<const SemanticContext> SemanticContext::Empty::Instance = std::make_shared<Predicate>(INVALID_INDEX, INVALID_INDEX, false);
+
+Ref<const SemanticContext> SemanticContext::evalPrecedence(Recognizer * /*parser*/, RuleContext * /*parserCallStack*/) const {
+  return shared_from_this();
+}
+// Conjunction factory: a null or Empty operand yields the other operand; a single surviving operand is returned unwrapped.
+Ref<const SemanticContext> SemanticContext::And(Ref<const SemanticContext> a, Ref<const SemanticContext> b) {
+  if (!a || a == Empty::Instance) {
+    return b;
+  }
+
+  if (!b || b == Empty::Instance) {
+    return a;
+  }
+
+  Ref<AND> result = std::make_shared<AND>(std::move(a), std::move(b));
+  if (result->getOperands().size() == 1) {
+    return result->getOperands()[0];
+  }
+
+  return result;
+}
+// Disjunction factory: a null operand yields the other one, an Empty operand makes the result Empty; single operands are unwrapped.
+Ref<const SemanticContext> SemanticContext::Or(Ref<const SemanticContext> a, Ref<const SemanticContext> b) {
+  if (!a) {
+    return b;
+  }
+  if (!b) {
+    return a;
+  }
+
+  if (a == Empty::Instance || b == Empty::Instance) {
+    return Empty::Instance;
+  }
+
+  Ref<OR> result = std::make_shared<OR>(std::move(a), std::move(b));
+  if (result->getOperands().size() == 1) {
+    return result->getOperands()[0];
+  }
+
+  return result;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h
new file mode 100644
index 0000000000..8116fc0b56
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h
@@ -0,0 +1,237 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "support/CPPUtils.h" +#include "atn/SemanticContextType.h" + +namespace antlr4 { +namespace atn { + + /// A tree structure used to record the semantic context in which + /// an ATN configuration is valid. It's either a single predicate, + /// a conjunction "p1 && p2", or a sum of products "p1||p2". + /// + /// I have scoped the AND, OR, and Predicate subclasses of + /// SemanticContext within the scope of this outer class. + class ANTLR4CPP_PUBLIC SemanticContext : public std::enable_shared_from_this<SemanticContext> { + public: + virtual ~SemanticContext() = default; + + SemanticContextType getContextType() const { return _contextType; } + + /// <summary> + /// For context independent predicates, we evaluate them without a local + /// context (i.e., null context). That way, we can evaluate them without + /// having to create proper rule-specific context during prediction (as + /// opposed to the parser, which creates them naturally). In a practical + /// sense, this avoids a cast exception from RuleContext to myruleContext. + /// <p/> + /// For context dependent predicates, we must pass in a local context so that + /// references such as $arg evaluate properly as _localctx.arg. We only + /// capture context dependent predicates in the context in which we begin + /// prediction, so we passed in the outer context here in case of context + /// dependent predicate evaluation. + /// </summary> + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) const = 0; + + /** + * Evaluate the precedence predicates for the context and reduce the result. + * + * @param parser The parser instance. + * @param parserCallStack + * @return The simplified semantic context after precedence predicates are + * evaluated, which will be one of the following values. 
+ * <ul> + * <li>{@link #NONE}: if the predicate simplifies to {@code true} after + * precedence predicates are evaluated.</li> + * <li>{@code null}: if the predicate simplifies to {@code false} after + * precedence predicates are evaluated.</li> + * <li>{@code this}: if the semantic context is not changed as a result of + * precedence predicate evaluation.</li> + * <li>A non-{@code null} {@link SemanticContext}: the new simplified + * semantic context after precedence predicates are evaluated.</li> + * </ul> + */ + virtual Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const; + + virtual size_t hashCode() const = 0; + + virtual bool equals(const SemanticContext &other) const = 0; + + virtual std::string toString() const = 0; + + static Ref<const SemanticContext> And(Ref<const SemanticContext> a, Ref<const SemanticContext> b); + + /// See also: ParserATNSimulator::getPredsForAmbigAlts. + static Ref<const SemanticContext> Or(Ref<const SemanticContext> a, Ref<const SemanticContext> b); + + class Empty; + class Predicate; + class PrecedencePredicate; + class Operator; + class AND; + class OR; + + protected: + explicit SemanticContext(SemanticContextType contextType) : _contextType(contextType) {} + + private: + const SemanticContextType _contextType; + }; + + inline bool operator==(const SemanticContext &lhs, const SemanticContext &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const SemanticContext &lhs, const SemanticContext &rhs) { + return !operator==(lhs, rhs); + } + + class ANTLR4CPP_PUBLIC SemanticContext::Empty : public SemanticContext{ + public: + /** + * The default {@link SemanticContext}, which is semantically equivalent to + * a predicate of the form {@code {true}?}. 
+ */ + static const Ref<const SemanticContext> Instance; + }; + + class ANTLR4CPP_PUBLIC SemanticContext::Predicate final : public SemanticContext { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::PREDICATE; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + const size_t ruleIndex; + const size_t predIndex; + const bool isCtxDependent; // e.g., $i ref in pred + + Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + }; + + class ANTLR4CPP_PUBLIC SemanticContext::PrecedencePredicate final : public SemanticContext { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::PRECEDENCE; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + const int precedence; + + explicit PrecedencePredicate(int precedence); + + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + }; + + /** + * This is the base class for semantic context "operators", which operate on + * a collection of semantic context "operands". 
+ * + * @since 4.3 + */ + class ANTLR4CPP_PUBLIC SemanticContext::Operator : public SemanticContext { + public: + static bool is(const SemanticContext &semanticContext) { + const auto contextType = semanticContext.getContextType(); + return contextType == SemanticContextType::AND || contextType == SemanticContextType::OR; + } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + /** + * Gets the operands for the semantic context operator. + * + * @return a collection of {@link SemanticContext} operands for the + * operator. + * + * @since 4.3 + */ + + virtual const std::vector<Ref<const SemanticContext>>& getOperands() const = 0; + + protected: + using SemanticContext::SemanticContext; + }; + + /** + * A semantic context which is true whenever none of the contained contexts + * is false. + */ + class ANTLR4CPP_PUBLIC SemanticContext::AND final : public SemanticContext::Operator { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::AND; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + AND(Ref<const SemanticContext> a, Ref<const SemanticContext> b) ; + + const std::vector<Ref<const SemanticContext>>& getOperands() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered. + */ + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + + private: + std::vector<Ref<const SemanticContext>> _opnds; + }; + + /** + * A semantic context which is true whenever at least one of the contained + * contexts is true. 
+ */ + class ANTLR4CPP_PUBLIC SemanticContext::OR final : public SemanticContext::Operator { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::OR; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + OR(Ref<const SemanticContext> a, Ref<const SemanticContext> b); + + const std::vector<Ref<const SemanticContext>>& getOperands() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered. + */ + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + + private: + std::vector<Ref<const SemanticContext>> _opnds; + }; + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::SemanticContext> { + size_t operator()(const ::antlr4::atn::SemanticContext &semanticContext) const { + return semanticContext.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h new file mode 100644 index 0000000000..bca6e421d2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + enum class SemanticContextType : size_t { + PREDICATE = 1, + PRECEDENCE = 2, + AND = 3, + OR = 4, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h new file mode 100644 index 0000000000..a723589bc3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <vector> + +#include "antlr4-common.h" +#include "misc/MurmurHash.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SerializedATNView final { + public: + using value_type = int32_t; + using size_type = size_t; + using difference_type = ptrdiff_t; + using reference = int32_t&; + using const_reference = const int32_t&; + using pointer = int32_t*; + using const_pointer = const int32_t*; + using iterator = const_pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + SerializedATNView() = default; + + SerializedATNView(const_pointer data, size_type size) : _data(data), _size(size) {} + + SerializedATNView(const std::vector<int32_t> &serializedATN) : _data(serializedATN.data()), _size(serializedATN.size()) {} + + SerializedATNView(const SerializedATNView&) = default; + + SerializedATNView& operator=(const SerializedATNView&) = default; + + const_iterator begin() const { return data(); } + + const_iterator cbegin() const { return data(); } + + const_iterator end() const { return data() 
+ size(); } + + const_iterator cend() const { return data() + size(); } + + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + bool empty() const { return size() == 0; } + + const_pointer data() const { return _data; } + + size_type size() const { return _size; } + + size_type size_bytes() const { return size() * sizeof(value_type); } + + const_reference operator[](size_type index) const { return _data[index]; } + + private: + const_pointer _data = nullptr; + size_type _size = 0; + }; + + inline bool operator==(const SerializedATNView &lhs, const SerializedATNView &rhs) { + return (lhs.data() == rhs.data() && lhs.size() == rhs.size()) || + (lhs.size() == rhs.size() && std::memcmp(lhs.data(), rhs.data(), lhs.size_bytes()) == 0); + } + + inline bool operator!=(const SerializedATNView &lhs, const SerializedATNView &rhs) { + return !operator==(lhs, rhs); + } + + inline bool operator<(const SerializedATNView &lhs, const SerializedATNView &rhs) { + int diff = std::memcmp(lhs.data(), rhs.data(), std::min(lhs.size_bytes(), rhs.size_bytes())); + return diff < 0 || (diff == 0 && lhs.size() < rhs.size()); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::SerializedATNView> { + size_t operator()(const ::antlr4::atn::SerializedATNView &serializedATNView) const { + return ::antlr4::misc::MurmurHash::hashCode(serializedATNView.data(), serializedATNView.size()); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp new file mode 100644 index 0000000000..95ec514edb --- /dev/null +++ 
b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp @@ -0,0 +1,28 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" +#include "misc/IntervalSet.h" + +#include "atn/SetTransition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +SetTransition::SetTransition(TransitionType transitionType, ATNState *target, misc::IntervalSet aSet) + : Transition(transitionType, target), set(aSet.isEmpty() ? misc::IntervalSet::of(Token::INVALID_TYPE) : std::move(aSet)) { +} + +misc::IntervalSet SetTransition::label() const { + return set; +} + +bool SetTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return set.contains(symbol); +} + +std::string SetTransition::toString() const { + return "SET " + Transition::toString() + " { set: " + set.toString() + "}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h new file mode 100644 index 0000000000..3a3343ec25 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A transition containing a set of values. 
</summary> + class ANTLR4CPP_PUBLIC SetTransition : public Transition { + public: + static bool is(const Transition &transition) { + const auto transitionType = transition.getTransitionType(); + return transitionType == TransitionType::SET || transitionType == TransitionType::NOT_SET; + } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const misc::IntervalSet set; + + SetTransition(ATNState *target, misc::IntervalSet set) : SetTransition(TransitionType::SET, target, std::move(set)) {} + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + protected: + SetTransition(TransitionType transitionType, ATNState *target, misc::IntervalSet set); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp new file mode 100644 index 0000000000..66a91936e9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/SingletonPredictionContext.h" + +#include "support/Casts.h" +#include "misc/MurmurHash.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + +} + +SingletonPredictionContext::SingletonPredictionContext(Ref<const PredictionContext> parent, size_t returnState) + : PredictionContext(PredictionContextType::SINGLETON), parent(std::move(parent)), returnState(returnState) { + assert(returnState != ATNState::INVALID_STATE_NUMBER); +} + +Ref<const SingletonPredictionContext> SingletonPredictionContext::create(Ref<const PredictionContext> parent, size_t returnState) { + if (returnState == EMPTY_RETURN_STATE && parent == nullptr) { + // someone can pass in the bits of an array ctx that mean $ + return std::dynamic_pointer_cast<const SingletonPredictionContext>(EMPTY); + } + return std::make_shared<SingletonPredictionContext>(std::move(parent), returnState); +} + +bool SingletonPredictionContext::isEmpty() const { + return parent == nullptr && returnState == EMPTY_RETURN_STATE; +} + +size_t SingletonPredictionContext::size() const { + return 1; +} + +const Ref<const PredictionContext>& SingletonPredictionContext::getParent(size_t index) const { + assert(index == 0); + static_cast<void>(index); + return parent; +} + +size_t SingletonPredictionContext::getReturnState(size_t index) const { + assert(index == 0); + static_cast<void>(index); + return returnState; +} + +size_t SingletonPredictionContext::hashCodeImpl() const { + size_t hash = misc::MurmurHash::initialize(); + hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType())); + hash = misc::MurmurHash::update(hash, parent); + hash = misc::MurmurHash::update(hash, returnState); + return misc::MurmurHash::finish(hash, 3); +} + +bool SingletonPredictionContext::equals(const PredictionContext &other) const { + if (this == std::addressof(other)) { + return true; + 
} + if (getContextType() != other.getContextType()) { + return false; + } + const auto &singleton = downCast<const SingletonPredictionContext&>(other); + return returnState == singleton.returnState && + cachedHashCodeEqual(cachedHashCode(), singleton.cachedHashCode()) && + (parent == singleton.parent || (parent != nullptr && singleton.parent != nullptr && *parent == *singleton.parent)); +} + +std::string SingletonPredictionContext::toString() const { + //std::string up = !parent.expired() ? parent.lock()->toString() : ""; + std::string up = parent != nullptr ? parent->toString() : ""; + if (up.length() == 0) { + if (returnState == EMPTY_RETURN_STATE) { + return "$"; + } + return std::to_string(returnState); + } + return std::to_string(returnState) + " " + up; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h new file mode 100644 index 0000000000..1784c4f045 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SingletonPredictionContext final : public PredictionContext { + public: + static bool is(const PredictionContext &predictionContext) { return predictionContext.getContextType() == PredictionContextType::SINGLETON; } + + static bool is(const PredictionContext *predictionContext) { return predictionContext != nullptr && is(*predictionContext); } + + static Ref<const SingletonPredictionContext> create(Ref<const PredictionContext> parent, size_t returnState); + + // Usually a parent is linked via a weak ptr. Not so here as we have kinda reverse reference chain. 
+ // There are no child contexts stored here and often the parent context is left dangling when its + // owning ATNState is released. In order to avoid having this context released as well (leaving all other contexts + // which got this one as parent with a null reference) we use a shared_ptr here instead, to keep those left alone + // parent contexts alive. + const Ref<const PredictionContext> parent; + const size_t returnState; + + SingletonPredictionContext(Ref<const PredictionContext> parent, size_t returnState); + + bool isEmpty() const override; + size_t size() const override; + const Ref<const PredictionContext>& getParent(size_t index) const override; + size_t getReturnState(size_t index) const override; + bool equals(const PredictionContext &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h new file mode 100644 index 0000000000..17fd43fde8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// The block that begins a closure loop. 
+ class ANTLR4CPP_PUBLIC StarBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + StarBlockStartState() : BlockStartState(ATNStateType::STAR_BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h new file mode 100644 index 0000000000..a62eb812b1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopEntryState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_LOOP_ENTRY; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + /** + * Indicates whether this state can benefit from a precedence DFA during SLL + * decision making. 
+ * + * <p>This is a computed property that is calculated during ATN deserialization + * and stored for use in {@link ParserATNSimulator} and + * {@link ParserInterpreter}.</p> + * + * @see DFA#isPrecedenceDfa() + */ + bool isPrecedenceDecision = false; + + StarLoopbackState *loopBackState = nullptr; + + StarLoopEntryState() : DecisionState(ATNStateType::STAR_LOOP_ENTRY) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp new file mode 100644 index 0000000000..6dddbc0d4e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/StarLoopEntryState.h" +#include "atn/Transition.h" +#include "support/Casts.h" + +#include "atn/StarLoopbackState.h" + +using namespace antlr4::atn; + +StarLoopEntryState *StarLoopbackState::getLoopEntryState() const { + if (transitions[0]->target != nullptr && transitions[0]->target->getStateType() == ATNStateType::STAR_LOOP_ENTRY) { + return antlrcpp::downCast<StarLoopEntryState*>(transitions[0]->target); + } + return nullptr; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h new file mode 100644 index 0000000000..04ef9db095 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopbackState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_LOOP_BACK; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + StarLoopbackState() : ATNState(ATNStateType::STAR_LOOP_BACK) {} + + StarLoopEntryState *getLoopEntryState() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h new file mode 100644 index 0000000000..8e41636283 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The Tokens rule start state linking to each lexer rule start state. + class ANTLR4CPP_PUBLIC TokensStartState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::TOKEN_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + TokensStartState() : DecisionState(ATNStateType::TOKEN_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp new file mode 100644 index 0000000000..b918cddfcf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "support/Arrays.h" + +#include "atn/Transition.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +Transition::Transition(TransitionType transitionType, ATNState *target) : _transitionType(transitionType) { + if (target == nullptr) { + throw NullPointerException("target cannot be null."); + } + + this->target = target; +} + +bool Transition::isEpsilon() const { + return false; +} + +misc::IntervalSet Transition::label() const { + return misc::IntervalSet::EMPTY_SET; +} + +std::string Transition::toString() const { + std::stringstream ss; + ss << "(Transition " << std::hex << this << ", target: " << std::hex << target << ')'; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h new file mode 100644 index 0000000000..4c88d698ae --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" +#include "atn/TransitionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// An ATN transition between any two ATN states. Subclasses define + /// atom, set, epsilon, action, predicate, rule transitions. + /// <p/> + /// This is a one way link. It emanates from a state (usually via a list of + /// transitions) and has a target state. + /// <p/> + /// Since we never have to change the ATN transitions once we construct it, + /// we can fix these transitions as specific classes. The DFA transitions + /// on the other hand need to update the labels as it adds transitions to + /// the states. 
We'll use the term Edge for the DFA to distinguish them from + /// ATN transitions. + /// </summary> + class ANTLR4CPP_PUBLIC Transition { + public: + /// The target of this transition. + // ml: this is a reference into the ATN. + ATNState *target; + + virtual ~Transition() = default; + + TransitionType getTransitionType() const { return _transitionType; } + + /** + * Determines if the transition is an "epsilon" transition. + * + * <p>The default implementation returns {@code false}.</p> + * + * @return {@code true} if traversing this transition in the ATN does not + * consume an input symbol; otherwise, {@code false} if traversing this + * transition consumes (matches) an input symbol. + */ + virtual bool isEpsilon() const; + virtual misc::IntervalSet label() const; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const = 0; + + virtual std::string toString() const; + + Transition(Transition const&) = delete; + Transition& operator=(Transition const&) = delete; + + protected: + Transition(TransitionType transitionType, ATNState *target); + + private: + const TransitionType _transitionType; + }; + + using ConstTransitionPtr = std::unique_ptr<const Transition>; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp new file mode 100644 index 0000000000..78769b2ada --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp @@ -0,0 +1,27 @@ +#include "atn/TransitionType.h" + +std::string antlr4::atn::transitionTypeName(TransitionType transitionType) { + switch (transitionType) { + case TransitionType::EPSILON: + return "EPSILON"; + case TransitionType::RANGE: + return "RANGE"; + case TransitionType::RULE: + return "RULE"; + case TransitionType::PREDICATE: + return "PREDICATE"; + case TransitionType::ATOM: + return "ATOM"; + case TransitionType::ACTION: + return "ACTION"; + case 
TransitionType::SET: + return "SET"; + case TransitionType::NOT_SET: + return "NOT_SET"; + case TransitionType::WILDCARD: + return "WILDCARD"; + case TransitionType::PRECEDENCE: + return "PRECEDENCE"; + } + return "UNKNOWN"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h new file mode 100644 index 0000000000..d5d5f3bd97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <string> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + // Constants for transition serialization. + enum class TransitionType : size_t { + EPSILON = 1, + RANGE = 2, + RULE = 3, + PREDICATE = 4, // e.g., {isType(input.LT(1))}? + ATOM = 5, + ACTION = 6, + SET = 7, // ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET = 8, + WILDCARD = 9, + PRECEDENCE = 10, + }; + + ANTLR4CPP_PUBLIC std::string transitionTypeName(TransitionType transitionType); + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp new file mode 100644 index 0000000000..03ec00d399 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNState.h" + +#include "atn/WildcardTransition.h" + +using namespace antlr4::atn; + +WildcardTransition::WildcardTransition(ATNState *target) : Transition(TransitionType::WILDCARD, target) { +} + +bool WildcardTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol; +} + +std::string WildcardTransition::toString() const { + return "WILDCARD " + Transition::toString() + " {}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h new file mode 100644 index 0000000000..d8d663f1fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC WildcardTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::WILDCARD; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + explicit WildcardTransition(ATNState *target); + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp new file mode 100644 index 0000000000..4cc0ab7cc1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFASerializer.h" +#include "dfa/LexerDFASerializer.h" +#include "support/CPPUtils.h" +#include "atn/StarLoopEntryState.h" +#include "atn/ATNConfigSet.h" +#include "support/Casts.h" + +#include "dfa/DFA.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlrcpp; + +DFA::DFA(atn::DecisionState *atnStartState) : DFA(atnStartState, 0) { +} + +DFA::DFA(atn::DecisionState *atnStartState, size_t decision) + : atnStartState(atnStartState), s0(nullptr), decision(decision) { + + _precedenceDfa = false; + if (atn::StarLoopEntryState::is(atnStartState)) { + if (downCast<atn::StarLoopEntryState*>(atnStartState)->isPrecedenceDecision) { + _precedenceDfa = true; + s0 = new DFAState(std::unique_ptr<atn::ATNConfigSet>(new atn::ATNConfigSet())); + s0->isAcceptState = false; + s0->requiresFullContext = false; + } + } +} + +DFA::DFA(DFA &&other) : atnStartState(other.atnStartState), s0(other.s0), decision(other.decision) { + // Source states are implicitly cleared by the move. + states = std::move(other.states); + + other.atnStartState = nullptr; + other.decision = 0; + other.s0 = nullptr; + _precedenceDfa = other._precedenceDfa; + other._precedenceDfa = false; +} + +DFA::~DFA() { + bool s0InList = (s0 == nullptr); + for (auto *state : states) { + if (state == s0) + s0InList = true; + delete state; + } + + if (!s0InList) { + delete s0; + } +} + +bool DFA::isPrecedenceDfa() const { + return _precedenceDfa; +} + +DFAState* DFA::getPrecedenceStartState(int precedence) const { + assert(_precedenceDfa); // Only precedence DFAs may contain a precedence start state. 
+ + auto iterator = s0->edges.find(precedence); + if (iterator == s0->edges.end()) + return nullptr; + + return iterator->second; +} + +void DFA::setPrecedenceStartState(int precedence, DFAState *startState) { + if (!isPrecedenceDfa()) { + throw IllegalStateException("Only precedence DFAs may contain a precedence start state."); + } + + if (precedence < 0) { + return; + } + + s0->edges[precedence] = startState; +} + +std::vector<DFAState *> DFA::getStates() const { + std::vector<DFAState *> result; + for (auto *state : states) + result.push_back(state); + + std::sort(result.begin(), result.end(), [](DFAState *o1, DFAState *o2) -> bool { + return o1->stateNumber < o2->stateNumber; + }); + + return result; +} + +std::string DFA::toString(const Vocabulary &vocabulary) const { + if (s0 == nullptr) { + return ""; + } + + DFASerializer serializer(this, vocabulary); + return serializer.toString(); +} + +std::string DFA::toLexerString() const { + if (s0 == nullptr) { + return ""; + } + LexerDFASerializer serializer(this); + + return serializer.toString(); +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h new file mode 100644 index 0000000000..360eda8ba7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "dfa/DFAState.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC DFA final { + private: + struct DFAStateHasher final { + size_t operator()(const DFAState *dfaState) const { + return dfaState->hashCode(); + } + }; + + struct DFAStateComparer final { + bool operator()(const DFAState *lhs, const DFAState *rhs) const { + return lhs == rhs || *lhs == *rhs; + } + }; + + public: + /// A set of all DFA states. 
Use a map so we can get old state back. + /// Set only allows you to see if it's there. + + /// From which ATN state did we create this DFA? + atn::DecisionState *atnStartState; + std::unordered_set<DFAState*, DFAStateHasher, DFAStateComparer> states; // States are owned by this class. + DFAState *s0; + size_t decision; + + explicit DFA(atn::DecisionState *atnStartState); + DFA(atn::DecisionState *atnStartState, size_t decision); + DFA(const DFA &other) = delete; + DFA(DFA &&other); + ~DFA(); + + /** + * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special + * start state {@link #s0} which is not stored in {@link #states}. The + * {@link DFAState#edges} array for this start state contains outgoing edges + * supplying individual start states corresponding to specific precedence + * values. + * + * @return {@code true} if this is a precedence DFA; otherwise, + * {@code false}. + * @see Parser#getPrecedence() + */ + bool isPrecedenceDfa() const; + + /** + * Get the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @return The start state corresponding to the specified precedence, or + * {@code null} if no start state exists for the specified precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + DFAState* getPrecedenceStartState(int precedence) const; + + /** + * Set the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @param startState The start state corresponding to the specified + * precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + void setPrecedenceStartState(int precedence, DFAState *startState); + + /// Return a list of all states in this DFA, ordered by state number. 
+ std::vector<DFAState *> getStates() const; + + std::string toString(const Vocabulary &vocabulary) const; + + std::string toLexerString() const; + + private: + /** + * {@code true} if this DFA is for a precedence decision; otherwise, + * {@code false}. This is the backing field for {@link #isPrecedenceDfa}. + */ + bool _precedenceDfa; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp new file mode 100644 index 0000000000..64d01769de --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "Vocabulary.h" + +#include "dfa/DFASerializer.h" + +using namespace antlr4::dfa; + +DFASerializer::DFASerializer(const DFA *dfa, const Vocabulary &vocabulary) : _dfa(dfa), _vocabulary(vocabulary) { +} + +std::string DFASerializer::toString() const { + if (_dfa->s0 == nullptr) { + return ""; + } + + std::stringstream ss; + std::vector<DFAState *> states = _dfa->getStates(); + for (auto *s : states) { + for (size_t i = 0; i < s->edges.size(); i++) { + DFAState *t = s->edges[i]; + if (t != nullptr && t->stateNumber != INT32_MAX) { + ss << getStateString(s); + std::string label = getEdgeLabel(i); + ss << "-" << label << "->" << getStateString(t) << "\n"; + } + } + } + + return ss.str(); +} + +std::string DFASerializer::getEdgeLabel(size_t i) const { + return _vocabulary.getDisplayName(i); // ml: no longer needed -1 as we use a map for edges, without offset. +} + +std::string DFASerializer::getStateString(DFAState *s) const { + size_t n = s->stateNumber; + + const std::string baseStateStr = std::string(s->isAcceptState ? ":" : "") + "s" + std::to_string(n) + + (s->requiresFullContext ? 
"^" : ""); + + if (s->isAcceptState) { + if (!s->predicates.empty()) { + std::string buf; + for (size_t i = 0; i < s->predicates.size(); i++) { + buf.append(s->predicates[i].toString()); + } + return baseStateStr + "=>" + buf; + } else { + return baseStateStr + "=>" + std::to_string(s->prediction); + } + } else { + return baseStateStr; + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h new file mode 100644 index 0000000000..b541714078 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Vocabulary.h" + +namespace antlr4 { +namespace dfa { + + /// A DFA walker that knows how to dump them to serialized strings. + class ANTLR4CPP_PUBLIC DFASerializer { + public: + DFASerializer(const DFA *dfa, const Vocabulary &vocabulary); + + virtual ~DFASerializer() = default; + + std::string toString() const; + + protected: + virtual std::string getEdgeLabel(size_t i) const; + std::string getStateString(DFAState *s) const; + + private: + const DFA *_dfa; + const Vocabulary &_vocabulary; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp new file mode 100644 index 0000000000..e591b204c7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/ATNConfigSet.h" +#include "atn/SemanticContext.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" + +#include "dfa/DFAState.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; + +std::string DFAState::PredPrediction::toString() const { + return std::string("(") + pred->toString() + ", " + std::to_string(alt) + ")"; +} + +std::set<size_t> DFAState::getAltSet() const { + std::set<size_t> alts; + if (configs != nullptr) { + for (size_t i = 0; i < configs->size(); i++) { + alts.insert(configs->get(i)->alt); + } + } + return alts; +} + +size_t DFAState::hashCode() const { + return configs != nullptr ? configs->hashCode() : 0; +} + +bool DFAState::equals(const DFAState &other) const { + if (this == std::addressof(other)) { + return true; + } + return configs == other.configs || + (configs != nullptr && other.configs != nullptr && *configs == *other.configs); +} + +std::string DFAState::toString() const { + std::stringstream ss; + ss << stateNumber; + if (configs) { + ss << ":" << configs->toString(); + } + if (isAcceptState) { + ss << " => "; + if (!predicates.empty()) { + for (size_t i = 0; i < predicates.size(); i++) { + ss << predicates[i].toString(); + } + } else { + ss << prediction; + } + } + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h new file mode 100644 index 0000000000..f555cc45cf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h @@ -0,0 +1,154 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +#include "atn/ATNConfigSet.h" +#include "FlatHashMap.h" + +namespace antlr4 { +namespace dfa { + + /// <summary> + /// A DFA state represents a set of possible ATN configurations. + /// As Aho, Sethi, Ullman p. 
117 says "The DFA uses its state + /// to keep track of all possible states the ATN can be in after + /// reading each input symbol. That is to say, after reading + /// input a1a2..an, the DFA is in a state that represents the + /// subset T of the states of the ATN that are reachable from the + /// ATN's start state along some path labeled a1a2..an." + /// In conventional NFA->DFA conversion, therefore, the subset T + /// would be a bitset representing the set of states the + /// ATN could be in. We need to track the alt predicted by each + /// state as well, however. More importantly, we need to maintain + /// a stack of states, tracking the closure operations as they + /// jump from rule to rule, emulating rule invocations (method calls). + /// I have to add a stack to simulate the proper lookahead sequences for + /// the underlying LL grammar from which the ATN was derived. + /// <p/> + /// I use a set of ATNConfig objects not simple states. An ATNConfig + /// is both a state (ala normal conversion) and a RuleContext describing + /// the chain of rules (if any) followed to arrive at that state. + /// <p/> + /// A DFA state may have multiple references to a particular state, + /// but with different ATN contexts (with same or different alts) + /// meaning that state was reached via a different set of rule invocations. 
+ /// </summary> + class ANTLR4CPP_PUBLIC DFAState final { + public: + struct ANTLR4CPP_PUBLIC PredPrediction final { + public: + Ref<const atn::SemanticContext> pred; // never null; at least SemanticContext.NONE + int alt; + + PredPrediction() = delete; + + PredPrediction(const PredPrediction&) = default; + PredPrediction(PredPrediction&&) = default; + + PredPrediction(Ref<const atn::SemanticContext> pred, int alt) : pred(std::move(pred)), alt(alt) {} + + PredPrediction& operator=(const PredPrediction&) = default; + PredPrediction& operator=(PredPrediction&&) = default; + + std::string toString() const; + }; + + std::unique_ptr<atn::ATNConfigSet> configs; + + /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + /// <seealso cref="Token#EOF"/> maps to {@code edges[0]}. + // ml: this is a sparse list, so we use a map instead of a vector. + // Watch out: we no longer have the -1 offset, as it isn't needed anymore. + FlatHashMap<size_t, DFAState*> edges; + + /// if accept state, what ttype do we match or alt do we predict? + /// This is set to <seealso cref="ATN#INVALID_ALT_NUMBER"/> when <seealso cref="#predicates"/>{@code !=null} or + /// <seealso cref="#requiresFullContext"/>. + size_t prediction = 0; + + Ref<const atn::LexerActionExecutor> lexerActionExecutor; + + /// <summary> + /// During SLL parsing, this is a list of predicates associated with the + /// ATN configurations of the DFA state. When we have predicates, + /// <seealso cref="#requiresFullContext"/> is {@code false} since full context prediction evaluates predicates + /// on-the-fly. If this is not null, then <seealso cref="#prediction"/> is + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// <p/> + /// We only use these for non-<seealso cref="#requiresFullContext"/> but conflicting states. That + /// means we know from the context (it's $ or we don't dip into outer + /// context) that it's an ambiguity not a conflict. 
+ /// <p/> + /// This list is computed by <seealso cref="ParserATNSimulator#predicateDFAState"/>. + /// </summary> + std::vector<PredPrediction> predicates; + + int stateNumber = -1; + + bool isAcceptState = false; + + /// <summary> + /// Indicates that this state was created during SLL prediction that + /// discovered a conflict between the configurations in the state. Future + /// <seealso cref="ParserATNSimulator#execATN"/> invocations immediately jumped doing + /// full context prediction if this field is true. + /// </summary> + bool requiresFullContext = false; + + /// Map a predicate to a predicted alternative. + DFAState() = default; + + explicit DFAState(int stateNumber) : stateNumber(stateNumber) {} + + explicit DFAState(std::unique_ptr<atn::ATNConfigSet> configs) : configs(std::move(configs)) {} + + /// <summary> + /// Get the set of all alts mentioned by all ATN configurations in this + /// DFA state. + /// </summary> + std::set<size_t> getAltSet() const; + + size_t hashCode() const; + + /// Two DFAState instances are equal if their ATN configuration sets + /// are the same. This method is used to see if a state already exists. + /// + /// Because the number of alternatives and number of ATN configurations are + /// finite, there is a finite number of DFA states that can be processed. + /// This is necessary to show that the algorithm terminates. + /// + /// Cannot test the DFA state numbers here because in + /// ParserATNSimulator#addDFAState we need to know if any other state + /// exists that has this exact set of ATN configurations. The + /// stateNumber is irrelevant. 
+ bool equals(const DFAState &other) const; + + std::string toString() const; + }; + + inline bool operator==(const DFAState &lhs, const DFAState &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const DFAState &lhs, const DFAState &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace dfa +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::dfa::DFAState> { + size_t operator()(const ::antlr4::dfa::DFAState &dfaState) const { + return dfaState.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp new file mode 100644 index 0000000000..20ed734743 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Vocabulary.h" + +#include "dfa/LexerDFASerializer.h" + +using namespace antlr4::dfa; + +LexerDFASerializer::LexerDFASerializer(const DFA *dfa) : DFASerializer(dfa, Vocabulary()) { +} + +std::string LexerDFASerializer::getEdgeLabel(size_t i) const { + return std::string("'") + static_cast<char>(i) + "'"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h new file mode 100644 index 0000000000..eed7f4f0c5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "dfa/DFASerializer.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC LexerDFASerializer final : public DFASerializer { + public: + explicit LexerDFASerializer(const DFA *dfa); + + protected: + std::string getEdgeLabel(size_t i) const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.cpp b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.cpp new file mode 100644 index 0000000000..dd30ef971b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.cpp @@ -0,0 +1,100 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "internal/Synchronization.h" + +using namespace antlr4::internal; + +void Mutex::lock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.Lock(); +#else + _impl.lock(); +#endif +} + +bool Mutex::try_lock() { +#if ANTLR4CPP_USING_ABSEIL + return _impl.TryLock(); +#else + return _impl.try_lock(); +#endif +} + +void Mutex::unlock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.Unlock(); +#else + _impl.unlock(); +#endif +} + +void SharedMutex::lock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.WriterLock(); +#else + _impl.lock(); +#endif +} + +bool SharedMutex::try_lock() { +#if ANTLR4CPP_USING_ABSEIL + return _impl.WriterTryLock(); +#else + return _impl.try_lock(); +#endif +} + +void SharedMutex::unlock() { +#if ANTLR4CPP_USING_ABSEIL + _impl.WriterUnlock(); +#else + _impl.unlock(); +#endif +} + +void SharedMutex::lock_shared() { +#if ANTLR4CPP_USING_ABSEIL + _impl.ReaderLock(); +#else + _impl.lock_shared(); +#endif +} + +bool SharedMutex::try_lock_shared() { +#if ANTLR4CPP_USING_ABSEIL + return _impl.ReaderTryLock(); +#else + return _impl.try_lock_shared(); +#endif +} + +void SharedMutex::unlock_shared() { +#if ANTLR4CPP_USING_ABSEIL + _impl.ReaderUnlock(); +#else + _impl.unlock_shared(); +#endif +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.h b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.h new file mode 100644 index 0000000000..0f1ff9587d --- /dev/null +++ 
b/contrib/libs/antlr4_cpp_runtime/src/internal/Synchronization.h @@ -0,0 +1,154 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#pragma once + +#include "antlr4-common.h" + +#include <mutex> +#include <shared_mutex> +#include <utility> + +#if ANTLR4CPP_USING_ABSEIL +#error #include "absl/base/call_once.h" +#error #include "absl/base/thread_annotations.h" +#error #include "absl/synchronization/mutex.h" +#define ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS ABSL_NO_THREAD_SAFETY_ANALYSIS +#else +#define ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS +#endif + +// By default ANTLRv4 uses synchronization primitives provided by the C++ standard library. In most +// deployments this is fine, however in some using custom synchronization primitives may be +// preferred. This header allows that by optionally supporting some alternative implementations and +// allowing for more easier patching of other alternatives. + +namespace antlr4::internal { + + // Must be compatible with C++ standard library Mutex requirement. + class ANTLR4CPP_PUBLIC Mutex final { + public: + Mutex() = default; + + // No copying or moving, we are as strict as possible to support other implementations. + Mutex(const Mutex&) = delete; + Mutex(Mutex&&) = delete; + + // No copying or moving, we are as strict as possible to support other implementations. + Mutex& operator=(const Mutex&) = delete; + Mutex& operator=(Mutex&&) = delete; + + void lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + bool try_lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void unlock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + private: +#if ANTLR4CPP_USING_ABSEIL + absl::Mutex _impl; +#else + std::mutex _impl; +#endif + }; + + template <typename Mutex> + using UniqueLock = std::unique_lock<Mutex>; + + // Must be compatible with C++ standard library SharedMutex requirement. + class ANTLR4CPP_PUBLIC SharedMutex final { + public: + SharedMutex() = default; + + // No copying or moving, we are as strict as possible to support other implementations. 
+ SharedMutex(const SharedMutex&) = delete; + SharedMutex(SharedMutex&&) = delete; + + // No copying or moving, we are as strict as possible to support other implementations. + SharedMutex& operator=(const SharedMutex&) = delete; + SharedMutex& operator=(SharedMutex&&) = delete; + + void lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + bool try_lock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void unlock() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void lock_shared() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + bool try_lock_shared() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + void unlock_shared() ANTLR4CPP_NO_THREAD_SAFTEY_ANALYSIS; + + private: +#if ANTLR4CPP_USING_ABSEIL + absl::Mutex _impl; +#else + std::shared_mutex _impl; +#endif + }; + + template <typename Mutex> + using SharedLock = std::shared_lock<Mutex>; + + class OnceFlag; + + template <typename Callable, typename... Args> + void call_once(OnceFlag &onceFlag, Callable &&callable, Args&&... args); + + // Must be compatible with std::once_flag. + class ANTLR4CPP_PUBLIC OnceFlag final { + public: + constexpr OnceFlag() = default; + + // No copying or moving, we are as strict as possible to support other implementations. + OnceFlag(const OnceFlag&) = delete; + OnceFlag(OnceFlag&&) = delete; + + // No copying or moving, we are as strict as possible to support other implementations. + OnceFlag& operator=(const OnceFlag&) = delete; + OnceFlag& operator=(OnceFlag&&) = delete; + + private: + template <typename Callable, typename... Args> + friend void call_once(OnceFlag &onceFlag, Callable &&callable, Args&&... args); + +#if ANTLR4CPP_USING_ABSEIL + absl::once_flag _impl; +#else + std::once_flag _impl; +#endif + }; + + template <typename Callable, typename... Args> + void call_once(OnceFlag &onceFlag, Callable &&callable, Args&&... 
args) { +#if ANTLR4CPP_USING_ABSEIL + absl::call_once(onceFlag._impl, std::forward<Callable>(callable), std::forward<Args>(args)...); +#else + std::call_once(onceFlag._impl, std::forward<Callable>(callable), std::forward<Args>(args)...); +#endif + } + +} // namespace antlr4::internal diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.cpp new file mode 100644 index 0000000000..1a236eccfb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.cpp @@ -0,0 +1,124 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" +#include "atn/ATNDeserializer.h" +#include "Vocabulary.h" + +#include "misc/InterpreterDataReader.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; +using namespace antlr4::misc; + +InterpreterData::InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames) +: vocabulary(literalNames, symbolicNames) { +} + +InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) { + // The structure of the data file is very simple. Everything is line based with empty lines + // separating the different parts. For lexers the layout is: + // token literal names: + // ... + // + // token symbolic names: + // ... + // + // rule names: + // ... + // + // channel names: + // ... + // + // mode names: + // ... + // + // atn: + // <a single line with comma separated int values> enclosed in a pair of squared brackets. + // + // Data for a parser does not contain channel and mode names. 
+ + std::ifstream input(fileName); + if (!input.good()) + return {}; + + std::vector<std::string> literalNames; + std::vector<std::string> symbolicNames; + + std::string line; + + std::getline(input, line, '\n'); + assert(line == "token literal names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + literalNames.push_back(line == "null" ? "" : line); + }; + + std::getline(input, line, '\n'); + assert(line == "token symbolic names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + symbolicNames.push_back(line == "null" ? "" : line); + }; + InterpreterData result(literalNames, symbolicNames); + + std::getline(input, line, '\n'); + assert(line == "rule names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.ruleNames.push_back(line); + }; + + std::getline(input, line, '\n'); + if (line == "channel names:") { + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.channels.push_back(line); + }; + + std::getline(input, line, '\n'); + assert(line == "mode names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.modes.push_back(line); + }; + } + + std::vector<int32_t> serializedATN; + + std::getline(input, line, '\n'); + assert(line == "atn:"); + std::getline(input, line, '\n'); + std::stringstream tokenizer(line); + std::string value; + while (tokenizer.good()) { + std::getline(tokenizer, value, ','); + unsigned long number; + if (value[0] == '[') + number = std::strtoul(&value[1], nullptr, 10); + else + number = std::strtoul(value.c_str(), nullptr, 10); + serializedATN.push_back(static_cast<int32_t>(number)); + } + + ATNDeserializer deserializer; + result.atn = deserializer.deserialize(serializedATN); + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.h b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.h new 
file mode 100644 index 0000000000..4b83dd129d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/InterpreterDataReader.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/ATN.h" +#include "Vocabulary.h" + +namespace antlr4 { +namespace misc { + + struct InterpreterData { + std::unique_ptr<atn::ATN> atn; + dfa::Vocabulary vocabulary; + std::vector<std::string> ruleNames; + std::vector<std::string> channels; // Only valid for lexer grammars. + std::vector<std::string> modes; // ditto + + InterpreterData() {}; // For invalid content. + InterpreterData(std::vector<std::string> const& literalNames, std::vector<std::string> const& symbolicNames); + }; + + // A class to read plain text interpreter data produced by ANTLR. + class ANTLR4CPP_PUBLIC InterpreterDataReader { + public: + static InterpreterData parseFile(std::string const& fileName); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.cpp new file mode 100644 index 0000000000..f0d0bfb491 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.cpp @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" + +using namespace antlr4::misc; + +const Interval Interval::INVALID; + +size_t Interval::hashCode() const { + size_t hash = 23; + hash = hash * 31 + static_cast<size_t>(a); + hash = hash * 31 + static_cast<size_t>(b); + return hash; +} + +bool Interval::startsBeforeDisjoint(const Interval &other) const { + return a < other.a && b < other.a; +} + +bool Interval::startsBeforeNonDisjoint(const Interval &other) const { + return a <= other.a && b >= other.a; +} + +bool Interval::startsAfter(const Interval &other) const { + return a > other.a; +} + +bool Interval::startsAfterDisjoint(const Interval &other) const { + return a > other.b; +} + +bool Interval::startsAfterNonDisjoint(const Interval &other) const { + return a > other.a && a <= other.b; // b >= other.b implied +} + +bool Interval::disjoint(const Interval &other) const { + return startsBeforeDisjoint(other) || startsAfterDisjoint(other); +} + +bool Interval::adjacent(const Interval &other) const { + return a == other.b + 1 || b == other.a - 1; +} + +bool Interval::properlyContains(const Interval &other) const { + return other.a >= a && other.b <= b; +} + +Interval Interval::Union(const Interval &other) const { + return Interval(std::min(a, other.a), std::max(b, other.b)); +} + +Interval Interval::intersection(const Interval &other) const { + return Interval(std::max(a, other.a), std::min(b, other.b)); +} + +std::string Interval::toString() const { + return std::to_string(a) + ".." + std::to_string(b); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.h b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.h new file mode 100644 index 0000000000..32abf629a8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Interval.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */

#pragma once

#include "antlr4-common.h"

namespace antlr4 {
namespace misc {

  // Helpers to convert certain unsigned symbols (e.g. Token::EOF) to their original numeric value (e.g. -1)
  // and vice versa. This is needed mostly for intervals to keep their original order and for toString()
  // methods to print the original numeric value (e.g. for tests).
  constexpr size_t numericToSymbol(ssize_t v) { return static_cast<size_t>(v); }
  constexpr ssize_t symbolToNumeric(size_t v) { return static_cast<ssize_t>(v); }

  /// An inclusive interval a..b
  class ANTLR4CPP_PUBLIC Interval final {
  public:
    /// Default-constructed interval (-1..-2, length 0).
    static const Interval INVALID;

    // Must stay signed to guarantee the correct sort order.
    ssize_t a;
    ssize_t b;

    /// Creates the interval -1..-2, for which length() is 0.
    constexpr Interval() : Interval(static_cast<ssize_t>(-1), static_cast<ssize_t>(-2)) {}

    /// Creates an interval from unsigned symbols; both bounds are converted with symbolToNumeric().
    constexpr explicit Interval(size_t a_, size_t b_) : Interval(symbolToNumeric(a_), symbolToNumeric(b_)) {}

    /// Creates the interval a_..b_ (both bounds inclusive).
    constexpr Interval(ssize_t a_, ssize_t b_) : a(a_), b(b_) {}

    /// return number of elements between a and b inclusively. x..x is length 1.
    /// if b < a, then length is 0. 9..10 has length 2.
    constexpr size_t length() const { return b >= a ? static_cast<size_t>(b - a + 1) : 0; }

    /// Two intervals are equal when both bounds match.
    constexpr bool operator==(const Interval &other) const { return a == other.a && b == other.b; }

    size_t hashCode() const;

    /// <summary>
    /// Does this start completely before other? Disjoint </summary>
    bool startsBeforeDisjoint(const Interval &other) const;

    /// <summary>
    /// Does this start at or before other? Nondisjoint </summary>
    bool startsBeforeNonDisjoint(const Interval &other) const;

    /// <summary>
    /// Does this.a start after other.a? May or may not be disjoint </summary>
    bool startsAfter(const Interval &other) const;

    /// <summary>
    /// Does this start completely after other? Disjoint </summary>
    bool startsAfterDisjoint(const Interval &other) const;

    /// <summary>
    /// Does this start after other? NonDisjoint </summary>
    bool startsAfterNonDisjoint(const Interval &other) const;

    /// <summary>
    /// Are both ranges disjoint? I.e., no overlap? </summary>
    bool disjoint(const Interval &other) const;

    /// <summary>
    /// Are two intervals adjacent such as 0..41 and 42..42? </summary>
    bool adjacent(const Interval &other) const;

    /// <summary>
    /// Does other lie within this interval? Despite the name, the check is
    /// not strict: an interval equal to this one counts as contained. </summary>
    bool properlyContains(const Interval &other) const;

    /// <summary>
    /// Return the interval computed from combining this and other </summary>
    Interval Union(const Interval &other) const;

    /// <summary>
    /// Return the interval in common between this and o </summary>
    Interval intersection(const Interval &other) const;

    /// Formats the interval as "a..b".
    std::string toString() const;
  };

} // namespace misc
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp new file mode 100644 index 0000000000..d230bf45f6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp @@ -0,0 +1,501 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "Exceptions.h" +#include "Vocabulary.h" + +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::misc; + +IntervalSet const IntervalSet::COMPLETE_CHAR_SET = + IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); + +IntervalSet const IntervalSet::EMPTY_SET; + +IntervalSet::IntervalSet() : _intervals() { +} + +IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { + _intervals = set._intervals; +} + +IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) { +} + +IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) { +} + +IntervalSet& IntervalSet::operator=(const IntervalSet& other) { + _intervals = other._intervals; + return *this; +} + +IntervalSet& IntervalSet::operator=(IntervalSet&& other) { + _intervals = move(other._intervals); + return *this; +} + +IntervalSet IntervalSet::of(ssize_t a) { + return IntervalSet({ Interval(a, a) }); +} + +IntervalSet IntervalSet::of(ssize_t a, ssize_t b) { + return IntervalSet({ Interval(a, b) }); +} + +void IntervalSet::clear() { + _intervals.clear(); +} + +void IntervalSet::add(ssize_t el) { + add(el, el); +} + +void IntervalSet::add(ssize_t a, ssize_t b) { + add(Interval(a, b)); +} + +void IntervalSet::add(const Interval &addition) { + if (addition.b < addition.a) { + return; + } + + // find position in list + for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) { + Interval r = *iterator; + if (addition == r) { + return; + } + + if (addition.adjacent(r) || !addition.disjoint(r)) { + // next to each other, make a single larger interval + Interval bigger = addition.Union(r); + *iterator = bigger; + + // make sure we didn't just create an interval that + // should be merged with next interval in list + while (iterator + 1 != _intervals.end()) { + Interval next = *++iterator; + if (!bigger.adjacent(next) && 
bigger.disjoint(next)) { + break; + } + + // if we bump up against or overlap next, merge + iterator = _intervals.erase(iterator);// remove this one + --iterator; // move backwards to what we just set + *iterator = bigger.Union(next); // set to 3 merged ones + // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result + } + return; + } + + if (addition.startsBeforeDisjoint(r)) { + // insert before r + //--iterator; + _intervals.insert(iterator, addition); + return; + } + + // if disjoint and after r, a future iteration will handle it + } + + // ok, must be after last interval (and disjoint from last interval) + // just add it + _intervals.push_back(addition); +} + +IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) { + IntervalSet result; + for (const auto &s : sets) { + result.addAll(s); + } + return result; +} + +IntervalSet& IntervalSet::addAll(const IntervalSet &set) { + // walk set and add each interval + for (auto const& interval : set._intervals) { + add(interval); + } + return *this; +} + +IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const { + return complement(IntervalSet::of(minElement, maxElement)); +} + +IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const { + return vocabulary.subtract(*this); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &other) const { + return subtract(*this, other); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) { + if (left.isEmpty()) { + return IntervalSet(); + } + + if (right.isEmpty()) { + // right set has no elements; just return the copy of the current set + return left; + } + + IntervalSet result(left); + size_t resultI = 0; + size_t rightI = 0; + while (resultI < result._intervals.size() && rightI < right._intervals.size()) { + Interval &resultInterval = result._intervals[resultI]; + const Interval &rightInterval = 
right._intervals[rightI]; + + // operation: (resultInterval - rightInterval) and update indexes + + if (rightInterval.b < resultInterval.a) { + rightI++; + continue; + } + + if (rightInterval.a > resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent; + Interval afterCurrent; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent.a > -1) { // -1 is the default value + if (afterCurrent.a > -1) { + // split the current interval into two + result._intervals[resultI] = beforeCurrent; + result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent); + resultI++; + rightI++; + } else { + // replace the current interval + result._intervals[resultI] = beforeCurrent; + resultI++; + } + } else { + if (afterCurrent.a > -1) { + // replace the current interval + result._intervals[resultI] = afterCurrent; + rightI++; + } else { + // remove the current interval (thus no need to increment resultI) + result._intervals.erase(result._intervals.begin() + resultI); + } + } + } + + // If rightI reached right.intervals.size(), no more intervals to subtract from result. + // If resultI reached result.intervals.size(), we would be subtracting from an empty set. + // Either way, we are done. 
+ return result; +} + +IntervalSet IntervalSet::Or(const IntervalSet &a) const { + IntervalSet result; + result.addAll(*this); + result.addAll(a); + return result; +} + +IntervalSet IntervalSet::And(const IntervalSet &other) const { + IntervalSet intersection; + size_t i = 0; + size_t j = 0; + + // iterate down both interval lists looking for nondisjoint intervals + while (i < _intervals.size() && j < other._intervals.size()) { + Interval mine = _intervals[i]; + Interval theirs = other._intervals[j]; + + if (mine.startsBeforeDisjoint(theirs)) { + // move this iterator looking for interval that might overlap + i++; + } else if (theirs.startsBeforeDisjoint(mine)) { + // move other iterator looking for interval that might overlap + j++; + } else if (mine.properlyContains(theirs)) { + // overlap, add intersection, get next theirs + intersection.add(mine.intersection(theirs)); + j++; + } else if (theirs.properlyContains(mine)) { + // overlap, add intersection, get next mine + intersection.add(mine.intersection(theirs)); + i++; + } else if (!mine.disjoint(theirs)) { + // overlap, add intersection + intersection.add(mine.intersection(theirs)); + + // Move the iterator of lower range [a..b], but not + // the upper range as it may contain elements that will collide + // with the next iterator. So, if mine=[0..115] and + // theirs=[115..200], then intersection is 115 and move mine + // but not theirs as theirs may collide with the next range + // in thisIter. 
+ // move both iterators to next ranges + if (mine.startsAfterNonDisjoint(theirs)) { + j++; + } else if (theirs.startsAfterNonDisjoint(mine)) { + i++; + } + } + } + + return intersection; +} + + +bool IntervalSet::contains(ssize_t el) const { + if (_intervals.empty() || el < _intervals.front().a || el > _intervals.back().b) { + return false; + } + + return std::binary_search(_intervals.begin(), _intervals.end(), Interval(el, el), [](const Interval &lhs, const Interval &rhs) { + return lhs.b < rhs.a; + }); +} + +bool IntervalSet::isEmpty() const { + return _intervals.empty(); +} + +ssize_t IntervalSet::getSingleElement() const { + if (_intervals.size() == 1) { + if (_intervals[0].a == _intervals[0].b) { + return _intervals[0].a; + } + } + + return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work? +} + +ssize_t IntervalSet::getMaxElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.back().b; +} + +ssize_t IntervalSet::getMinElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.front().a; +} + +std::vector<Interval> const& IntervalSet::getIntervals() const { + return _intervals; +} + +size_t IntervalSet::hashCode() const { + size_t hash = MurmurHash::initialize(); + for (const auto &interval : _intervals) { + hash = MurmurHash::update(hash, interval.a); + hash = MurmurHash::update(hash, interval.b); + } + + return MurmurHash::finish(hash, _intervals.size() * 2); +} + +bool IntervalSet::operator == (const IntervalSet &other) const { + if (_intervals.empty() && other._intervals.empty()) + return true; + + if (_intervals.size() != other._intervals.size()) + return false; + + return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin()); +} + +std::string IntervalSet::toString() const { + return toString(false); +} + +std::string IntervalSet::toString(bool elemAreChar) const { + if (_intervals.empty()) { + 
return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + if (a == -1) { + ss << "<EOF>"; + } else if (elemAreChar) { + ss << "'" << static_cast<char>(a) << "'"; + } else { + ss << a; + } + } else { + if (elemAreChar) { + ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'"; + } else { + ss << a << ".." << b; + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const { + if (_intervals.empty()) { + return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + ss << elementName(vocabulary, a); + } else { + for (ssize_t i = a; i <= b; i++) { + if (i > a) { + ss << ", "; + } + ss << elementName(vocabulary, i); + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const { + if (a == -1) { + return "<EOF>"; + } else if (a == -2) { + return "<EPSILON>"; + } else { + return vocabulary.getDisplayName(a); + } +} + +size_t IntervalSet::size() const { + size_t result = 0; + for (const auto &interval : _intervals) { + result += size_t(interval.b - interval.a + 1); + } + return result; +} + +std::vector<ssize_t> IntervalSet::toList() const { + std::vector<ssize_t> result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.push_back(v); + } + } + return 
result; +} + +std::set<ssize_t> IntervalSet::toSet() const { + std::set<ssize_t> result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.insert(v); + } + } + return result; +} + +ssize_t IntervalSet::get(size_t i) const { + size_t index = 0; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + if (index == i) { + return v; + } + index++; + } + } + return -1; +} + +void IntervalSet::remove(ssize_t el) { + for (size_t i = 0; i < _intervals.size(); ++i) { + Interval &interval = _intervals[i]; + ssize_t a = interval.a; + ssize_t b = interval.b; + if (el < a) { + break; // list is sorted and el is before this interval; not here + } + + // if whole interval x..x, rm + if (el == a && el == b) { + _intervals.erase(_intervals.begin() + (long)i); + break; + } + // if on left edge x..b, adjust left + if (el == a) { + interval.a++; + break; + } + // if on right edge a..x, adjust right + if (el == b) { + interval.b--; + break; + } + // if in middle a..x..b, split interval + if (el > a && el < b) { // found in this interval + ssize_t oldb = interval.b; + interval.b = el - 1; // [a..x-1] + add(el + 1, oldb); // add [x+1..b] + + break; // ml: not in the Java code but I believe we also should stop searching here, as we found x. + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.h b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.h new file mode 100644 index 0000000000..49565dc691 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.h @@ -0,0 +1,188 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */

#pragma once

#include "misc/Interval.h"
#include "Exceptions.h"

namespace antlr4 {
namespace misc {

  /**
   * This class implements the {@link IntSet} backed by a sorted array of
   * non-overlapping intervals. It is particularly efficient for representing
   * large collections of numbers, where the majority of elements appear as part
   * of a sequential range of numbers that are all part of the set. For example,
   * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
   *
   * <p>
   * This class is able to represent sets containing any combination of values in
   * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}
   * (inclusive).</p>
   */
  class ANTLR4CPP_PUBLIC IntervalSet final {
  public:
    static IntervalSet const COMPLETE_CHAR_SET;
    static IntervalSet const EMPTY_SET;

  private:
    /// The list of sorted, disjoint intervals.
    std::vector<Interval> _intervals;

    explicit IntervalSet(std::vector<Interval>&& intervals);

  public:
    IntervalSet();
    IntervalSet(IntervalSet const& set);
    IntervalSet(IntervalSet&& set);

    template<typename T1, typename... T_NEXT>
    IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet() {
      // The first int argument is an ignored count for compatibility
      // with the previous varargs based interface.
      addItems(t1, std::forward<T_NEXT>(next)...);
    }

    IntervalSet& operator=(IntervalSet const& set);
    IntervalSet& operator=(IntervalSet&& set);

    /// Create a set with a single element, el.
    static IntervalSet of(ssize_t a);

    /// Create a set with all ints within range [a..b] (inclusive)
    static IntervalSet of(ssize_t a, ssize_t b);

    void clear();

    /// Add a single element to the set. An isolated element is stored
    /// as a range el..el.
    void add(ssize_t el);

    /// Add interval; i.e., add all integers from a to b to set.
    /// If b<a, do nothing.
    /// Keep list in sorted order (by left range value).
    /// If the new range overlaps or is adjacent to existing ranges, combine them.
    /// For example, if this is {1..5, 10..20}, adding 6..7 yields
    /// {1..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
    void add(ssize_t a, ssize_t b);

    /// combine all sets in the array returned the or'd value
    static IntervalSet Or(const std::vector<IntervalSet> &sets);

    // Copy on write so we can cache a..a intervals and sets of that.
    void add(const Interval &addition);
    IntervalSet& addAll(const IntervalSet &set);

    /// Adds every argument in turn via add(); the recursion ends at the
    /// private no-argument overload.
    template<typename T1, typename... T_NEXT>
    void addItems(T1 t1, T_NEXT&&... next) {
      add(t1);
      addItems(std::forward<T_NEXT>(next)...);
    }

    IntervalSet complement(ssize_t minElement, ssize_t maxElement) const;

    /// Given the set of possible values (rather than, say UNICODE or MAXINT),
    /// return a new set containing all elements in vocabulary, but not in
    /// this. The computation is (vocabulary - this).
    ///
    /// 'this' is assumed to be either a subset or equal to vocabulary.
    IntervalSet complement(const IntervalSet &vocabulary) const;

    /// Compute this-other via this&~other.
    /// Return a new set containing all elements in this but not in other.
    /// other is assumed to be a subset of this;
    /// anything that is in other but not in this will be ignored.
    IntervalSet subtract(const IntervalSet &other) const;

    /**
     * Compute the set difference between two interval sets. The specific
     * operation is {@code left - right}. An empty {@code left} yields an
     * empty set; an empty {@code right} yields a copy of {@code left}.
     */
    static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right);

    IntervalSet Or(const IntervalSet &a) const;

    /// Return a new set with the intersection of this set with other. Because
    /// the intervals are sorted, we can use an iterator for each list and
    /// just walk them together. This is roughly O(min(n,m)) for interval
    /// list lengths n and m.
    IntervalSet And(const IntervalSet &other) const;

    /// Is el in any range of this set?
    bool contains(ssize_t el) const;

    /// return true if this set has no members
    bool isEmpty() const;

    /// If this set is a single integer, return it otherwise Token.INVALID_TYPE.
    ssize_t getSingleElement() const;

    /**
     * Returns the maximum value contained in the set.
     *
     * @return the maximum value contained in the set. If the set is empty, this
     * method returns {@link Token#INVALID_TYPE}.
     */
    ssize_t getMaxElement() const;

    /**
     * Returns the minimum value contained in the set.
     *
     * @return the minimum value contained in the set. If the set is empty, this
     * method returns {@link Token#INVALID_TYPE}.
     */
    ssize_t getMinElement() const;

    /// <summary>
    /// Return a list of Interval objects. </summary>
    std::vector<Interval> const& getIntervals() const;

    size_t hashCode() const;

    /// Are two IntervalSets equal? Because all intervals are sorted
    /// and disjoint, equals is a simple linear walk over both lists
    /// to make sure they are the same.
    bool operator == (const IntervalSet &other) const;
    std::string toString() const;
    std::string toString(bool elemAreChar) const;

    std::string toString(const dfa::Vocabulary &vocabulary) const;

  protected:
    std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const;

  public:
    /// Number of elements in the set (not the number of intervals).
    size_t size() const;
    std::vector<ssize_t> toList() const;
    std::set<ssize_t> toSet() const;

    /// Get the ith element of ordered set. Used only by RandomPhrase so
    /// don't bother to implement if you're not doing that for a new
    /// ANTLR code gen target.
    ssize_t get(size_t i) const;
    void remove(ssize_t el);

  private:
    void addItems() { /* No-op */ }
  };

} // namespace misc
} // namespace antlr4

// Hash function for IntervalSet.
+ +namespace std { + using antlr4::misc::IntervalSet; + + template <> struct hash<IntervalSet> + { + size_t operator() (const IntervalSet &x) const + { + return x.hashCode(); + } + }; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.cpp new file mode 100644 index 0000000000..09072c9f7e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.cpp @@ -0,0 +1,120 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "misc/MurmurHash.h" + +using namespace antlr4::misc; + +// A variation of the MurmurHash3 implementation (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp) +// Here we unrolled the loop used there into individual calls to update(), as we usually hash object fields +// instead of entire buffers. 

// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER)

#include <stdlib.h>

#define ROTL32(x,y)  _rotl(x,y)
#define ROTL64(x,y)  _rotl64(x,y)

#elif ANTLR4CPP_HAVE_BUILTIN(__builtin_rotateleft32) && ANTLR4CPP_HAVE_BUILTIN(__builtin_rotateleft64)

// Compilers that provide the rotate builtins

#define ROTL32(x, y) __builtin_rotateleft32(x, y)
#define ROTL64(x, y) __builtin_rotateleft64(x, y)

#else // neither _MSC_VER nor the rotate builtins

// Other compilers

namespace {

constexpr uint32_t ROTL32(uint32_t x, int r) {
  return (x << r) | (x >> (32 - r));
}
constexpr uint64_t ROTL64(uint64_t x, int r) {
  return (x << r) | (x >> (64 - r));
}

}

#endif // rotate helper selection

#if SIZE_MAX == UINT64_MAX

// 64-bit size_t: mixes one value into the intermediate hash.
size_t MurmurHash::update(size_t hash, size_t value) {
  size_t k1 = value;
  k1 *= UINT64_C(0x87c37b91114253d5);
  k1 = ROTL64(k1, 31);
  k1 *= UINT64_C(0x4cf5ad432745937f);

  hash ^= k1;
  hash = ROTL64(hash, 27);
  hash = hash * 5 + UINT64_C(0x52dce729);

  return hash;
}

// 64-bit size_t: final mixing; entryCount is scaled by 8 before being folded in.
size_t MurmurHash::finish(size_t hash, size_t entryCount) {
  hash ^= entryCount * 8;
  hash ^= hash >> 33;
  hash *= UINT64_C(0xff51afd7ed558ccd);
  hash ^= hash >> 33;
  hash *= UINT64_C(0xc4ceb9fe1a85ec53);
  hash ^= hash >> 33;
  return hash;
}

#elif SIZE_MAX == UINT32_MAX

// 32-bit size_t: mixes one value into the intermediate hash.
size_t MurmurHash::update(size_t hash, size_t value) {
  size_t k1 = value;
  k1 *= UINT32_C(0xCC9E2D51);
  k1 = ROTL32(k1, 15);
  k1 *= UINT32_C(0x1B873593);

  hash ^= k1;
  hash = ROTL32(hash, 13);
  hash = hash * 5 + UINT32_C(0xE6546B64);

  return hash;
}

// 32-bit size_t: final mixing; entryCount is scaled by 4 before being folded in.
size_t MurmurHash::finish(size_t hash, size_t entryCount) {
  hash ^= entryCount * 4;
  hash ^= hash >> 16;
  hash *= UINT32_C(0x85EBCA6B);
  hash ^= hash >> 13;
  hash *= UINT32_C(0xC2B2AE35);
  hash ^= hash >> 16;
  return hash;
}

#else
#error "Expected sizeof(size_t) to be 4 or 8."
#endif

// Hashes a byte buffer: consumes it in sizeof(size_t) chunks, then folds in the
// remaining bytes (if any) as one zero-padded value.
size_t MurmurHash::update(size_t hash, const void *data, size_t size) {
  size_t value;
  const uint8_t *bytes = static_cast<const uint8_t*>(data);
  while (size >= sizeof(size_t)) {
    std::memcpy(&value, bytes, sizeof(size_t));
    hash = update(hash, value);
    bytes += sizeof(size_t);
    size -= sizeof(size_t);
  }
  if (size != 0) {
    value = 0;
    std::memcpy(&value, bytes, size);
    hash = update(hash, value);
  }
  return hash;
}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.h b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.h new file mode 100644 index 0000000000..cde7ac7906 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/MurmurHash.h @@ -0,0 +1,102 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include <cstdint>
#include <type_traits>

#include "antlr4-common.h"

namespace antlr4 {
namespace misc {

  class ANTLR4CPP_PUBLIC MurmurHash final {
  private:
    static constexpr size_t DEFAULT_SEED = 0;

    /// Initialize the hash using the default seed value.
    /// Returns the intermediate hash value.
  public:
    static size_t initialize() { return initialize(DEFAULT_SEED); }

    /// Initialize the hash using the specified seed.
    static size_t initialize(size_t seed) { return seed; }

    /// Update the intermediate hash value for the next input {@code value}.
    /// <param name="hash"> the intermediate hash value </param>
    /// <param name="value"> the value to add to the current hash </param>
    /// Returns the updated intermediate hash value.
    static size_t update(size_t hash, size_t value);

    /**
     * Update the intermediate hash value for the next input {@code value}.
     * The object's hashCode() is used; a null {@code value} contributes 0.
     *
     * @param hash the intermediate hash value
     * @param value the value to add to the current hash
     * @return the updated intermediate hash value
     */
    template <class T>
    static size_t update(size_t hash, Ref<T> const& value) {
      return update(hash, value != nullptr ? value->hashCode() : 0);
    }

    /// Same as above for a raw pointer; a null pointer contributes 0.
    template <class T>
    static size_t update(size_t hash, T *value) {
      return update(hash, value != nullptr ? value->hashCode() : 0);
    }

    /// Update the intermediate hash with {@code size} bytes starting at {@code data}.
    static size_t update(size_t hash, const void *data, size_t size);

    /// Update the intermediate hash with {@code size} elements of type T starting at {@code data}.
    template <typename T>
    static size_t update(size_t hash, const T *data, size_t size) {
      return update(hash, static_cast<const void*>(data), size * sizeof(std::remove_reference_t<T>));
    }

    /// <summary>
    /// Apply the final computation steps to the intermediate value {@code hash}
    /// to form the final result of the MurmurHash 3 hash function.
    /// </summary>
    /// <param name="hash"> the intermediate hash value </param>
    /// <param name="entryCount"> the number of calls to update() before calling finish() </param>
    /// <returns> the final hash result </returns>
    static size_t finish(size_t hash, size_t entryCount);

    /// Utility function to compute the hash code of an array using the MurmurHash3 algorithm.
    ///
    /// @param <T> the array element type </param>
    /// <param name="data"> the array data </param>
    /// <param name="seed"> the seed for the MurmurHash algorithm </param>
    /// <returns> the hash code of the data </returns>
    template<typename T> // T is the element type held by the Ref<T> entries
    static size_t hashCode(const std::vector<Ref<T>> &data, size_t seed = DEFAULT_SEED) {
      size_t hash = initialize(seed);
      for (auto &entry : data) {
        hash = update(hash, entry);
      }
      return finish(hash, data.size());
    }

    /// Hash code of a byte buffer. Note that finish() receives the byte count
    /// here, not the number of update() calls.
    static size_t hashCode(const void *data, size_t size, size_t seed = DEFAULT_SEED) {
      size_t hash = initialize(seed);
      hash = update(hash, data, size);
      return finish(hash, size);
    }

    /// Hash code of {@code size} elements of type T starting at {@code data}.
    template <typename T>
    static size_t hashCode(const T *data, size_t size, size_t seed = DEFAULT_SEED) {
      return hashCode(static_cast<const void*>(data), size * sizeof(std::remove_reference_t<T>), seed);
    }

  private:
    // Static-only utility: not constructible, copyable or assignable.
    MurmurHash() = delete;

    MurmurHash(const MurmurHash&) = delete;

    MurmurHash& operator=(const MurmurHash&) = delete;
  };

} // namespace misc
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.cpp new file mode 100644 index 0000000000..c35f1921c4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.cpp @@ -0,0 +1,4 @@
#include "misc/Predicate.h"

// Out-of-line definition of the virtual destructor.
antlr4::misc::Predicate::~Predicate() {
}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.h b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.h new file mode 100644 index 0000000000..1032d53fed --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/Predicate.h @@ -0,0 +1,21 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */

#pragma once

#include "antlr4-common.h"

namespace antlr4 {
namespace misc {

  /// Interface for a boolean test applied to a parse tree node.
  class ANTLR4CPP_PUBLIC Predicate {
  public:
    virtual ~Predicate();

    /// Returns the outcome of the test for node t; implemented by subclasses.
    virtual bool test(tree::ParseTree *t) = 0;
  };

} // namespace misc
} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp new file mode 100644 index 0000000000..a1ed50d456 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Any.cpp @@ -0,0 +1,8 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#include "Any.h"

using namespace antlrcpp;
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Any.h b/contrib/libs/antlr4_cpp_runtime/src/support/Any.h new file mode 100644 index 0000000000..fa5df58946 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Any.h @@ -0,0 +1,16 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

// antlrcpp::Any is an alias for the standard std::any.

#pragma once

#include "antlr4-common.h"

namespace antlrcpp {

  using Any = std::any;

} // namespace antlrcpp
diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp new file mode 100644 index 0000000000..b3c4f94f2f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.cpp @@ -0,0 +1,43 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
+ */ + +#include "tree/ParseTree.h" +#include "Exceptions.h" + +#include "support/Arrays.h" + +using namespace antlrcpp; + +std::string Arrays::listToString(const std::vector<std::string> &list, const std::string &separator) +{ + std::stringstream ss; + bool firstEntry = true; + + ss << '['; + for (const auto &entry : list) { + ss << entry; + if (firstEntry) { + ss << separator; + firstEntry = false; + } + } + + ss << ']'; + return ss.str(); +} + +template <> +std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree*> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto *value : source) { + result += value->toStringTree(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h new file mode 100644 index 0000000000..04b852d986 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Arrays.h @@ -0,0 +1,149 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC Arrays { + public: + + static std::string listToString(const std::vector<std::string> &list, const std::string &separator); + + template <typename T> + static bool equals(const std::vector<T> &a, const std::vector<T> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) + if (!(a[i] == b[i])) + return false; + + return true; + } + + template <typename T> + static bool equals(const std::vector<T *> &a, const std::vector<T *> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) { + if (!a[i] && !b[i]) + continue; + if (!a[i] || !b[i]) + return false; + if (a[i] == b[i]) + continue; + + if (!(*a[i] == *b[i])) + return false; + } + + return true; + } + + template <typename T> + static bool equals(const std::vector<Ref<T>> &a, const std::vector<Ref<T>> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) { + if (!a[i] && !b[i]) + continue; + if (!a[i] || !b[i]) + return false; + if (a[i] == b[i]) + continue; + + if (!(*a[i] == *b[i])) + return false; + } + + return true; + } + + template <typename T> + static bool equals(const std::vector<std::unique_ptr<T>> &a, const std::vector<std::unique_ptr<T>> &b) { + if (a.size() != b.size()) + return false; + + for (size_t i = 0; i < a.size(); ++i) { + if (!a[i] && !b[i]) + continue; + if (!a[i] || !b[i]) + return false; + if (a[i] == b[i]) + continue; + + if (!(*a[i] == *b[i])) + return false; + } + + return true; + } + + template <typename T> + static std::string toString(const std::vector<T> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto &value : source) { + result += value.toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + template <typename T> + static std::string toString(const std::vector<Ref<T>> &source) { + 
std::string result = "["; + bool firstEntry = true; + for (auto &value : source) { + result += value->toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + template <typename T> + static std::string toString(const std::vector<std::unique_ptr<T>> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto &value : source) { + result += value->toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + template <typename T> + static std::string toString(const std::vector<T *> &source) { + std::string result = "["; + bool firstEntry = true; + for (auto value : source) { + result += value->toString(); + if (firstEntry) { + result += ", "; + firstEntry = false; + } + } + return result + "]"; + } + + }; + + template <> + std::string Arrays::toString(const std::vector<antlr4::tree::ParseTree *> &source); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h b/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h new file mode 100644 index 0000000000..bb30364be0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/BitSet.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC BitSet : public std::bitset<2048> { + public: + size_t nextSetBit(size_t pos) const { + for (size_t i = pos; i < size(); i++){ + if (test(i)) { + return i; + } + } + + return INVALID_INDEX; + } + + // Prints a list of every index for which the bitset contains a bit in true. 
+ friend std::wostream& operator << (std::wostream& os, const BitSet& obj) + { + os << "{"; + size_t total = obj.count(); + for (size_t i = 0; i < obj.size(); i++){ + if (obj.test(i)){ + os << i; + --total; + if (total > 1){ + os << ", "; + } + } + } + + os << "}"; + return os; + } + + static std::string subStringRepresentation(const std::vector<BitSet>::iterator &begin, + const std::vector<BitSet>::iterator &end) { + std::string result; + std::vector<BitSet>::iterator vectorIterator; + + for (vectorIterator = begin; vectorIterator != end; vectorIterator++) { + result += vectorIterator->toString(); + } + // Grab the end + result += end->toString(); + + return result; + } + + std::string toString() const { + std::stringstream stream; + stream << "{"; + bool valueAdded = false; + for (size_t i = 0; i < size(); ++i){ + if (test(i)){ + if (valueAdded) { + stream << ", "; + } + stream << i; + valueAdded = true; + } + } + + stream << "}"; + return stream.str(); + } + + }; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp new file mode 100644 index 0000000000..95321b3dc1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.cpp @@ -0,0 +1,207 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "support/CPPUtils.h" + +namespace antlrcpp { + + std::string join(const std::vector<std::string> &strings, const std::string &separator) { + std::string str; + bool firstItem = true; + for (const std::string &s : strings) { + if (!firstItem) { + str.append(separator); + } + firstItem = false; + str.append(s); + } + return str; + } + + std::map<std::string, size_t> toMap(const std::vector<std::string> &keys) { + std::map<std::string, size_t> result; + for (size_t i = 0; i < keys.size(); ++i) { + result.insert({ keys[i], i }); + } + return result; + } + + std::string escapeWhitespace(std::string str, bool escapeSpaces) { + std::string result; + for (auto c : str) { + switch (c) { + case '\n': + result += "\\n"; + break; + + case '\r': + result += "\\r"; + break; + + case '\t': + result += "\\t"; + break; + + case ' ': + if (escapeSpaces) { + result += "\u00B7"; + break; + } + result += c; + break; + + default: + result += c; + break; + } + } + + return result; + } + + std::string toHexString(const int t) { + std::stringstream stream; + stream << std::uppercase << std::hex << t; + return stream.str(); + } + + std::string arrayToString(const std::vector<std::string> &data) { + std::string answer; + size_t toReserve = 0; + for (const auto &sub : data) { + toReserve += sub.size(); + } + answer.reserve(toReserve); + for (const auto &sub: data) { + answer.append(sub); + } + return answer; + } + + std::string replaceString(const std::string &s, const std::string &from, const std::string &to) { + std::string::size_type p; + std::string ss, res; + + ss = s; + p = ss.find(from); + while (p != std::string::npos) { + if (p > 0) + res.append(ss.substr(0, p)).append(to); + else + res.append(to); + ss = ss.substr(p + from.size()); + p = ss.find(from); + } + res.append(ss); + + return res; + } + + std::vector<std::string> split(const std::string &s, const std::string &sep, int count) { + std::vector<std::string> parts; + std::string ss = s; + + 
std::string::size_type p; + + if (s.empty()) + return parts; + + if (count == 0) + count= -1; + + p = ss.find(sep); + while (!ss.empty() && p != std::string::npos && (count < 0 || count > 0)) { + parts.push_back(ss.substr(0, p)); + ss = ss.substr(p+sep.size()); + + --count; + p = ss.find(sep); + } + parts.push_back(ss); + + return parts; + } + + //-------------------------------------------------------------------------------------------------- + + // Debugging helper. Adds indentation to all lines in the given string. + std::string indent(const std::string &s, const std::string &indentation, bool includingFirst) { + std::vector<std::string> parts = split(s, "\n", -1); + for (size_t i = 0; i < parts.size(); ++i) { + if (i == 0 && !includingFirst) + continue; + parts[i].insert(0, indentation); + } + + return join(parts, "\n"); + } + + //-------------------------------------------------------------------------------------------------- + + // Recursively get the error from a, possibly nested, exception. +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + // No nested exceptions before VS 2015. + template <typename T> + std::exception_ptr get_nested(const T &/*e*/) { + try { + return nullptr; + } + catch (const std::bad_cast &) { + return nullptr; + } + } +#else + template <typename T> + std::exception_ptr get_nested(const T &e) { + try { + auto nested = dynamic_cast<const std::nested_exception&>(e); + return nested.nested_ptr(); + } + catch (const std::bad_cast &) { + return nullptr; + } + } +#endif + + std::string what(std::exception_ptr eptr) { + if (!eptr) { + throw std::bad_exception(); + } + + std::string result; + std::size_t nestCount = 0; + + next: { + try { + std::exception_ptr yeptr; + std::swap(eptr, yeptr); + std::rethrow_exception(yeptr); + } + catch (const std::exception &e) { + result += e.what(); + eptr = get_nested(e); + } + catch (const std::string &e) { + result += e; + } + catch (const char *e) { + result += e; + } + catch (...) 
{ + result += "cannot be determined"; + } + + if (eptr) { + result += " ("; + ++nestCount; + goto next; + } + } + + result += std::string(nestCount, ')'); + return result; + } + +} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h new file mode 100644 index 0000000000..2eb1a36037 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/CPPUtils.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + ANTLR4CPP_PUBLIC std::string join(const std::vector<std::string> &strings, const std::string &separator); + ANTLR4CPP_PUBLIC std::map<std::string, size_t> toMap(const std::vector<std::string> &keys); + ANTLR4CPP_PUBLIC std::string escapeWhitespace(std::string str, bool escapeSpaces); + ANTLR4CPP_PUBLIC std::string toHexString(const int t); + ANTLR4CPP_PUBLIC std::string arrayToString(const std::vector<std::string> &data); + ANTLR4CPP_PUBLIC std::string replaceString(const std::string &s, const std::string &from, const std::string &to); + ANTLR4CPP_PUBLIC std::vector<std::string> split(const std::string &s, const std::string &sep, int count); + ANTLR4CPP_PUBLIC std::string indent(const std::string &s, const std::string &indentation, bool includingFirst = true); + + // Using RAII + a lambda to implement a "finally" replacement. + template <typename OnEnd> + struct FinalAction { + FinalAction(OnEnd f) : _cleanUp { std::move(f) } {} + FinalAction(FinalAction &&other) : + _cleanUp(std::move(other._cleanUp)), _enabled(other._enabled) { + other._enabled = false; // Don't trigger the lambda after ownership has moved. 
+ } + ~FinalAction() { if (_enabled) _cleanUp(); } + + void disable() { _enabled = false; } + private: + OnEnd _cleanUp; + bool _enabled {true}; + }; + + template <typename OnEnd> + FinalAction<OnEnd> finally(OnEnd f) { + return FinalAction<OnEnd>(std::move(f)); + } + + // Convenience functions to avoid lengthy dynamic_cast() != nullptr checks in many places. + template <typename T1, typename T2> + inline bool is(T2 *obj) { // For pointer types. + return dynamic_cast<typename std::add_const<T1>::type>(obj) != nullptr; + } + + template <typename T1, typename T2> + inline bool is(Ref<T2> const& obj) { // For shared pointers. + return dynamic_cast<T1 *>(obj.get()) != nullptr; + } + + template <typename T> + std::string toString(const T &o) { + std::stringstream ss; + // typeid gives the mangled class name, but that's all what's possible + // in a portable way. + ss << typeid(o).name() << "@" << std::hex << reinterpret_cast<uintptr_t>(&o); + return ss.str(); + } + + // Get the error text from an exception pointer or the current exception. + ANTLR4CPP_PUBLIC std::string what(std::exception_ptr eptr = std::current_exception()); + +} // namespace antlrcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h b/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h new file mode 100644 index 0000000000..2ded955dcd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Casts.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2012-2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
/* Copyright (c) 2012-2021 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include <cassert>
#include <memory>
#include <type_traits>

namespace antlrcpp {

  /// Statically casts a base pointer to the derived pointer type `To`.
  /// When RTTI is available, an assert verifies the dynamic type; a null
  /// pointer is passed through unchanged.
  template <typename To, typename From>
  To downCast(From* from) {
    using Target = std::remove_pointer_t<To>;
    static_assert(std::is_pointer_v<To>, "Target type not a pointer.");
    static_assert(std::is_base_of_v<From, Target>, "Target type not derived from source type.");
  #if !defined(__GNUC__) || defined(__GXX_RTTI)
    assert(from == nullptr || dynamic_cast<To>(from) != nullptr);
  #endif
    return static_cast<To>(from);
  }

  /// Reference flavour of downCast: `To` must be an lvalue reference to a
  /// type derived from `From`.
  template <typename To, typename From>
  To downCast(From& from) {
    using Target = std::remove_reference_t<To>;
    static_assert(std::is_lvalue_reference_v<To>, "Target type not a lvalue reference.");
    static_assert(std::is_base_of_v<From, Target>, "Target type not derived from source type.");
  #if !defined(__GNUC__) || defined(__GXX_RTTI)
    assert(dynamic_cast<std::add_pointer_t<Target>>(std::addressof(from)) != nullptr);
  #endif
    return static_cast<To>(from);
  }

}

// ---------------------------------------------------------------------------
// diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h b/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h
// new file mode 100644
// index 0000000000..8e960676cf
// --- /dev/null
// +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Declarations.h
// @@ -0,0 +1,161 @@
// ---------------------------------------------------------------------------
+ */ + +#pragma once + +namespace antlr4 { + class ANTLRErrorListener; + class ANTLRErrorStrategy; + class ANTLRFileStream; + class ANTLRInputStream; + class BailErrorStrategy; + class BaseErrorListener; + class BufferedTokenStream; + class CharStream; + class CommonToken; + class CommonTokenFactory; + class CommonTokenStream; + class ConsoleErrorListener; + class DefaultErrorStrategy; + class DiagnosticErrorListener; + class EmptyStackException; + class FailedPredicateException; + class IllegalArgumentException; + class IllegalStateException; + class InputMismatchException; + class IntStream; + class InterpreterRuleContext; + class Lexer; + class LexerInterpreter; + class LexerNoViableAltException; + class ListTokenSource; + class NoSuchElementException; + class NoViableAltException; + class NullPointerException; + class ParseCancellationException; + class Parser; + class ParserInterpreter; + class ParserRuleContext; + class ProxyErrorListener; + class RecognitionException; + class Recognizer; + class RuleContext; + class Token; + template<typename Symbol> class TokenFactory; + class TokenSource; + class TokenStream; + class TokenStreamRewriter; + class UnbufferedCharStream; + class UnbufferedTokenStream; + class WritableToken; + + namespace misc { + class InterpreterDataReader; + class Interval; + class IntervalSet; + class MurmurHash; + class Utils; + class Predicate; + } + namespace atn { + class ATN; + class ATNConfig; + class ATNConfigSet; + class ATNDeserializationOptions; + class ATNDeserializer; + class ATNSerializer; + class ATNSimulator; + class ATNState; + enum class ATNType; + class ActionTransition; + class ArrayPredictionContext; + class AtomTransition; + class BasicBlockStartState; + class BasicState; + class BlockEndState; + class BlockStartState; + class DecisionState; + class EpsilonTransition; + class LL1Analyzer; + class LexerAction; + class LexerActionExecutor; + class LexerATNConfig; + class LexerATNSimulator; + class LexerMoreAction; + class 
LexerPopModeAction; + class LexerSkipAction; + class LookaheadEventInfo; + class LoopEndState; + class NotSetTransition; + class OrderedATNConfigSet; + class ParseInfo; + class ParserATNSimulator; + class PlusBlockStartState; + class PlusLoopbackState; + class PrecedencePredicateTransition; + class PredicateTransition; + class PredictionContext; + enum class PredictionMode; + class PredictionModeClass; + class RangeTransition; + class RuleStartState; + class RuleStopState; + class RuleTransition; + class SemanticContext; + class SetTransition; + class SingletonPredictionContext; + class StarBlockStartState; + class StarLoopEntryState; + class StarLoopbackState; + class TokensStartState; + class Transition; + class WildcardTransition; + } + namespace dfa { + class DFA; + class DFASerializer; + class DFAState; + class LexerDFASerializer; + class Vocabulary; + } + namespace tree { + class AbstractParseTreeVisitor; + class ErrorNode; + class ErrorNodeImpl; + class ParseTree; + class ParseTreeListener; + template<typename T> class ParseTreeProperty; + class ParseTreeVisitor; + class ParseTreeWalker; + class SyntaxTree; + class TerminalNode; + class TerminalNodeImpl; + class Tree; + class Trees; + + namespace pattern { + class Chunk; + class ParseTreeMatch; + class ParseTreePattern; + class ParseTreePatternMatcher; + class RuleTagToken; + class TagChunk; + class TextChunk; + class TokenTagToken; + } + + namespace xpath { + class XPath; + class XPathElement; + class XPathLexerErrorListener; + class XPathRuleAnywhereElement; + class XPathRuleElement; + class XPathTokenAnywhereElement; + class XPathTokenElement; + class XPathWildcardAnywhereElement; + class XPathWildcardElement; + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp new file mode 100644 index 0000000000..9ee274c8de --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.cpp @@ -0,0 +1,38 @@ +/* 
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "support/StringUtils.h" + +namespace antlrcpp { + + std::string escapeWhitespace(std::string_view in) { + std::string out; + escapeWhitespace(out, in); + out.shrink_to_fit(); + return out; + } + + std::string& escapeWhitespace(std::string& out, std::string_view in) { + out.reserve(in.size()); // Best case, no escaping. + for (const auto &c : in) { + switch (c) { + case '\t': + out.append("\\t"); + break; + case '\r': + out.append("\\r"); + break; + case '\n': + out.append("\\n"); + break; + default: + out.push_back(c); + break; + } + } + return out; + } + +} // namespace antrlcpp diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h new file mode 100644 index 0000000000..aee0d46d6e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/StringUtils.h @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + ANTLR4CPP_PUBLIC std::string escapeWhitespace(std::string_view in); + + ANTLR4CPP_PUBLIC std::string& escapeWhitespace(std::string& out, std::string_view in); + +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h b/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h new file mode 100644 index 0000000000..f0f84375ad --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Unicode.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC Unicode final { + public: + static constexpr char32_t REPLACEMENT_CHARACTER = 0xfffd; + + static constexpr bool isValid(char32_t codePoint) { + return codePoint < 0xd800 || (codePoint > 0xdfff && codePoint <= 0x10ffff); + } + + private: + Unicode() = delete; + Unicode(const Unicode&) = delete; + Unicode(Unicode&&) = delete; + Unicode& operator=(const Unicode&) = delete; + Unicode& operator=(Unicode&&) = delete; + }; + +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp new file mode 100644 index 0000000000..294e9f1b21 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.cpp @@ -0,0 +1,242 @@ +/* Copyright (c) 2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <cassert> +#include <cstdint> + +#include "support/Utf8.h" +#include "support/Unicode.h" + +// The below implementation is based off of https://github.com/google/cel-cpp/internal/utf8.cc, +// which is itself based off of https://go.googlesource.com/go/+/refs/heads/master/src/unicode/utf8/utf8.go. +// If for some reason you feel the need to copy this implementation, please retain a comment +// referencing the two source files and giving credit, as well as maintaining any and all +// obligations required by the BSD 3-clause license that governs this file. 
+ +namespace antlrcpp { + +namespace { + +#undef SELF + constexpr uint8_t SELF = 0x80; + +#undef LOW + constexpr uint8_t LOW = 0x80; +#undef HIGH + constexpr uint8_t HIGH = 0xbf; + +#undef MASKX + constexpr uint8_t MASKX = 0x3f; +#undef MASK2 + constexpr uint8_t MASK2 = 0x1f; +#undef MASK3 + constexpr uint8_t MASK3 = 0xf; +#undef MASK4 + constexpr uint8_t MASK4 = 0x7; + +#undef TX + constexpr uint8_t TX = 0x80; +#undef T2 + constexpr uint8_t T2 = 0xc0; +#undef T3 + constexpr uint8_t T3 = 0xe0; +#undef T4 + constexpr uint8_t T4 = 0xf0; + +#undef XX + constexpr uint8_t XX = 0xf1; +#undef AS + constexpr uint8_t AS = 0xf0; +#undef S1 + constexpr uint8_t S1 = 0x02; +#undef S2 + constexpr uint8_t S2 = 0x13; +#undef S3 + constexpr uint8_t S3 = 0x03; +#undef S4 + constexpr uint8_t S4 = 0x23; +#undef S5 + constexpr uint8_t S5 = 0x34; +#undef S6 + constexpr uint8_t S6 = 0x04; +#undef S7 + constexpr uint8_t S7 = 0x44; + + // NOLINTBEGIN + // clang-format off +#undef LEADING + constexpr uint8_t LEADING[256] = { + // 1 2 3 4 5 6 7 8 9 A B C D E F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x00-0x0F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x10-0x1F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x20-0x2F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x30-0x3F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x40-0x4F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x50-0x5F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x60-0x6F + AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, AS, // 0x70-0x7F + // 1 2 3 4 5 6 7 8 9 A B C D E F + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x80-0x8F + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x90-0x9F + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xA0-0xAF + XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, 
XX, XX, XX, XX, XX, // 0xB0-0xBF + XX, XX, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xC0-0xCF + S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, // 0xD0-0xDF + S2, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S4, S3, S3, // 0xE0-0xEF + S5, S6, S6, S6, S7, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0xF0-0xFF + }; + // clang-format on + // NOLINTEND + +#undef ACCEPT + constexpr std::pair<uint8_t, uint8_t> ACCEPT[16] = { + {LOW, HIGH}, {0xa0, HIGH}, {LOW, 0x9f}, {0x90, HIGH}, + {LOW, 0x8f}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, + {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, + {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, + }; + +} // namespace + + std::pair<char32_t, size_t> Utf8::decode(std::string_view input) { + assert(!input.empty()); + const auto b = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b < SELF) { + return {static_cast<char32_t>(b), 1}; + } + const auto leading = LEADING[b]; + if (leading == XX) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + auto size = static_cast<size_t>(leading & 7) - 1; + if (size > input.size()) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + const auto& accept = ACCEPT[leading >> 4]; + const auto b1 = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b1 < accept.first || b1 > accept.second) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + if (size <= 1) { + return {(static_cast<char32_t>(b & MASK2) << 6) | + static_cast<char32_t>(b1 & MASKX), + 2}; + } + const auto b2 = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b2 < LOW || b2 > HIGH) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + } + if (size <= 2) { + return {(static_cast<char32_t>(b & MASK3) << 12) | + (static_cast<char32_t>(b1 & MASKX) << 6) | + static_cast<char32_t>(b2 & MASKX), + 3}; + } + const auto b3 = static_cast<uint8_t>(input.front()); + input.remove_prefix(1); + if (b3 < LOW || b3 > HIGH) { + return {Unicode::REPLACEMENT_CHARACTER, 1}; + 
} + return {(static_cast<char32_t>(b & MASK4) << 18) | + (static_cast<char32_t>(b1 & MASKX) << 12) | + (static_cast<char32_t>(b2 & MASKX) << 6) | + static_cast<char32_t>(b3 & MASKX), + 4}; + } + + std::optional<std::u32string> Utf8::strictDecode(std::string_view input) { + std::u32string output; + char32_t codePoint; + size_t codeUnits; + output.reserve(input.size()); // Worst case is each byte is a single Unicode code point. + for (size_t index = 0; index < input.size(); index += codeUnits) { + std::tie(codePoint, codeUnits) = Utf8::decode(input.substr(index)); + if (codePoint == Unicode::REPLACEMENT_CHARACTER && codeUnits == 1) { + // Condition is only met when an illegal byte sequence is encountered. See Utf8::decode. + return std::nullopt; + } + output.push_back(codePoint); + } + output.shrink_to_fit(); + return output; + } + + std::u32string Utf8::lenientDecode(std::string_view input) { + std::u32string output; + char32_t codePoint; + size_t codeUnits; + output.reserve(input.size()); // Worst case is each byte is a single Unicode code point. 
+ for (size_t index = 0; index < input.size(); index += codeUnits) { + std::tie(codePoint, codeUnits) = Utf8::decode(input.substr(index)); + output.push_back(codePoint); + } + output.shrink_to_fit(); + return output; + } + + std::string& Utf8::encode(std::string* buffer, char32_t codePoint) { + assert(buffer != nullptr); + if (!Unicode::isValid(codePoint)) { + codePoint = Unicode::REPLACEMENT_CHARACTER; + } + if (codePoint <= 0x7f) { + buffer->push_back(static_cast<char>(static_cast<uint8_t>(codePoint))); + } else if (codePoint <= 0x7ff) { + buffer->push_back( + static_cast<char>(T2 | static_cast<uint8_t>(codePoint >> 6))); + buffer->push_back( + static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); + } else if (codePoint <= 0xffff) { + buffer->push_back( + static_cast<char>(T3 | static_cast<uint8_t>(codePoint >> 12))); + buffer->push_back(static_cast<char>( + TX | (static_cast<uint8_t>(codePoint >> 6) & MASKX))); + buffer->push_back( + static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); + } else { + buffer->push_back( + static_cast<char>(T4 | static_cast<uint8_t>(codePoint >> 18))); + buffer->push_back(static_cast<char>( + TX | (static_cast<uint8_t>(codePoint >> 12) & MASKX))); + buffer->push_back(static_cast<char>( + TX | (static_cast<uint8_t>(codePoint >> 6) & MASKX))); + buffer->push_back( + static_cast<char>(TX | (static_cast<uint8_t>(codePoint) & MASKX))); + } + return *buffer; + } + + std::optional<std::string> Utf8::strictEncode(std::u32string_view input) { + std::string output; + output.reserve(input.size() * 4); // Worst case is each Unicode code point encodes to 4 bytes. 
+ for (size_t index = 0; index < input.size(); index++) { + char32_t codePoint = input[index]; + if (!Unicode::isValid(codePoint)) { + return std::nullopt; + } + Utf8::encode(&output, codePoint); + } + output.shrink_to_fit(); + return output; + } + + std::string Utf8::lenientEncode(std::u32string_view input) { + std::string output; + output.reserve(input.size() * 4); // Worst case is each Unicode code point encodes to 4 bytes. + for (size_t index = 0; index < input.size(); index++) { + char32_t codePoint = input[index]; + if (!Unicode::isValid(codePoint)) { + codePoint = Unicode::REPLACEMENT_CHARACTER; + } + Utf8::encode(&output, codePoint); + } + output.shrink_to_fit(); + return output; + } + +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h new file mode 100644 index 0000000000..e4828441cd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/support/Utf8.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2021 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <optional> +#include <string> +#include <string_view> +#include <tuple> + +#include "antlr4-common.h" + +namespace antlrcpp { + + class ANTLR4CPP_PUBLIC Utf8 final { + public: + // Decodes the next code point, returning the decoded code point and the number + // of code units (a.k.a. bytes) consumed. In the event that an invalid code unit + // sequence is returned the replacement character, U+FFFD, is returned with a + // code unit count of 1. As U+FFFD requires 3 code units when encoded, this can + // be used to differentiate valid input from malformed input. + static std::pair<char32_t, size_t> decode(std::string_view input); + + // Decodes the given UTF-8 encoded input into a string of code points. 
+ static std::optional<std::u32string> strictDecode(std::string_view input); + + // Decodes the given UTF-8 encoded input into a string of code points. Unlike strictDecode(), + // each byte in an illegal byte sequence is replaced with the Unicode replacement character, + // U+FFFD. + static std::u32string lenientDecode(std::string_view input); + + // Encodes the given code point and appends it to the buffer. If the code point + // is an unpaired surrogate or outside of the valid Unicode range it is replaced + // with the replacement character, U+FFFD. + static std::string& encode(std::string *buffer, char32_t codePoint); + + // Encodes the given Unicode code point string as UTF-8. + static std::optional<std::string> strictEncode(std::u32string_view input); + + // Encodes the given Unicode code point string as UTF-8. Unlike strictEncode(), + // each invalid Unicode code point is replaced with the Unicode replacement character, U+FFFD. + static std::string lenientEncode(std::u32string_view input); + + private: + Utf8() = delete; + Utf8(const Utf8&) = delete; + Utf8(Utf8&&) = delete; + Utf8& operator=(const Utf8&) = delete; + Utf8& operator=(Utf8&&) = delete; + }; + +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/AbstractParseTreeVisitor.h b/contrib/libs/antlr4_cpp_runtime/src/tree/AbstractParseTreeVisitor.h new file mode 100644 index 0000000000..25505278f2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/AbstractParseTreeVisitor.h @@ -0,0 +1,129 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "tree/ParseTree.h" +#include "tree/ParseTreeVisitor.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC AbstractParseTreeVisitor : public ParseTreeVisitor { + public: + /// The default implementation calls <seealso cref="ParseTree#accept"/> on the + /// specified tree. + virtual std::any visit(ParseTree *tree) override { + return tree->accept(this); + } + + /** + * <p>The default implementation initializes the aggregate result to + * {@link #defaultResult defaultResult()}. Before visiting each child, it + * calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result + * is {@code false} no more children are visited and the current aggregate + * result is returned. After visiting a child, the aggregate result is + * updated by calling {@link #aggregateResult aggregateResult} with the + * previous aggregate result and the result of visiting the child.</p> + * + * <p>The default implementation is not safe for use in visitors that modify + * the tree structure. Visitors that modify the tree should override this + * method to behave properly in respect to the specific algorithm in use.</p> + */ + virtual std::any visitChildren(ParseTree *node) override { + std::any result = defaultResult(); + size_t n = node->children.size(); + for (size_t i = 0; i < n; i++) { + if (!shouldVisitNextChild(node, result)) { + break; + } + + std::any childResult = node->children[i]->accept(this); + result = aggregateResult(std::move(result), std::move(childResult)); + } + + return result; + } + + /// The default implementation returns the result of + /// <seealso cref="#defaultResult defaultResult"/>. + virtual std::any visitTerminal(TerminalNode * /*node*/) override { + return defaultResult(); + } + + /// The default implementation returns the result of + /// <seealso cref="#defaultResult defaultResult"/>. 
+ virtual std::any visitErrorNode(ErrorNode * /*node*/) override { + return defaultResult(); + } + + protected: + /// <summary> + /// Gets the default value returned by visitor methods. This value is + /// returned by the default implementations of + /// <seealso cref="#visitTerminal visitTerminal"/>, <seealso cref="#visitErrorNode visitErrorNode"/>. + /// The default implementation of <seealso cref="#visitChildren visitChildren"/> + /// initializes its aggregate result to this value. + /// <p/> + /// The base implementation returns {@code std::any()}. + /// </summary> + /// <returns> The default value returned by visitor methods. </returns> + virtual std::any defaultResult() { + return std::any(); + } + + /// <summary> + /// Aggregates the results of visiting multiple children of a node. After + /// either all children are visited or <seealso cref="#shouldVisitNextChild"/> returns + /// {@code false}, the aggregate value is returned as the result of + /// <seealso cref="#visitChildren"/>. + /// <p/> + /// The default implementation returns {@code nextResult}, meaning + /// <seealso cref="#visitChildren"/> will return the result of the last child visited + /// (or return the initial value if the node has no children). + /// </summary> + /// <param name="aggregate"> The previous aggregate value. In the default + /// implementation, the aggregate value is initialized to + /// <seealso cref="#defaultResult"/>, which is passed as the {@code aggregate} argument + /// to this method after the first child node is visited. </param> + /// <param name="nextResult"> The result of the immediately preceeding call to visit + /// a child node. + /// </param> + /// <returns> The updated aggregate result. </returns> + virtual std::any aggregateResult(std::any /*aggregate*/, std::any nextResult) { + return nextResult; + } + + /// <summary> + /// This method is called after visiting each child in + /// <seealso cref="#visitChildren"/>. 
This method is first called before the first + /// child is visited; at that point {@code currentResult} will be the initial + /// value (in the default implementation, the initial value is returned by a + /// call to <seealso cref="#defaultResult"/>. This method is not called after the last + /// child is visited. + /// <p/> + /// The default implementation always returns {@code true}, indicating that + /// {@code visitChildren} should only return after all children are visited. + /// One reason to override this method is to provide a "short circuit" + /// evaluation option for situations where the result of visiting a single + /// child has the potential to determine the result of the visit operation as + /// a whole. + /// </summary> + /// <param name="node"> The <seealso cref="ParseTree"/> whose children are currently being + /// visited. </param> + /// <param name="currentResult"> The current aggregate result of the children visited + /// to the current point. + /// </param> + /// <returns> {@code true} to continue visiting children. Otherwise return + /// {@code false} to stop visiting children and immediately return the + /// current aggregate result from <seealso cref="#visitChildren"/>. </returns> + virtual bool shouldVisitNextChild(ParseTree * /*node*/, const std::any &/*currentResult*/) { + return true; + } + + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNode.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNode.h new file mode 100644 index 0000000000..319ce39e0d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNode.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "tree/TerminalNode.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC ErrorNode : public TerminalNode { + public: + static bool is(const tree::ParseTree &parseTree) { return parseTree.getTreeType() == tree::ParseTreeType::ERROR; } + + static bool is(const tree::ParseTree *parseTree) { return parseTree != nullptr && is(*parseTree); } + + protected: + using TerminalNode::TerminalNode; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.cpp new file mode 100644 index 0000000000..142791dd96 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.cpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" +#include "Token.h" +#include "RuleContext.h" +#include "tree/ParseTreeVisitor.h" + +#include "tree/ErrorNodeImpl.h" + +using namespace antlr4; +using namespace antlr4::tree; + +Token* ErrorNodeImpl::getSymbol() const { + return symbol; +} + +void ErrorNodeImpl::setParent(RuleContext *parent_) { + this->parent = parent_; +} + +misc::Interval ErrorNodeImpl::getSourceInterval() { + if (symbol == nullptr) { + return misc::Interval::INVALID; + } + + size_t tokenIndex = symbol->getTokenIndex(); + return misc::Interval(tokenIndex, tokenIndex); +} + +std::any ErrorNodeImpl::accept(ParseTreeVisitor *visitor) { + return visitor->visitErrorNode(this); +} + +std::string ErrorNodeImpl::getText() { + return symbol->getText(); +} + +std::string ErrorNodeImpl::toStringTree(Parser * /*parser*/, bool /*pretty*/) { + return toString(); +} + +std::string ErrorNodeImpl::toString() { + if (symbol->getType() == Token::EOF) { + return "<EOF>"; + } + return symbol->getText(); +} + +std::string 
ErrorNodeImpl::toStringTree(bool /*pretty*/) { + return toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.h new file mode 100644 index 0000000000..8bafb62552 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ErrorNodeImpl.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ErrorNode.h" +#include "tree/TerminalNodeImpl.h" +#include "misc/Interval.h" + +#include "support/Any.h" + +namespace antlr4 { +namespace tree { + + /// <summary> + /// Represents a token that was consumed during resynchronization + /// rather than during a valid match operation. For example, + /// we will create this kind of a node during single token insertion + /// and deletion as well as during "consume until error recovery set" + /// upon no viable alternative exceptions. 
+ /// </summary> + class ANTLR4CPP_PUBLIC ErrorNodeImpl : public ErrorNode { + public: + Token *symbol; + + explicit ErrorNodeImpl(Token *symbol) : ErrorNode(ParseTreeType::ERROR), symbol(symbol) {} + + virtual Token* getSymbol() const override; + virtual void setParent(RuleContext *parent) override; + virtual misc::Interval getSourceInterval() override; + + virtual std::any accept(ParseTreeVisitor *visitor) override; + + virtual std::string getText() override; + virtual std::string toStringTree(Parser *parser, bool pretty = false) override; + virtual std::string toString() override; + virtual std::string toStringTree(bool pretty = false) override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.cpp new file mode 100644 index 0000000000..83e6339518 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "tree/ParseTreeListener.h" +#include "tree/ParseTree.h" +#include "tree/ErrorNode.h" + +#include "IterativeParseTreeWalker.h" + +using namespace antlr4::tree; +using namespace antlrcpp; + +void IterativeParseTreeWalker::walk(ParseTreeListener *listener, ParseTree *t) const { + std::vector<std::pair<ParseTree*, size_t>> stack; + ParseTree *currentNode = t; + size_t currentIndex = 0; + + while (currentNode != nullptr) { + // pre-order visit + if (ErrorNode::is(*currentNode)) { + listener->visitErrorNode(downCast<ErrorNode*>(currentNode)); + } else if (TerminalNode::is(*currentNode)) { + listener->visitTerminal(downCast<TerminalNode*>(currentNode)); + } else { + enterRule(listener, currentNode); + } + + // Move down to first child, if it exists. + if (!currentNode->children.empty()) { + stack.push_back(std::make_pair(currentNode, currentIndex)); + currentIndex = 0; + currentNode = currentNode->children[0]; + continue; + } + + // No child nodes, so walk tree. + do { + // post-order visit + if (!TerminalNode::is(*currentNode)) { + exitRule(listener, currentNode); + } + + // No parent, so no siblings. + if (stack.empty()) { + currentNode = nullptr; + currentIndex = 0; + break; + } + + // Move to next sibling if possible. + if (stack.back().first->children.size() > ++currentIndex) { + currentNode = stack.back().first->children[currentIndex]; + break; + } + + // No next sibling, so move up. 
+ std::tie(currentNode, currentIndex) = stack.back(); + stack.pop_back(); + } while (currentNode != nullptr); + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.h b/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.h new file mode 100644 index 0000000000..8957d87e44 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/IterativeParseTreeWalker.h @@ -0,0 +1,53 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +#include "tree/ParseTreeWalker.h" + +namespace antlr4 { +namespace tree { + + class ParseTreeListener; + + /** + * An iterative (read: non-recursive) pre-order and post-order tree walker that + * doesn't use the thread stack but heap-based stacks. Makes it possible to + * process deeply nested parse trees. + */ + class ANTLR4CPP_PUBLIC IterativeParseTreeWalker : public ParseTreeWalker { + public: + virtual void walk(ParseTreeListener *listener, ParseTree *t) const override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.cpp new file mode 100644 index 0000000000..8756398d88 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" + +using namespace antlr4::tree; + +bool ParseTree::operator == (const ParseTree &other) const { + return &other == this; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.h new file mode 100644 index 0000000000..cf8027b8fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTree.h @@ -0,0 +1,111 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/Any.h" +#include "tree/ParseTreeType.h" + +namespace antlr4 { +namespace tree { + + /// An interface to access the tree of <seealso cref="RuleContext"/> objects created + /// during a parse that makes the data structure look like a simple parse tree. 
+ /// This node represents both internal nodes, rule invocations, + /// and leaf nodes, token matches. + /// + /// The payload is either a <seealso cref="Token"/> or a <seealso cref="RuleContext"/> object. + // ml: This class unites 4 Java classes: RuleNode, ParseTree, SyntaxTree and Tree. + class ANTLR4CPP_PUBLIC ParseTree { + public: + ParseTree(ParseTree const&) = delete; + + virtual ~ParseTree() = default; + + ParseTree& operator=(ParseTree const&) = delete; + + /// The parent of this node. If the return value is null, then this + /// node is the root of the tree. + ParseTree *parent = nullptr; + + /// If we are debugging or building a parse tree for a visitor, + /// we need to track all of the tokens and rule invocations associated + /// with this rule's context. This is empty for parsing w/o tree constr. + /// operation because we don't the need to track the details about + /// how we parse this rule. + // ml: memory is not managed here, but by the owning class. This is just for the structure. + std::vector<ParseTree *> children; + + /// Print out a whole tree, not just a node, in LISP format + /// {@code (root child1 .. childN)}. Print just a node if this is a leaf. + virtual std::string toStringTree(bool pretty = false) = 0; + virtual std::string toString() = 0; + + /// Specialize toStringTree so that it can print out more information + /// based upon the parser. + virtual std::string toStringTree(Parser *parser, bool pretty = false) = 0; + + virtual bool operator == (const ParseTree &other) const; + + /// The <seealso cref="ParseTreeVisitor"/> needs a double dispatch method. + // ml: This has been changed to use Any instead of a template parameter, to avoid the need of a virtual template function. + virtual std::any accept(ParseTreeVisitor *visitor) = 0; + + /// Return the combined text of all leaf nodes. Does not get any + /// off-channel tokens (if any) so won't return whitespace and + /// comments if they are sent to parser on hidden channel. 
+ virtual std::string getText() = 0; + + /** + * Return an {@link Interval} indicating the index in the + * {@link TokenStream} of the first and last token associated with this + * subtree. If this node is a leaf, then the interval represents a single + * token and has interval i..i for token index i. + * + * <p>An interval of i..i-1 indicates an empty interval at position + * i in the input stream, where 0 <= i <= the size of the input + * token stream. Currently, the code base can only have i=0..n-1 but + * in concept one could have an empty interval after EOF. </p> + * + * <p>If source interval is unknown, this returns {@link Interval#INVALID}.</p> + * + * <p>As a weird special case, the source interval for rules matched after + * EOF is unspecified.</p> + */ + virtual misc::Interval getSourceInterval() = 0; + + ParseTreeType getTreeType() const { return _treeType; } + + protected: + explicit ParseTree(ParseTreeType treeType) : _treeType(treeType) {} + + private: + const ParseTreeType _treeType; + }; + + // A class to help managing ParseTree instances without the need of a shared_ptr. + class ANTLR4CPP_PUBLIC ParseTreeTracker { + public: + template<typename T, typename ... Args> + T* createInstance(Args&& ... args) { + static_assert(std::is_base_of<ParseTree, T>::value, "Argument must be a parse tree type"); + T* result = new T(args...); + _allocated.push_back(result); + return result; + } + + void reset() { + for (auto * entry : _allocated) + delete entry; + _allocated.clear(); + } + + private: + std::vector<ParseTree *> _allocated; + }; + + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.cpp new file mode 100644 index 0000000000..ce12297586 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ParseTreeListener.h" + +antlr4::tree::ParseTreeListener::~ParseTreeListener() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.h new file mode 100644 index 0000000000..60c7d8861a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeListener.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /** This interface describes the minimal core of methods triggered + * by {@link ParseTreeWalker}. E.g., + * + * ParseTreeWalker walker = new ParseTreeWalker(); + * walker.walk(myParseTreeListener, myParseTree); <-- triggers events in your listener + * + * If you want to trigger events in multiple listeners during a single + * tree walk, you can use the ParseTreeDispatcher object available at + * + * https://github.com/antlr/antlr4/issues/841 + */ + class ANTLR4CPP_PUBLIC ParseTreeListener { + public: + virtual ~ParseTreeListener(); + + virtual void visitTerminal(TerminalNode *node) = 0; + virtual void visitErrorNode(ErrorNode *node) = 0; + virtual void enterEveryRule(ParserRuleContext *ctx) = 0; + virtual void exitEveryRule(ParserRuleContext *ctx) = 0; + + bool operator == (const ParseTreeListener &other) { + return this == &other; + } + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeProperty.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeProperty.h new file mode 100644 index 0000000000..efd5e73bf8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeProperty.h @@ -0,0 +1,50 @@ +/* 
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + /// <summary> + /// Associate a property with a parse tree node. Useful with parse tree listeners + /// that need to associate values with particular tree nodes, kind of like + /// specifying a return value for the listener event method that visited a + /// particular node. Example: + /// + /// <pre> + /// ParseTreeProperty<Integer> values = new ParseTreeProperty<Integer>(); + /// values.put(tree, 36); + /// int x = values.get(tree); + /// values.removeFrom(tree); + /// </pre> + /// + /// You would make one decl (values here) in the listener and use lots of times + /// in your event methods. + /// </summary> + template<typename V> + class ANTLR4CPP_PUBLIC ParseTreeProperty { + public: + virtual ~ParseTreeProperty() {} + virtual V get(ParseTree *node) { + return _annotations[node]; + } + virtual void put(ParseTree *node, V value) { + _annotations[node] = value; + } + virtual V removeFrom(ParseTree *node) { + auto value = _annotations[node]; + _annotations.erase(node); + return value; + } + + protected: + std::map<ParseTree*, V> _annotations; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeType.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeType.h new file mode 100644 index 0000000000..17e0512b00 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeType.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + enum class ParseTreeType : size_t { + TERMINAL = 1, + ERROR = 2, + RULE = 3, + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeVisitor.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeVisitor.cpp new file mode 100644 index 0000000000..a329919c13 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeVisitor.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ParseTreeVisitor.h" + +antlr4::tree::ParseTreeVisitor::~ParseTreeVisitor() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeVisitor.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeVisitor.h new file mode 100644 index 0000000000..02d9dc9b95 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeVisitor.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/Any.h" + +namespace antlr4 { +namespace tree { + + /// <summary> + /// This interface defines the basic notion of a parse tree visitor. Generated + /// visitors implement this interface and the {@code XVisitor} interface for + /// grammar {@code X}. + /// </summary> + /// @param <T> The return type of the visit operation. Use <seealso cref="Void"/> for + /// operations with no return type. </param> + // ml: no template parameter here, to avoid the need for virtual template functions. Instead we have our Any class. 
+ class ANTLR4CPP_PUBLIC ParseTreeVisitor { + public: + virtual ~ParseTreeVisitor(); + + /// <summary> + /// Visit a parse tree, and return a user-defined result of the operation. + /// </summary> + /// <param name="tree"> The <seealso cref="ParseTree"/> to visit. </param> + /// <returns> The result of visiting the parse tree. </returns> + virtual std::any visit(ParseTree *tree) = 0; + + /// <summary> + /// Visit the children of a node, and return a user-defined result of the + /// operation. + /// </summary> + /// <param name="node"> The <seealso cref="ParseTree"/> whose children should be visited. </param> + /// <returns> The result of visiting the children of the node. </returns> + virtual std::any visitChildren(ParseTree *node) = 0; + + /// <summary> + /// Visit a terminal node, and return a user-defined result of the operation. + /// </summary> + /// <param name="node"> The <seealso cref="TerminalNode"/> to visit. </param> + /// <returns> The result of visiting the node. </returns> + virtual std::any visitTerminal(TerminalNode *node) = 0; + + /// <summary> + /// Visit an error node, and return a user-defined result of the operation. + /// </summary> + /// <param name="node"> The <seealso cref="ErrorNode"/> to visit. </param> + /// <returns> The result of visiting the node. </returns> + virtual std::any visitErrorNode(ErrorNode *node) = 0; + + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.cpp new file mode 100644 index 0000000000..3da4bec5c5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.cpp @@ -0,0 +1,48 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/ErrorNode.h" +#include "ParserRuleContext.h" +#include "tree/ParseTreeListener.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "tree/IterativeParseTreeWalker.h" +#include "tree/ParseTreeWalker.h" + +using namespace antlr4::tree; +using namespace antlrcpp; + +static IterativeParseTreeWalker defaultWalker; +ParseTreeWalker &ParseTreeWalker::DEFAULT = defaultWalker; + +void ParseTreeWalker::walk(ParseTreeListener *listener, ParseTree *t) const { + if (ErrorNode::is(*t)) { + listener->visitErrorNode(downCast<ErrorNode*>(t)); + return; + } + if (TerminalNode::is(*t)) { + listener->visitTerminal(downCast<TerminalNode*>(t)); + return; + } + + enterRule(listener, t); + for (auto &child : t->children) { + walk(listener, child); + } + exitRule(listener, t); +} + +void ParseTreeWalker::enterRule(ParseTreeListener *listener, ParseTree *r) const { + auto *ctx = downCast<ParserRuleContext*>(r); + listener->enterEveryRule(ctx); + ctx->enterRule(listener); +} + +void ParseTreeWalker::exitRule(ParseTreeListener *listener, ParseTree *r) const { + auto *ctx = downCast<ParserRuleContext*>(r); + ctx->exitRule(listener); + listener->exitEveryRule(ctx); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.h b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.h new file mode 100644 index 0000000000..718cbbd1e4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/ParseTreeWalker.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC ParseTreeWalker { + public: + static ParseTreeWalker &DEFAULT; + + virtual ~ParseTreeWalker() = default; + + /** + * <summary> + * Performs a walk on the given parse tree starting at the root and going down recursively + * with depth-first search. On each node, <seealso cref="ParseTreeWalker#enterRule"/> is called before + * recursively walking down into child nodes, then + * <seealso cref="ParseTreeWalker#exitRule"/> is called after the recursive call to wind up. + * </summary> + * <param name='listener'> The listener used by the walker to process grammar rules </param> + * <param name='t'> The parse tree to be walked on </param> + */ + virtual void walk(ParseTreeListener *listener, ParseTree *t) const; + + protected: + + /** + * <summary> + * Enters a grammar rule by first triggering the generic event <seealso cref="ParseTreeListener#enterEveryRule"/> + * then by triggering the event specific to the given parse tree node + * </summary> + * <param name='listener'> The listener responding to the trigger events </param> + * <param name='r'> The grammar rule containing the rule context </param> + */ + virtual void enterRule(ParseTreeListener *listener, ParseTree *r) const; + + /** + * <summary> + * Exits a grammar rule by first triggering the event specific to the given parse tree node + * then by triggering the generic event <seealso cref="ParseTreeListener#exitEveryRule"/> + * </summary> + * <param name='listener'> The listener responding to the trigger events </param> + * <param name='r'> The grammar rule containing the rule context </param> + */ + virtual void exitRule(ParseTreeListener *listener, ParseTree *r) const; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNode.h b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNode.h new file mode 100644 index 0000000000..9f7466edc5 --- 
/dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNode.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/ParseTree.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC TerminalNode : public ParseTree { + public: + static bool is(const tree::ParseTree &parseTree) { + const auto treeType = parseTree.getTreeType(); + return treeType == ParseTreeType::TERMINAL || treeType == ParseTreeType::ERROR; + } + + static bool is(const tree::ParseTree *parseTree) { return parseTree != nullptr && is(*parseTree); } + + virtual Token* getSymbol() const = 0; + + /** Set the parent for this leaf node. + * + * Technically, this is not backward compatible as it changes + * the interface but no one was able to create custom + * TerminalNodes anyway so I'm adding as it improves internal + * code quality. + * + * @since 4.7 + */ + virtual void setParent(RuleContext *parent) = 0; + + protected: + using ParseTree::ParseTree; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.cpp new file mode 100644 index 0000000000..8eeb299fee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.cpp @@ -0,0 +1,54 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" +#include "Token.h" +#include "RuleContext.h" +#include "tree/ParseTreeVisitor.h" + +#include "tree/TerminalNodeImpl.h" + +using namespace antlr4; +using namespace antlr4::tree; + +Token* TerminalNodeImpl::getSymbol() const { + return symbol; +} + +void TerminalNodeImpl::setParent(RuleContext *parent_) { + this->parent = parent_; +} + +misc::Interval TerminalNodeImpl::getSourceInterval() { + if (symbol == nullptr) { + return misc::Interval::INVALID; + } + + size_t tokenIndex = symbol->getTokenIndex(); + return misc::Interval(tokenIndex, tokenIndex); +} + +std::any TerminalNodeImpl::accept(ParseTreeVisitor *visitor) { + return visitor->visitTerminal(this); +} + +std::string TerminalNodeImpl::getText() { + return symbol->getText(); +} + +std::string TerminalNodeImpl::toStringTree(Parser * /*parser*/, bool /*pretty*/) { + return toString(); +} + +std::string TerminalNodeImpl::toString() { + if (symbol->getType() == Token::EOF) { + return "<EOF>"; + } + return symbol->getText(); +} + +std::string TerminalNodeImpl::toStringTree(bool /*pretty*/) { + return toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.h b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.h new file mode 100644 index 0000000000..1f8adacc6a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/TerminalNodeImpl.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "tree/TerminalNode.h" + +namespace antlr4 { +namespace tree { + + class ANTLR4CPP_PUBLIC TerminalNodeImpl : public TerminalNode { + public: + Token *symbol; + + explicit TerminalNodeImpl(Token *symbol) : TerminalNode(ParseTreeType::TERMINAL), symbol(symbol) {} + + virtual Token* getSymbol() const override; + virtual void setParent(RuleContext *parent) override; + virtual misc::Interval getSourceInterval() override; + + virtual std::any accept(ParseTreeVisitor *visitor) override; + + virtual std::string getText() override; + virtual std::string toStringTree(Parser *parser, bool pretty = false) override; + virtual std::string toString() override; + virtual std::string toStringTree(bool pretty = false) override; + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.cpp new file mode 100644 index 0000000000..f4065949b2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.cpp @@ -0,0 +1,241 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/ErrorNode.h" +#include "Parser.h" +#include "ParserRuleContext.h" +#include "support/CPPUtils.h" +#include "tree/TerminalNodeImpl.h" +#include "atn/ATN.h" +#include "misc/Interval.h" +#include "Token.h" +#include "CommonToken.h" +#include "misc/Predicate.h" + +#include "tree/Trees.h" + +using namespace antlr4; +using namespace antlr4::misc; +using namespace antlr4::tree; + +using namespace antlrcpp; + +Trees::Trees() { +} + +std::string Trees::toStringTree(ParseTree *t, bool pretty) { + return toStringTree(t, nullptr, pretty); +} + +std::string Trees::toStringTree(ParseTree *t, Parser *recog, bool pretty) { + if (recog == nullptr) + return toStringTree(t, std::vector<std::string>(), pretty); + return toStringTree(t, recog->getRuleNames(), pretty); +} + +std::string Trees::toStringTree(ParseTree *t, const std::vector<std::string> &ruleNames, bool pretty) { + std::string temp = antlrcpp::escapeWhitespace(Trees::getNodeText(t, ruleNames), false); + if (t->children.empty()) { + return temp; + } + + std::stringstream ss; + ss << "(" << temp << ' '; + + // Implement the recursive walk as iteration to avoid trouble with deep nesting. + std::stack<size_t> stack; + size_t childIndex = 0; + ParseTree *run = t; + size_t indentationLevel = 1; + while (childIndex < run->children.size()) { + if (childIndex > 0) { + ss << ' '; + } + ParseTree *child = run->children[childIndex]; + temp = antlrcpp::escapeWhitespace(Trees::getNodeText(child, ruleNames), false); + if (!child->children.empty()) { + // Go deeper one level. + stack.push(childIndex); + run = child; + childIndex = 0; + if (pretty) { + ++indentationLevel; + ss << std::endl; + for (size_t i = 0; i < indentationLevel; ++i) { + ss << " "; + } + } + ss << "(" << temp << " "; + } else { + ss << temp; + while (++childIndex == run->children.size()) { + if (stack.size() > 0) { + // Reached the end of the current level. See if we can step up from here. 
+ childIndex = stack.top(); + stack.pop(); + run = run->parent; + if (pretty) { + --indentationLevel; + } + ss << ")"; + } else { + break; + } + } + } + } + + ss << ")"; + return ss.str(); +} + +std::string Trees::getNodeText(ParseTree *t, Parser *recog) { + return getNodeText(t, recog->getRuleNames()); +} + +std::string Trees::getNodeText(ParseTree *t, const std::vector<std::string> &ruleNames) { + if (ruleNames.size() > 0) { + if (is<RuleContext *>(t)) { + size_t ruleIndex = dynamic_cast<RuleContext *>(t)->getRuleIndex(); + std::string ruleName = ruleNames[ruleIndex]; + size_t altNumber = dynamic_cast<RuleContext *>(t)->getAltNumber(); + if (altNumber != atn::ATN::INVALID_ALT_NUMBER) { + return ruleName + ":" + std::to_string(altNumber); + } + return ruleName; + } else if (is<ErrorNode *>(t)) { + return t->toString(); + } else if (is<TerminalNode *>(t)) { + Token *symbol = dynamic_cast<TerminalNode *>(t)->getSymbol(); + if (symbol != nullptr) { + std::string s = symbol->getText(); + return s; + } + } + } + // no recog for rule names + if (is<RuleContext *>(t)) { + return dynamic_cast<RuleContext *>(t)->getText(); + } + + if (is<TerminalNodeImpl *>(t)) { + return dynamic_cast<TerminalNodeImpl *>(t)->getSymbol()->getText(); + } + + return ""; +} + +std::vector<ParseTree *> Trees::getAncestors(ParseTree *t) { + std::vector<ParseTree *> ancestors; + ParseTree *parent = t->parent; + while (parent != nullptr) { + ancestors.insert(ancestors.begin(), parent); // insert at start + parent = parent->parent; + } + return ancestors; +} + +template<typename T> +static void _findAllNodes(ParseTree *t, size_t index, bool findTokens, std::vector<T> &nodes) { + // check this node (the root) first + if (findTokens && is<TerminalNode *>(t)) { + TerminalNode *tnode = dynamic_cast<TerminalNode *>(t); + if (tnode->getSymbol()->getType() == index) { + nodes.push_back(t); + } + } else if (!findTokens && is<ParserRuleContext *>(t)) { + ParserRuleContext *ctx = 
dynamic_cast<ParserRuleContext *>(t); + if (ctx->getRuleIndex() == index) { + nodes.push_back(t); + } + } + // check children + for (size_t i = 0; i < t->children.size(); i++) { + _findAllNodes(t->children[i], index, findTokens, nodes); + } +} + +bool Trees::isAncestorOf(ParseTree *t, ParseTree *u) { + if (t == nullptr || u == nullptr || t->parent == nullptr) { + return false; + } + + ParseTree *p = u->parent; + while (p != nullptr) { + if (t == p) { + return true; + } + p = p->parent; + } + return false; +} + +std::vector<ParseTree *> Trees::findAllTokenNodes(ParseTree *t, size_t ttype) { + return findAllNodes(t, ttype, true); +} + +std::vector<ParseTree *> Trees::findAllRuleNodes(ParseTree *t, size_t ruleIndex) { + return findAllNodes(t, ruleIndex, false); +} + +std::vector<ParseTree *> Trees::findAllNodes(ParseTree *t, size_t index, bool findTokens) { + std::vector<ParseTree *> nodes; + _findAllNodes<ParseTree *>(t, index, findTokens, nodes); + return nodes; +} + +std::vector<ParseTree *> Trees::getDescendants(ParseTree *t) { + std::vector<ParseTree *> nodes; + nodes.push_back(t); + std::size_t n = t->children.size(); + for (size_t i = 0 ; i < n ; i++) { + auto descentants = getDescendants(t->children[i]); + for (auto *entry: descentants) { + nodes.push_back(entry); + } + } + return nodes; +} + +std::vector<ParseTree *> Trees::descendants(ParseTree *t) { + return getDescendants(t); +} + +ParserRuleContext* Trees::getRootOfSubtreeEnclosingRegion(ParseTree *t, size_t startTokenIndex, size_t stopTokenIndex) { + size_t n = t->children.size(); + for (size_t i = 0; i < n; i++) { + ParserRuleContext *r = getRootOfSubtreeEnclosingRegion(t->children[i], startTokenIndex, stopTokenIndex); + if (r != nullptr) { + return r; + } + } + + if (is<ParserRuleContext *>(t)) { + ParserRuleContext *r = dynamic_cast<ParserRuleContext *>(t); + if (startTokenIndex >= r->getStart()->getTokenIndex() && // is range fully contained in t? 
+ (r->getStop() == nullptr || stopTokenIndex <= r->getStop()->getTokenIndex())) { + // note: r.getStop()==null likely implies that we bailed out of parser and there's nothing to the right + return r; + } + } + return nullptr; +} + +ParseTree * Trees::findNodeSuchThat(ParseTree *t, Ref<Predicate> const& pred) { + if (pred->test(t)) { + return t; + } + + size_t n = t->children.size(); + for (size_t i = 0 ; i < n ; ++i) { + ParseTree *u = findNodeSuchThat(t->children[i], pred); + if (u != nullptr) { + return u; + } + } + + return nullptr; +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.h b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.h new file mode 100644 index 0000000000..f779158d01 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/Trees.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "tree/TerminalNode.h" +#include "ParserRuleContext.h" +#include "Recognizer.h" + +namespace antlr4 { +namespace tree { + + /// A set of utility routines useful for all kinds of ANTLR trees. + class ANTLR4CPP_PUBLIC Trees { + public: + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, bool pretty = false); + + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. + static std::string toStringTree(ParseTree *t, Parser *recog, bool pretty = false); + + /// Print out a whole tree in LISP form. getNodeText is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. 
+ static std::string toStringTree(ParseTree *t, const std::vector<std::string> &ruleNames, bool pretty = false); + static std::string getNodeText(ParseTree *t, Parser *recog); + static std::string getNodeText(ParseTree *t, const std::vector<std::string> &ruleNames); + + /// Return a list of all ancestors of this node. The first node of + /// list is the root and the last is the parent of this node. + static std::vector<ParseTree *> getAncestors(ParseTree *t); + + /** Return true if t is u's parent or a node on path to root from u. + * Use == not equals(). + * + * @since 4.5.1 + */ + static bool isAncestorOf(ParseTree *t, ParseTree *u); + static std::vector<ParseTree *> findAllTokenNodes(ParseTree *t, size_t ttype); + static std::vector<ParseTree *> findAllRuleNodes(ParseTree *t, size_t ruleIndex); + static std::vector<ParseTree *> findAllNodes(ParseTree *t, size_t index, bool findTokens); + + /** Get all descendents; includes t itself. + * + * @since 4.5.1 + */ + static std::vector<ParseTree *> getDescendants(ParseTree *t); + + /** @deprecated */ + static std::vector<ParseTree *> descendants(ParseTree *t); + + /** Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex + * inclusively using postorder traversal. Recursive depth-first-search. 
+ * + * @since 4.5.1 + */ + static ParserRuleContext* getRootOfSubtreeEnclosingRegion(ParseTree *t, + size_t startTokenIndex, // inclusive + size_t stopTokenIndex); // inclusive + + /** Return first node satisfying the pred + * + * @since 4.5.1 + */ + static ParseTree* findNodeSuchThat(ParseTree *t, Ref<misc::Predicate> const& pred); + + private: + Trees(); + }; + +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.cpp new file mode 100644 index 0000000000..5320f910b9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.cpp @@ -0,0 +1,9 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/pattern/Chunk.h" + +antlr4::tree::pattern::Chunk::~Chunk() { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.h new file mode 100644 index 0000000000..61079a8ca8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/Chunk.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A chunk is either a token tag, a rule tag, or a span of literal text within a + /// tree pattern. + /// <p/> + /// The method <seealso cref="ParseTreePatternMatcher#split(String)"/> returns a list of + /// chunks in preparation for creating a token stream by + /// <seealso cref="ParseTreePatternMatcher#tokenize(String)"/>. 
From there, we get a parse + /// tree from with <seealso cref="ParseTreePatternMatcher#compile(String, int)"/>. These + /// chunks are converted to <seealso cref="RuleTagToken"/>, <seealso cref="TokenTagToken"/>, or the + /// regular tokens of the text surrounding the tags. + /// </summary> + class ANTLR4CPP_PUBLIC Chunk { + public: + Chunk() = default; + Chunk(Chunk const&) = default; + virtual ~Chunk(); + + Chunk& operator=(Chunk const&) = default; + + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + virtual std::string toString() { + std::string str; + return str; + } + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.cpp new file mode 100644 index 0000000000..41896d6df7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.cpp @@ -0,0 +1,69 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +#include "tree/pattern/ParseTreeMatch.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::pattern; + +ParseTreeMatch::ParseTreeMatch(ParseTree *tree, const ParseTreePattern &pattern, + const std::map<std::string, std::vector<ParseTree *>> &labels, + ParseTree *mismatchedNode) + : _tree(tree), _pattern(pattern), _labels(labels), _mismatchedNode(mismatchedNode) { + if (tree == nullptr) { + throw IllegalArgumentException("tree cannot be nul"); + } +} + +ParseTreeMatch::~ParseTreeMatch() { +} + +ParseTree* ParseTreeMatch::get(const std::string &label) { + auto iterator = _labels.find(label); + if (iterator == _labels.end() || iterator->second.empty()) { + return nullptr; + } + + return iterator->second.back(); // return last if multiple +} + +std::vector<ParseTree *> ParseTreeMatch::getAll(const std::string &label) { + auto iterator = _labels.find(label); + if (iterator == _labels.end()) { + return {}; + } + + return iterator->second; +} + +std::map<std::string, std::vector<ParseTree *>>& ParseTreeMatch::getLabels() { + return _labels; +} + +ParseTree *ParseTreeMatch::getMismatchedNode() { + return _mismatchedNode; +} + +bool ParseTreeMatch::succeeded() { + return _mismatchedNode == nullptr; +} + +const ParseTreePattern& ParseTreeMatch::getPattern() { + return _pattern; +} + +ParseTree * ParseTreeMatch::getTree() { + return _tree; +} + +std::string ParseTreeMatch::toString() { + if (succeeded()) { + return "Match succeeded; found " + std::to_string(_labels.size()) + " labels"; + } else { + return "Match failed; found " + std::to_string(_labels.size()) + " labels"; + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.h new file mode 100644 index 0000000000..eefde46c83 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreeMatch.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. 
All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// Represents the result of matching a ParseTree against a tree pattern. + class ANTLR4CPP_PUBLIC ParseTreeMatch { + private: + /// This is the backing field for getTree(). + ParseTree *_tree; + + /// This is the backing field for getPattern(). + const ParseTreePattern &_pattern; + + /// This is the backing field for getLabels(). + std::map<std::string, std::vector<ParseTree *>> _labels; + + /// This is the backing field for getMismatchedNode(). + ParseTree *_mismatchedNode; + + public: + /// <summary> + /// Constructs a new instance of <seealso cref="ParseTreeMatch"/> from the specified + /// parse tree and pattern. + /// </summary> + /// <param name="tree"> The parse tree to match against the pattern. </param> + /// <param name="pattern"> The parse tree pattern. </param> + /// <param name="labels"> A mapping from label names to collections of + /// <seealso cref="ParseTree"/> objects located by the tree pattern matching process. </param> + /// <param name="mismatchedNode"> The first node which failed to match the tree + /// pattern during the matching process. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code tree} is {@code null} </exception> + /// <exception cref="IllegalArgumentException"> if {@code pattern} is {@code null} </exception> + /// <exception cref="IllegalArgumentException"> if {@code labels} is {@code null} </exception> + ParseTreeMatch(ParseTree *tree, ParseTreePattern const& pattern, + const std::map<std::string, std::vector<ParseTree *>> &labels, ParseTree *mismatchedNode); + ParseTreeMatch(ParseTreeMatch const&) = default; + virtual ~ParseTreeMatch(); + + /// <summary> + /// Get the last node associated with a specific {@code label}. 
+ /// <p/> + /// For example, for pattern {@code <id:ID>}, {@code get("id")} returns the + /// node matched for that {@code ID}. If more than one node + /// matched the specified label, only the last is returned. If there is + /// no node associated with the label, this returns {@code null}. + /// <p/> + /// Pattern tags like {@code <ID>} and {@code <expr>} without labels are + /// considered to be labeled with {@code ID} and {@code expr}, respectively. + /// </summary> + /// <param name="labe"> The label to check. + /// </param> + /// <returns> The last <seealso cref="ParseTree"/> to match a tag with the specified + /// label, or {@code null} if no parse tree matched a tag with the label. </returns> + virtual ParseTree* get(const std::string &label); + + /// <summary> + /// Return all nodes matching a rule or token tag with the specified label. + /// <p/> + /// If the {@code label} is the name of a parser rule or token in the + /// grammar, the resulting list will contain both the parse trees matching + /// rule or tags explicitly labeled with the label and the complete set of + /// parse trees matching the labeled and unlabeled tags in the pattern for + /// the parser rule or token. For example, if {@code label} is {@code "foo"}, + /// the result will contain <em>all</em> of the following. + /// + /// <ul> + /// <li>Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and + /// {@code <foo:AnyTokenName>}.</li> + /// <li>Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.</li> + /// <li>Parse tree nodes matching tags of the form {@code <foo>}.</li> + /// </ul> + /// </summary> + /// <param name="labe"> The label. + /// </param> + /// <returns> A collection of all <seealso cref="ParseTree"/> nodes matching tags with + /// the specified {@code label}. If no nodes matched the label, an empty list + /// is returned. 
</returns> + virtual std::vector<ParseTree *> getAll(const std::string &label); + + /// <summary> + /// Return a mapping from label → [list of nodes]. + /// <p/> + /// The map includes special entries corresponding to the names of rules and + /// tokens referenced in tags in the original pattern. For additional + /// information, see the description of <seealso cref="#getAll(String)"/>. + /// </summary> + /// <returns> A mapping from labels to parse tree nodes. If the parse tree + /// pattern did not contain any rule or token tags, this map will be empty. </returns> + virtual std::map<std::string, std::vector<ParseTree *>>& getLabels(); + + /// <summary> + /// Get the node at which we first detected a mismatch. + /// </summary> + /// <returns> the node at which we first detected a mismatch, or {@code null} + /// if the match was successful. </returns> + virtual ParseTree* getMismatchedNode(); + + /// <summary> + /// Gets a value indicating whether the match operation succeeded. + /// </summary> + /// <returns> {@code true} if the match operation succeeded; otherwise, + /// {@code false}. </returns> + virtual bool succeeded(); + + /// <summary> + /// Get the tree pattern we are matching against. + /// </summary> + /// <returns> The tree pattern we are matching against. </returns> + virtual const ParseTreePattern& getPattern(); + + /// <summary> + /// Get the parse tree we are trying to match to a pattern. + /// </summary> + /// <returns> The <seealso cref="ParseTree"/> we are trying to match to a pattern. 
</returns> + virtual ParseTree* getTree(); + + virtual std::string toString(); + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.cpp new file mode 100644 index 0000000000..ca7f8f20d6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/pattern/ParseTreePatternMatcher.h" +#include "tree/pattern/ParseTreeMatch.h" + +#include "tree/xpath/XPath.h" +#include "tree/xpath/XPathElement.h" + +#include "tree/pattern/ParseTreePattern.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::pattern; + +using namespace antlrcpp; + +ParseTreePattern::ParseTreePattern(ParseTreePatternMatcher *matcher, const std::string &pattern, int patternRuleIndex_, + ParseTree *patternTree) + : patternRuleIndex(patternRuleIndex_), _pattern(pattern), _patternTree(patternTree), _matcher(matcher) { +} + +ParseTreePattern::~ParseTreePattern() { +} + +ParseTreeMatch ParseTreePattern::match(ParseTree *tree) { + return _matcher->match(tree, *this); +} + +bool ParseTreePattern::matches(ParseTree *tree) { + return _matcher->match(tree, *this).succeeded(); +} + +std::vector<ParseTreeMatch> ParseTreePattern::findAll(ParseTree *tree, const std::string &xpath) { + xpath::XPath finder(_matcher->getParser(), xpath); + std::vector<ParseTree *> subtrees = finder.evaluate(tree); + std::vector<ParseTreeMatch> matches; + for (auto *t : subtrees) { + ParseTreeMatch aMatch = match(t); + if (aMatch.succeeded()) { + matches.push_back(aMatch); + } + } + return matches; +} + + +ParseTreePatternMatcher *ParseTreePattern::getMatcher() const { 
+ return _matcher; +} + +std::string ParseTreePattern::getPattern() const { + return _pattern; +} + +int ParseTreePattern::getPatternRuleIndex() const { + return patternRuleIndex; +} + +ParseTree* ParseTreePattern::getPatternTree() const { + return _patternTree; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.h new file mode 100644 index 0000000000..d5b86ff473 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePattern.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A pattern like {@code <ID> = <expr>;} converted to a <seealso cref="ParseTree"/> by + /// <seealso cref="ParseTreePatternMatcher#compile(String, int)"/>. + /// </summary> + class ANTLR4CPP_PUBLIC ParseTreePattern { + public: + /// <summary> + /// Construct a new instance of the <seealso cref="ParseTreePattern"/> class. + /// </summary> + /// <param name="matcher"> The <seealso cref="ParseTreePatternMatcher"/> which created this + /// tree pattern. </param> + /// <param name="pattern"> The tree pattern in concrete syntax form. </param> + /// <param name="patternRuleIndex"> The parser rule which serves as the root of the + /// tree pattern. </param> + /// <param name="patternTree"> The tree pattern in <seealso cref="ParseTree"/> form. </param> + ParseTreePattern(ParseTreePatternMatcher *matcher, const std::string &pattern, int patternRuleIndex, + ParseTree *patternTree); + ParseTreePattern(ParseTreePattern const&) = default; + virtual ~ParseTreePattern(); + + /// <summary> + /// Match a specific parse tree against this tree pattern. 
+ /// </summary> + /// <param name="tree"> The parse tree to match against this tree pattern. </param> + /// <returns> A <seealso cref="ParseTreeMatch"/> object describing the result of the + /// match operation. The <seealso cref="ParseTreeMatch#succeeded()"/> method can be + /// used to determine whether or not the match was successful. </returns> + virtual ParseTreeMatch match(ParseTree *tree); + + /// <summary> + /// Determine whether or not a parse tree matches this tree pattern. + /// </summary> + /// <param name="tree"> The parse tree to match against this tree pattern. </param> + /// <returns> {@code true} if {@code tree} is a match for the current tree + /// pattern; otherwise, {@code false}. </returns> + virtual bool matches(ParseTree *tree); + + /// Find all nodes using XPath and then try to match those subtrees against + /// this tree pattern. + /// @param tree The ParseTree to match against this pattern. + /// @param xpath An expression matching the nodes + /// + /// @returns A collection of ParseTreeMatch objects describing the + /// successful matches. Unsuccessful matches are omitted from the result, + /// regardless of the reason for the failure. + virtual std::vector<ParseTreeMatch> findAll(ParseTree *tree, const std::string &xpath); + + /// <summary> + /// Get the <seealso cref="ParseTreePatternMatcher"/> which created this tree pattern. + /// </summary> + /// <returns> The <seealso cref="ParseTreePatternMatcher"/> which created this tree + /// pattern. </returns> + virtual ParseTreePatternMatcher *getMatcher() const; + + /// <summary> + /// Get the tree pattern in concrete syntax form. + /// </summary> + /// <returns> The tree pattern in concrete syntax form. </returns> + virtual std::string getPattern() const; + + /// <summary> + /// Get the parser rule which serves as the outermost rule for the tree + /// pattern. + /// </summary> + /// <returns> The parser rule which serves as the outermost rule for the tree + /// pattern. 
</returns> + virtual int getPatternRuleIndex() const; + + /// <summary> + /// Get the tree pattern as a <seealso cref="ParseTree"/>. The rule and token tags from + /// the pattern are present in the parse tree as terminal nodes with a symbol + /// of type <seealso cref="RuleTagToken"/> or <seealso cref="TokenTagToken"/>. + /// </summary> + /// <returns> The tree pattern as a <seealso cref="ParseTree"/>. </returns> + virtual ParseTree* getPatternTree() const; + + private: + const int patternRuleIndex; + + /// This is the backing field for <seealso cref="#getPattern()"/>. + const std::string _pattern; + + /// This is the backing field for <seealso cref="#getPatternTree()"/>. + ParseTree *_patternTree; + + /// This is the backing field for <seealso cref="#getMatcher()"/>. + ParseTreePatternMatcher *const _matcher; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.cpp new file mode 100644 index 0000000000..4c28658954 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.cpp @@ -0,0 +1,370 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/pattern/ParseTreePattern.h" +#include "tree/pattern/ParseTreeMatch.h" +#include "tree/TerminalNode.h" +#include "CommonTokenStream.h" +#include "ParserInterpreter.h" +#include "tree/pattern/TokenTagToken.h" +#include "ParserRuleContext.h" +#include "tree/pattern/RuleTagToken.h" +#include "tree/pattern/TagChunk.h" +#include "atn/ATN.h" +#include "Lexer.h" +#include "BailErrorStrategy.h" + +#include "ListTokenSource.h" +#include "tree/pattern/TextChunk.h" +#include "ANTLRInputStream.h" +#include "support/Arrays.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "tree/pattern/ParseTreePatternMatcher.h" + +using namespace antlr4; +using namespace antlr4::tree; +using namespace antlr4::tree::pattern; +using namespace antlrcpp; + +ParseTreePatternMatcher::CannotInvokeStartRule::CannotInvokeStartRule(const RuntimeException &e) : RuntimeException(e.what()) { +} + +ParseTreePatternMatcher::CannotInvokeStartRule::~CannotInvokeStartRule() { +} + +ParseTreePatternMatcher::StartRuleDoesNotConsumeFullPattern::~StartRuleDoesNotConsumeFullPattern() { +} + +ParseTreePatternMatcher::ParseTreePatternMatcher(Lexer *lexer, Parser *parser) : _lexer(lexer), _parser(parser) { + InitializeInstanceFields(); +} + +ParseTreePatternMatcher::~ParseTreePatternMatcher() { +} + +void ParseTreePatternMatcher::setDelimiters(const std::string &start, const std::string &stop, const std::string &escapeLeft) { + if (start.empty()) { + throw IllegalArgumentException("start cannot be null or empty"); + } + + if (stop.empty()) { + throw IllegalArgumentException("stop cannot be null or empty"); + } + + _start = start; + _stop = stop; + _escape = escapeLeft; +} + +bool ParseTreePatternMatcher::matches(ParseTree *tree, const std::string &pattern, int patternRuleIndex) { + ParseTreePattern p = compile(pattern, patternRuleIndex); + return matches(tree, p); +} + +bool ParseTreePatternMatcher::matches(ParseTree *tree, const ParseTreePattern &pattern) { + 
std::map<std::string, std::vector<ParseTree *>> labels; + ParseTree *mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels); + return mismatchedNode == nullptr; +} + +ParseTreeMatch ParseTreePatternMatcher::match(ParseTree *tree, const std::string &pattern, int patternRuleIndex) { + ParseTreePattern p = compile(pattern, patternRuleIndex); + return match(tree, p); +} + +ParseTreeMatch ParseTreePatternMatcher::match(ParseTree *tree, const ParseTreePattern &pattern) { + std::map<std::string, std::vector<ParseTree *>> labels; + tree::ParseTree *mismatchedNode = matchImpl(tree, pattern.getPatternTree(), labels); + return ParseTreeMatch(tree, pattern, labels, mismatchedNode); +} + +ParseTreePattern ParseTreePatternMatcher::compile(const std::string &pattern, int patternRuleIndex) { + ListTokenSource tokenSrc(tokenize(pattern)); + CommonTokenStream tokens(&tokenSrc); + + ParserInterpreter parserInterp(_parser->getGrammarFileName(), _parser->getVocabulary(), + _parser->getRuleNames(), _parser->getATNWithBypassAlts(), &tokens); + + ParserRuleContext *tree = nullptr; + try { + parserInterp.setErrorHandler(std::make_shared<BailErrorStrategy>()); + tree = parserInterp.parse(patternRuleIndex); + } catch (ParseCancellationException &e) { +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + // rethrow_if_nested is not available before VS 2015. + throw e; +#else + std::rethrow_if_nested(e); // Unwrap the nested exception. +#endif + } catch (RecognitionException &re) { + throw re; +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (std::exception &e) { + // throw_with_nested is not available before VS 2015. + throw e; +#else + } catch (std::exception & /*e*/) { + std::throw_with_nested(RuntimeException("Cannot invoke start rule")); // Wrap any other exception. 
+#endif + } + + // Make sure tree pattern compilation checks for a complete parse + if (tokens.LA(1) != Token::EOF) { + throw StartRuleDoesNotConsumeFullPattern(); + } + + return ParseTreePattern(this, pattern, patternRuleIndex, tree); +} + +Lexer* ParseTreePatternMatcher::getLexer() { + return _lexer; +} + +Parser* ParseTreePatternMatcher::getParser() { + return _parser; +} + +ParseTree* ParseTreePatternMatcher::matchImpl(ParseTree *tree, ParseTree *patternTree, + std::map<std::string, std::vector<ParseTree *>> &labels) { + if (tree == nullptr) { + throw IllegalArgumentException("tree cannot be nul"); + } + + if (patternTree == nullptr) { + throw IllegalArgumentException("patternTree cannot be nul"); + } + + // x and <ID>, x and y, or x and x; or could be mismatched types + if (is<TerminalNode *>(tree) && is<TerminalNode *>(patternTree)) { + TerminalNode *t1 = dynamic_cast<TerminalNode *>(tree); + TerminalNode *t2 = dynamic_cast<TerminalNode *>(patternTree); + + ParseTree *mismatchedNode = nullptr; + // both are tokens and they have same type + if (t1->getSymbol()->getType() == t2->getSymbol()->getType()) { + if (is<TokenTagToken *>(t2->getSymbol())) { // x and <ID> + TokenTagToken *tokenTagToken = dynamic_cast<TokenTagToken *>(t2->getSymbol()); + + // track label->list-of-nodes for both token name and label (if any) + labels[tokenTagToken->getTokenName()].push_back(tree); + if (tokenTagToken->getLabel() != "") { + labels[tokenTagToken->getLabel()].push_back(tree); + } + } else if (t1->getText() == t2->getText()) { + // x and x + } else { + // x and y + if (mismatchedNode == nullptr) { + mismatchedNode = t1; + } + } + } else { + if (mismatchedNode == nullptr) { + mismatchedNode = t1; + } + } + + return mismatchedNode; + } + + if (is<ParserRuleContext *>(tree) && is<ParserRuleContext *>(patternTree)) { + ParserRuleContext *r1 = dynamic_cast<ParserRuleContext *>(tree); + ParserRuleContext *r2 = dynamic_cast<ParserRuleContext *>(patternTree); + ParseTree 
*mismatchedNode = nullptr; + + // (expr ...) and <expr> + RuleTagToken *ruleTagToken = getRuleTagToken(r2); + if (ruleTagToken != nullptr) { + //ParseTreeMatch *m = nullptr; // unused? + if (r1->getRuleIndex() == r2->getRuleIndex()) { + // track label->list-of-nodes for both rule name and label (if any) + labels[ruleTagToken->getRuleName()].push_back(tree); + if (ruleTagToken->getLabel() != "") { + labels[ruleTagToken->getLabel()].push_back(tree); + } + } else { + if (!mismatchedNode) { + mismatchedNode = r1; + } + } + + return mismatchedNode; + } + + // (expr ...) and (expr ...) + if (r1->children.size() != r2->children.size()) { + if (mismatchedNode == nullptr) { + mismatchedNode = r1; + } + + return mismatchedNode; + } + + std::size_t n = r1->children.size(); + for (size_t i = 0; i < n; i++) { + ParseTree *childMatch = matchImpl(r1->children[i], patternTree->children[i], labels); + if (childMatch) { + return childMatch; + } + } + + return mismatchedNode; + } + + // if nodes aren't both tokens or both rule nodes, can't match + return tree; +} + +RuleTagToken* ParseTreePatternMatcher::getRuleTagToken(ParseTree *t) { + if (t->children.size() == 1 && is<TerminalNode *>(t->children[0])) { + TerminalNode *c = dynamic_cast<TerminalNode *>(t->children[0]); + if (is<RuleTagToken *>(c->getSymbol())) { + return dynamic_cast<RuleTagToken *>(c->getSymbol()); + } + } + return nullptr; +} + +std::vector<std::unique_ptr<Token>> ParseTreePatternMatcher::tokenize(const std::string &pattern) { + // split pattern into chunks: sea (raw input) and islands (<ID>, <expr>) + std::vector<Chunk> chunks = split(pattern); + + // create token stream from text and tags + std::vector<std::unique_ptr<Token>> tokens; + for (auto chunk : chunks) { + if (is<TagChunk *>(&chunk)) { + TagChunk &tagChunk = (TagChunk&)chunk; + // add special rule token or conjure up new token from name + if (isupper(tagChunk.getTag()[0])) { + size_t ttype = _parser->getTokenType(tagChunk.getTag()); + if (ttype == 
Token::INVALID_TYPE) { + throw IllegalArgumentException("Unknown token " + tagChunk.getTag() + " in pattern: " + pattern); + } + tokens.emplace_back(new TokenTagToken(tagChunk.getTag(), (int)ttype, tagChunk.getLabel())); + } else if (islower(tagChunk.getTag()[0])) { + size_t ruleIndex = _parser->getRuleIndex(tagChunk.getTag()); + if (ruleIndex == INVALID_INDEX) { + throw IllegalArgumentException("Unknown rule " + tagChunk.getTag() + " in pattern: " + pattern); + } + size_t ruleImaginaryTokenType = _parser->getATNWithBypassAlts().ruleToTokenType[ruleIndex]; + tokens.emplace_back(new RuleTagToken(tagChunk.getTag(), ruleImaginaryTokenType, tagChunk.getLabel())); + } else { + throw IllegalArgumentException("invalid tag: " + tagChunk.getTag() + " in pattern: " + pattern); + } + } else { + TextChunk &textChunk = (TextChunk&)chunk; + ANTLRInputStream input(textChunk.getText()); + _lexer->setInputStream(&input); + std::unique_ptr<Token> t(_lexer->nextToken()); + while (t->getType() != Token::EOF) { + tokens.push_back(std::move(t)); + t = _lexer->nextToken(); + } + _lexer->setInputStream(nullptr); + } + } + + return tokens; +} + +std::vector<Chunk> ParseTreePatternMatcher::split(const std::string &pattern) { + size_t p = 0; + size_t n = pattern.length(); + std::vector<Chunk> chunks; + + // find all start and stop indexes first, then collect + std::vector<size_t> starts; + std::vector<size_t> stops; + while (p < n) { + if (p == pattern.find(_escape + _start,p)) { + p += _escape.length() + _start.length(); + } else if (p == pattern.find(_escape + _stop,p)) { + p += _escape.length() + _stop.length(); + } else if (p == pattern.find(_start,p)) { + starts.push_back(p); + p += _start.length(); + } else if (p == pattern.find(_stop,p)) { + stops.push_back(p); + p += _stop.length(); + } else { + p++; + } + } + + if (starts.size() > stops.size()) { + throw IllegalArgumentException("unterminated tag in pattern: " + pattern); + } + + if (starts.size() < stops.size()) { + throw 
IllegalArgumentException("missing start tag in pattern: " + pattern); + } + + size_t ntags = starts.size(); + for (size_t i = 0; i < ntags; i++) { + if (starts[i] >= stops[i]) { + throw IllegalArgumentException("tag delimiters out of order in pattern: " + pattern); + } + } + + // collect into chunks now + if (ntags == 0) { + std::string text = pattern.substr(0, n); + chunks.push_back(TextChunk(text)); + } + + if (ntags > 0 && starts[0] > 0) { // copy text up to first tag into chunks + std::string text = pattern.substr(0, starts[0]); + chunks.push_back(TextChunk(text)); + } + + for (size_t i = 0; i < ntags; i++) { + // copy inside of <tag> + std::string tag = pattern.substr(starts[i] + _start.length(), stops[i] - (starts[i] + _start.length())); + std::string ruleOrToken = tag; + std::string label = ""; + size_t colon = tag.find(':'); + if (colon != std::string::npos) { + label = tag.substr(0,colon); + ruleOrToken = tag.substr(colon + 1, tag.length() - (colon + 1)); + } + chunks.push_back(TagChunk(label, ruleOrToken)); + if (i + 1 < ntags) { + // copy from end of <tag> to start of next + std::string text = pattern.substr(stops[i] + _stop.length(), starts[i + 1] - (stops[i] + _stop.length())); + chunks.push_back(TextChunk(text)); + } + } + + if (ntags > 0) { + size_t afterLastTag = stops[ntags - 1] + _stop.length(); + if (afterLastTag < n) { // copy text from end of last tag to end + std::string text = pattern.substr(afterLastTag, n - afterLastTag); + chunks.push_back(TextChunk(text)); + } + } + + // strip out all backslashes from text chunks but not tags + for (size_t i = 0; i < chunks.size(); i++) { + Chunk &c = chunks[i]; + if (is<TextChunk *>(&c)) { + TextChunk &tc = (TextChunk&)c; + std::string unescaped = tc.getText(); + unescaped.erase(std::remove(unescaped.begin(), unescaped.end(), '\\'), unescaped.end()); + if (unescaped.length() < tc.getText().length()) { + chunks[i] = TextChunk(unescaped); + } + } + } + + return chunks; +} + +void 
ParseTreePatternMatcher::InitializeInstanceFields() { + _start = "<"; + _stop = ">"; + _escape = "\\"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.h new file mode 100644 index 0000000000..8641fc9a00 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/ParseTreePatternMatcher.h @@ -0,0 +1,185 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Exceptions.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A tree pattern matching mechanism for ANTLR <seealso cref="ParseTree"/>s. + /// <p/> + /// Patterns are strings of source input text with special tags representing + /// token or rule references such as: + /// <p/> + /// {@code <ID> = <expr>;} + /// <p/> + /// Given a pattern start rule such as {@code statement}, this object constructs + /// a <seealso cref="ParseTree"/> with placeholders for the {@code ID} and {@code expr} + /// subtree. Then the <seealso cref="#match"/> routines can compare an actual + /// <seealso cref="ParseTree"/> from a parse with this pattern. Tag {@code <ID>} matches + /// any {@code ID} token and tag {@code <expr>} references the result of the + /// {@code expr} rule (generally an instance of {@code ExprContext}. + /// <p/> + /// Pattern {@code x = 0;} is a similar pattern that matches the same pattern + /// except that it requires the identifier to be {@code x} and the expression to + /// be {@code 0}. + /// <p/> + /// The <seealso cref="#matches"/> routines return {@code true} or {@code false} based + /// upon a match for the tree rooted at the parameter sent in. 
The + /// <seealso cref="#match"/> routines return a <seealso cref="ParseTreeMatch"/> object that + /// contains the parse tree, the parse tree pattern, and a map from tag name to + /// matched nodes (more below). A subtree that fails to match, returns with + /// <seealso cref="ParseTreeMatch#mismatchedNode"/> set to the first tree node that did not + /// match. + /// <p/> + /// For efficiency, you can compile a tree pattern in string form to a + /// <seealso cref="ParseTreePattern"/> object. + /// <p/> + /// See {@code TestParseTreeMatcher} for lots of examples. + /// <seealso cref="ParseTreePattern"/> has two static helper methods: + /// <seealso cref="ParseTreePattern#findAll"/> and <seealso cref="ParseTreePattern#match"/> that + /// are easy to use but not super efficient because they create new + /// <seealso cref="ParseTreePatternMatcher"/> objects each time and have to compile the + /// pattern in string form before using it. + /// <p/> + /// The lexer and parser that you pass into the <seealso cref="ParseTreePatternMatcher"/> + /// constructor are used to parse the pattern in string form. The lexer converts + /// the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer + /// throws out whitespace or puts it on a hidden channel). Be aware that the + /// input stream is reset for the lexer (but not the parser; a + /// <seealso cref="ParserInterpreter"/> is created to parse the input.). Any user-defined + /// fields you have put into the lexer might get changed when this mechanism asks + /// it to scan the pattern string. + /// <p/> + /// Normally a parser does not accept token {@code <expr>} as a valid + /// {@code expr} but, from the parser passed in, we create a special version of + /// the underlying grammar representation (an <seealso cref="ATN"/>) that allows imaginary + /// tokens representing rules ({@code <expr>}) to match entire rules. We call + /// these <em>bypass alternatives</em>. 
+ /// <p/> + /// Delimiters are {@code <} and {@code >}, with {@code \} as the escape string + /// by default, but you can set them to whatever you want using + /// <seealso cref="#setDelimiters"/>. You must escape both start and stop strings + /// {@code \<} and {@code \>}. + /// </summary> + class ANTLR4CPP_PUBLIC ParseTreePatternMatcher { + public: + class CannotInvokeStartRule : public RuntimeException { + public: + CannotInvokeStartRule(const RuntimeException &e); + ~CannotInvokeStartRule(); + }; + + // Fixes https://github.com/antlr/antlr4/issues/413 + // "Tree pattern compilation doesn't check for a complete parse" + class StartRuleDoesNotConsumeFullPattern : public RuntimeException { + public: + StartRuleDoesNotConsumeFullPattern() = default; + StartRuleDoesNotConsumeFullPattern(StartRuleDoesNotConsumeFullPattern const&) = default; + ~StartRuleDoesNotConsumeFullPattern(); + + StartRuleDoesNotConsumeFullPattern& operator=(StartRuleDoesNotConsumeFullPattern const&) = default; + }; + + /// Constructs a <seealso cref="ParseTreePatternMatcher"/> or from a <seealso cref="Lexer"/> and + /// <seealso cref="Parser"/> object. The lexer input stream is altered for tokenizing + /// the tree patterns. The parser is used as a convenient mechanism to get + /// the grammar name, plus token, rule names. + ParseTreePatternMatcher(Lexer *lexer, Parser *parser); + virtual ~ParseTreePatternMatcher(); + + /// <summary> + /// Set the delimiters used for marking rule and token tags within concrete + /// syntax used by the tree pattern parser. + /// </summary> + /// <param name="start"> The start delimiter. </param> + /// <param name="stop"> The stop delimiter. </param> + /// <param name="escapeLeft"> The escape sequence to use for escaping a start or stop delimiter. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code start} is {@code null} or empty. </exception> + /// <exception cref="IllegalArgumentException"> if {@code stop} is {@code null} or empty. 
</exception> + virtual void setDelimiters(const std::string &start, const std::string &stop, const std::string &escapeLeft); + + /// <summary> + /// Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}? </summary> + virtual bool matches(ParseTree *tree, const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a + /// compiled pattern instead of a string representation of a tree pattern. + /// </summary> + virtual bool matches(ParseTree *tree, const ParseTreePattern &pattern); + + /// <summary> + /// Compare {@code pattern} matched as rule {@code patternRuleIndex} against + /// {@code tree} and return a <seealso cref="ParseTreeMatch"/> object that contains the + /// matched elements, or the node at which the match failed. + /// </summary> + virtual ParseTreeMatch match(ParseTree *tree, const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// Compare {@code pattern} matched against {@code tree} and return a + /// <seealso cref="ParseTreeMatch"/> object that contains the matched elements, or the + /// node at which the match failed. Pass in a compiled pattern instead of a + /// string representation of a tree pattern. + /// </summary> + virtual ParseTreeMatch match(ParseTree *tree, const ParseTreePattern &pattern); + + /// <summary> + /// For repeated use of a tree pattern, compile it to a + /// <seealso cref="ParseTreePattern"/> using this method. + /// </summary> + virtual ParseTreePattern compile(const std::string &pattern, int patternRuleIndex); + + /// <summary> + /// Used to convert the tree pattern string into a series of tokens. The + /// input stream is reset. + /// </summary> + virtual Lexer* getLexer(); + + /// <summary> + /// Used to collect to the grammar file name, token names, rule names for + /// used to parse the pattern into a parse tree. 
+ /// </summary> + virtual Parser* getParser(); + + // ---- SUPPORT CODE ---- + + virtual std::vector<std::unique_ptr<Token>> tokenize(const std::string &pattern); + + /// Split "<ID> = <e:expr>;" into 4 chunks for tokenizing by tokenize(). + virtual std::vector<Chunk> split(const std::string &pattern); + + protected: + std::string _start; + std::string _stop; + std::string _escape; // e.g., \< and \> must escape BOTH! + + /// Recursively walk {@code tree} against {@code patternTree}, filling + /// {@code match.}<seealso cref="ParseTreeMatch#labels labels"/>. + /// + /// <returns> the first node encountered in {@code tree} which does not match + /// a corresponding node in {@code patternTree}, or {@code null} if the match + /// was successful. The specific node returned depends on the matching + /// algorithm used by the implementation, and may be overridden. </returns> + virtual ParseTree* matchImpl(ParseTree *tree, ParseTree *patternTree, std::map<std::string, std::vector<ParseTree *>> &labels); + + /// Is t <expr> subtree? + virtual RuleTagToken* getRuleTagToken(ParseTree *t); + + private: + Lexer *_lexer; + Parser *_parser; + + void InitializeInstanceFields(); + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.cpp new file mode 100644 index 0000000000..6f3fb73446 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.cpp @@ -0,0 +1,77 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +#include "tree/pattern/RuleTagToken.h" + +using namespace antlr4::tree::pattern; + +RuleTagToken::RuleTagToken(const std::string &/*ruleName*/, int _bypassTokenType) : bypassTokenType(_bypassTokenType) { +} + +RuleTagToken::RuleTagToken(const std::string &ruleName, size_t bypassTokenType, const std::string &label) + : ruleName(ruleName), bypassTokenType(bypassTokenType), label(label) { + if (ruleName.empty()) { + throw IllegalArgumentException("ruleName cannot be null or empty."); + } + +} + +std::string RuleTagToken::getRuleName() const { + return ruleName; +} + +std::string RuleTagToken::getLabel() const { + return label; +} + +size_t RuleTagToken::getChannel() const { + return DEFAULT_CHANNEL; +} + +std::string RuleTagToken::getText() const { + if (label != "") { + return std::string("<") + label + std::string(":") + ruleName + std::string(">"); + } + + return std::string("<") + ruleName + std::string(">"); +} + +size_t RuleTagToken::getType() const { + return bypassTokenType; +} + +size_t RuleTagToken::getLine() const { + return 0; +} + +size_t RuleTagToken::getCharPositionInLine() const { + return INVALID_INDEX; +} + +size_t RuleTagToken::getTokenIndex() const { + return INVALID_INDEX; +} + +size_t RuleTagToken::getStartIndex() const { + return INVALID_INDEX; +} + +size_t RuleTagToken::getStopIndex() const { + return INVALID_INDEX; +} + +antlr4::TokenSource *RuleTagToken::getTokenSource() const { + return nullptr; +} + +antlr4::CharStream *RuleTagToken::getInputStream() const { + return nullptr; +} + +std::string RuleTagToken::toString() const { + return ruleName + ":" + std::to_string(bypassTokenType); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.h new file mode 100644 index 0000000000..cb0e50399e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/RuleTagToken.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2012-2017 
The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A <seealso cref="Token"/> object representing an entire subtree matched by a parser + /// rule; e.g., {@code <expr>}. These tokens are created for <seealso cref="TagChunk"/> + /// chunks where the tag corresponds to a parser rule. + /// </summary> + class ANTLR4CPP_PUBLIC RuleTagToken : public Token { + /// <summary> + /// This is the backing field for <seealso cref="#getRuleName"/>. + /// </summary> + private: + const std::string ruleName; + + /// The token type for the current token. This is the token type assigned to + /// the bypass alternative for the rule during ATN deserialization. + const size_t bypassTokenType; + + /// This is the backing field for <seealso cref="#getLabe"/>. + const std::string label; + + public: + /// <summary> + /// Constructs a new instance of <seealso cref="RuleTagToken"/> with the specified rule + /// name and bypass token type and no label. + /// </summary> + /// <param name="ruleName"> The name of the parser rule this rule tag matches. </param> + /// <param name="bypassTokenType"> The bypass token type assigned to the parser rule. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code ruleName} is {@code null} + /// or empty. </exception> + RuleTagToken(const std::string &ruleName, int bypassTokenType); //this(ruleName, bypassTokenType, nullptr); + + /// <summary> + /// Constructs a new instance of <seealso cref="RuleTagToken"/> with the specified rule + /// name, bypass token type, and label. + /// </summary> + /// <param name="ruleName"> The name of the parser rule this rule tag matches. </param> + /// <param name="bypassTokenType"> The bypass token type assigned to the parser rule. 
</param> + /// <param name="label"> The label associated with the rule tag, or {@code null} if + /// the rule tag is unlabeled. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code ruleName} is {@code null} + /// or empty. </exception> + RuleTagToken(const std::string &ruleName, size_t bypassTokenType, const std::string &label); + + /// <summary> + /// Gets the name of the rule associated with this rule tag. + /// </summary> + /// <returns> The name of the parser rule associated with this rule tag. </returns> + std::string getRuleName() const; + + /// <summary> + /// Gets the label associated with the rule tag. + /// </summary> + /// <returns> The name of the label associated with the rule tag, or + /// {@code null} if this is an unlabeled rule tag. </returns> + std::string getLabel() const; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// Rule tag tokens are always placed on the <seealso cref="#DEFAULT_CHANNE"/>. + /// </summary> + virtual size_t getChannel() const override; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// This method returns the rule tag formatted with {@code <} and {@code >} + /// delimiters. + /// </summary> + virtual std::string getText() const override; + + /// Rule tag tokens have types assigned according to the rule bypass + /// transitions created during ATN deserialization. + virtual size_t getType() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns 0. + virtual size_t getLine() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. + virtual size_t getCharPositionInLine() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. + virtual size_t getTokenIndex() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. 
+ virtual size_t getStartIndex() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns INVALID_INDEX. + virtual size_t getStopIndex() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns {@code null}. + virtual TokenSource *getTokenSource() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> always returns {@code null}. + virtual CharStream *getInputStream() const override; + + /// The implementation for <seealso cref="RuleTagToken"/> returns a string of the form {@code ruleName:bypassTokenType}. + virtual std::string toString() const override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.cpp new file mode 100644 index 0000000000..63e97aeaa2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "Exceptions.h" + +#include "tree/pattern/TagChunk.h" + +using namespace antlr4::tree::pattern; + +TagChunk::TagChunk(const std::string &tag) : TagChunk("", tag) { +} + +TagChunk::TagChunk(const std::string &label, const std::string &tag) : _tag(tag), _label(label) { + if (tag.empty()) { + throw IllegalArgumentException("tag cannot be null or empty"); + } + +} + +TagChunk::~TagChunk() { +} + +std::string TagChunk::getTag() { + return _tag; +} + +std::string TagChunk::getLabel() { + return _label; +} + +std::string TagChunk::toString() { + if (!_label.empty()) { + return _label + ":" + _tag; + } + + return _tag; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.h new file mode 100644 index 0000000000..1cdae78995 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TagChunk.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Chunk.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// Represents a placeholder tag in a tree pattern. A tag can have any of the + /// following forms. + /// + /// <ul> + /// <li>{@code expr}: An unlabeled placeholder for a parser rule {@code expr}.</li> + /// <li>{@code ID}: An unlabeled placeholder for a token of type {@code ID}.</li> + /// <li>{@code e:expr}: A labeled placeholder for a parser rule {@code expr}.</li> + /// <li>{@code id:ID}: A labeled placeholder for a token of type {@code ID}.</li> + /// </ul> + /// + /// This class does not perform any validation on the tag or label names aside + /// from ensuring that the tag is a non-null, non-empty string. 
+ /// </summary> + class ANTLR4CPP_PUBLIC TagChunk : public Chunk { + public: + /// <summary> + /// Construct a new instance of <seealso cref="TagChunk"/> using the specified tag and + /// no label. + /// </summary> + /// <param name="tag"> The tag, which should be the name of a parser rule or token + /// type. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code tag} is {@code null} or + /// empty. </exception> + TagChunk(const std::string &tag); + virtual ~TagChunk(); + + /// <summary> + /// Construct a new instance of <seealso cref="TagChunk"/> using the specified label + /// and tag. + /// </summary> + /// <param name="label"> The label for the tag. If this is {@code null}, the + /// <seealso cref="TagChunk"/> represents an unlabeled tag. </param> + /// <param name="tag"> The tag, which should be the name of a parser rule or token + /// type. + /// </param> + /// <exception cref="IllegalArgumentException"> if {@code tag} is {@code null} or + /// empty. </exception> + TagChunk(const std::string &label, const std::string &tag); + + /// <summary> + /// Get the tag for this chunk. + /// </summary> + /// <returns> The tag for the chunk. </returns> + std::string getTag(); + + /// <summary> + /// Get the label, if any, assigned to this chunk. + /// </summary> + /// <returns> The label assigned to this chunk, or {@code null} if no label is + /// assigned to the chunk. </returns> + std::string getLabel(); + + /// <summary> + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + /// </summary> + virtual std::string toString() override; + + private: + /// This is the backing field for <seealso cref="#getTag"/>. + const std::string _tag; + /// <summary> + /// This is the backing field for <seealso cref="#getLabe"/>. 
+ /// </summary> + const std::string _label; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.cpp new file mode 100644 index 0000000000..8e2e6689d7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.cpp @@ -0,0 +1,28 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" + +#include "tree/pattern/TextChunk.h" + +using namespace antlr4::tree::pattern; + +TextChunk::TextChunk(const std::string &text) : text(text) { + if (text == "") { + throw IllegalArgumentException("text cannot be nul"); + } + +} + +TextChunk::~TextChunk() { +} + +std::string TextChunk::getText() { + return text; +} + +std::string TextChunk::toString() { + return std::string("'") + text + std::string("'"); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.h new file mode 100644 index 0000000000..bb7fc7f966 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TextChunk.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Chunk.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// Represents a span of raw text (concrete syntax) between tags in a tree + /// pattern string. + /// </summary> + class ANTLR4CPP_PUBLIC TextChunk : public Chunk { + private: + /// <summary> + /// This is the backing field for <seealso cref="#getText"/>. 
+ /// </summary> + const std::string text; + + /// <summary> + /// Constructs a new instance of <seealso cref="TextChunk"/> with the specified text. + /// </summary> + /// <param name="text"> The text of this chunk. </param> + /// <exception cref="IllegalArgumentException"> if {@code text} is {@code null}. </exception> + public: + TextChunk(const std::string &text); + virtual ~TextChunk(); + + /// <summary> + /// Gets the raw text of this chunk. + /// </summary> + /// <returns> The text of the chunk. </returns> + std::string getText(); + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The implementation for <seealso cref="TextChunk"/> returns the result of + /// <seealso cref="#getText()"/> in single quotes. + /// </summary> + virtual std::string toString() override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.cpp new file mode 100644 index 0000000000..f5153c8357 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/pattern/TokenTagToken.h" + +using namespace antlr4::tree::pattern; + +TokenTagToken::TokenTagToken(const std::string &/*tokenName*/, int type) + : CommonToken(type), tokenName(""), label("") { +} + +TokenTagToken::TokenTagToken(const std::string &tokenName, int type, const std::string &label) + : CommonToken(type), tokenName(tokenName), label(label) { +} + +std::string TokenTagToken::getTokenName() const { + return tokenName; +} + +std::string TokenTagToken::getLabel() const { + return label; +} + +std::string TokenTagToken::getText() const { + if (!label.empty()) { + return "<" + label + ":" + tokenName + ">"; + } + + return "<" + tokenName + ">"; +} + +std::string TokenTagToken::toString() const { + return tokenName + ":" + std::to_string(_type); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.h b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.h new file mode 100644 index 0000000000..da9e11cd36 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/pattern/TokenTagToken.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CommonToken.h" + +namespace antlr4 { +namespace tree { +namespace pattern { + + /// <summary> + /// A <seealso cref="Token"/> object representing a token of a particular type; e.g., + /// {@code <ID>}. These tokens are created for <seealso cref="TagChunk"/> chunks where the + /// tag corresponds to a lexer rule or token type. + /// </summary> + class ANTLR4CPP_PUBLIC TokenTagToken : public CommonToken { + /// <summary> + /// This is the backing field for <seealso cref="#getTokenName"/>. + /// </summary> + private: + const std::string tokenName; + /// <summary> + /// This is the backing field for <seealso cref="#getLabe"/>. 
+ /// </summary> + const std::string label; + + /// <summary> + /// Constructs a new instance of <seealso cref="TokenTagToken"/> for an unlabeled tag + /// with the specified token name and type. + /// </summary> + /// <param name="tokenName"> The token name. </param> + /// <param name="type"> The token type. </param> + public: + TokenTagToken(const std::string &tokenName, int type); //this(tokenName, type, nullptr); + + /// <summary> + /// Constructs a new instance of <seealso cref="TokenTagToken"/> with the specified + /// token name, type, and label. + /// </summary> + /// <param name="tokenName"> The token name. </param> + /// <param name="type"> The token type. </param> + /// <param name="label"> The label associated with the token tag, or {@code null} if + /// the token tag is unlabeled. </param> + TokenTagToken(const std::string &tokenName, int type, const std::string &label); + + /// <summary> + /// Gets the token name. </summary> + /// <returns> The token name. </returns> + std::string getTokenName() const; + + /// <summary> + /// Gets the label associated with the rule tag. + /// </summary> + /// <returns> The name of the label associated with the rule tag, or + /// {@code null} if this is an unlabeled rule tag. </returns> + std::string getLabel() const; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The implementation for <seealso cref="TokenTagToken"/> returns the token tag + /// formatted with {@code <} and {@code >} delimiters. + /// </summary> + virtual std::string getText() const override; + + /// <summary> + /// {@inheritDoc} + /// <p/> + /// The implementation for <seealso cref="TokenTagToken"/> returns a string of the form + /// {@code tokenName:type}. 
+ /// </summary> + virtual std::string toString() const override; + }; + +} // namespace pattern +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.cpp new file mode 100644 index 0000000000..c0398962ec --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "XPathLexer.h" +#include "XPathLexerErrorListener.h" +#include "XPathElement.h" +#include "XPathWildcardAnywhereElement.h" +#include "XPathWildcardElement.h" +#include "XPathTokenAnywhereElement.h" +#include "XPathTokenElement.h" +#include "XPathRuleAnywhereElement.h" +#include "XPathRuleElement.h" + +#include "XPath.h" + +using namespace antlr4; +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +const std::string XPath::WILDCARD = "*"; +const std::string XPath::NOT = "!"; + +XPath::XPath(Parser *parser, const std::string &path) { + _parser = parser; + _path = path; +} + +std::vector<std::unique_ptr<XPathElement>> XPath::split(const std::string &path) { + ANTLRInputStream in(path); + XPathLexer lexer(&in); + lexer.removeErrorListeners(); + XPathLexerErrorListener listener; + lexer.addErrorListener(&listener); + CommonTokenStream tokenStream(&lexer); + try { + tokenStream.fill(); + } catch (LexerNoViableAltException &) { + size_t pos = lexer.getCharPositionInLine(); + std::string msg = "Invalid tokens or characters at index " + std::to_string(pos) + " in path '" + path + "'"; + throw IllegalArgumentException(msg); + } + + std::vector<Token *> tokens = tokenStream.getTokens(); + std::vector<std::unique_ptr<XPathElement>> elements; + size_t n = tokens.size(); + size_t i = 0; + bool done = false; + while (!done && i < n) { + Token 
*el = tokens[i]; + Token *next = nullptr; + switch (el->getType()) { + case XPathLexer::ROOT: + case XPathLexer::ANYWHERE: { + bool anywhere = el->getType() == XPathLexer::ANYWHERE; + i++; + next = tokens[i]; + bool invert = next->getType() == XPathLexer::BANG; + if (invert) { + i++; + next = tokens[i]; + } + std::unique_ptr<XPathElement> pathElement = getXPathElement(next, anywhere); + pathElement->setInvert(invert); + elements.push_back(std::move(pathElement)); + i++; + break; + + } + case XPathLexer::TOKEN_REF: + case XPathLexer::RULE_REF: + case XPathLexer::WILDCARD: + elements.push_back(getXPathElement(el, false)); + i++; + break; + + case Token::EOF: + done = true; + break; + + default : + throw IllegalArgumentException("Unknown path element " + el->toString()); + } + } + + return elements; +} + +std::unique_ptr<XPathElement> XPath::getXPathElement(Token *wordToken, bool anywhere) { + if (wordToken->getType() == Token::EOF) { + throw IllegalArgumentException("Missing path element at end of path"); + } + + std::string word = wordToken->getText(); + size_t ttype = _parser->getTokenType(word); + ssize_t ruleIndex = _parser->getRuleIndex(word); + switch (wordToken->getType()) { + case XPathLexer::WILDCARD : + if (anywhere) + return std::unique_ptr<XPathWildcardAnywhereElement>(new XPathWildcardAnywhereElement()); + return std::unique_ptr<XPathWildcardElement>(new XPathWildcardElement()); + + case XPathLexer::TOKEN_REF: + case XPathLexer::STRING : + if (ttype == Token::INVALID_TYPE) { + throw IllegalArgumentException(word + " at index " + std::to_string(wordToken->getStartIndex()) + " isn't a valid token name"); + } + if (anywhere) + return std::unique_ptr<XPathTokenAnywhereElement>(new XPathTokenAnywhereElement(word, (int)ttype)); + return std::unique_ptr<XPathTokenElement>(new XPathTokenElement(word, (int)ttype)); + + default : + if (ruleIndex == -1) { + throw IllegalArgumentException(word + " at index " + std::to_string(wordToken->getStartIndex()) + " isn't a 
valid rule name"); + } + if (anywhere) + return std::unique_ptr<XPathRuleAnywhereElement>(new XPathRuleAnywhereElement(word, (int)ruleIndex)); + return std::unique_ptr<XPathRuleElement>(new XPathRuleElement(word, (int)ruleIndex)); + } +} + +static ParserRuleContext dummyRoot; + +std::vector<ParseTree *> XPath::findAll(ParseTree *tree, std::string const& xpath, Parser *parser) { + XPath p(parser, xpath); + return p.evaluate(tree); +} + +std::vector<ParseTree *> XPath::evaluate(ParseTree *t) { + dummyRoot.children = { t }; // don't set t's parent. + + std::vector<ParseTree *> work = { &dummyRoot }; + + size_t i = 0; + std::vector<std::unique_ptr<XPathElement>> elements = split(_path); + + while (i < elements.size()) { + std::vector<ParseTree *> next; + for (auto *node : work) { + if (!node->children.empty()) { + // only try to match next element if it has children + // e.g., //func/*/stat might have a token node for which + // we can't go looking for stat nodes. + auto matching = elements[i]->evaluate(node); + next.insert(next.end(), matching.begin(), matching.end()); + } + } + i++; + work = next; + } + + return work; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.h new file mode 100644 index 0000000000..e38d482d58 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPath.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /// Represent a subset of XPath XML path syntax for use in identifying nodes in + /// parse trees. + /// + /// <para> + /// Split path into words and separators {@code /} and {@code //} via ANTLR + /// itself then walk path elements from left to right. 
At each separator-word + /// pair, find set of nodes. Next stage uses those as work list.</para> + /// + /// <para> + /// The basic interface is + /// <seealso cref="XPath#findAll ParseTree.findAll"/>{@code (tree, pathString, parser)}. + /// But that is just shorthand for:</para> + /// + /// <pre> + /// <seealso cref="XPath"/> p = new <seealso cref="XPath#XPath XPath"/>(parser, pathString); + /// return p.<seealso cref="#evaluate evaluate"/>(tree); + /// </pre> + /// + /// <para> + /// See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this + /// allows operators:</para> + /// + /// <dl> + /// <dt>/</dt> <dd>root</dd> + /// <dt>//</dt> <dd>anywhere</dd> + /// <dt>!</dt> <dd>invert; this must appear directly after root or anywhere + /// operator</dd> + /// </dl> + /// + /// <para> + /// and path elements:</para> + /// + /// <dl> + /// <dt>ID</dt> <dd>token name</dd> + /// <dt>'string'</dt> <dd>any string literal token from the grammar</dd> + /// <dt>expr</dt> <dd>rule name</dd> + /// <dt>*</dt> <dd>wildcard matching any node</dd> + /// </dl> + /// + /// <para> + /// Whitespace is not allowed.</para> + + class ANTLR4CPP_PUBLIC XPath { + public: + static const std::string WILDCARD; // word not operator/separator + static const std::string NOT; // word for invert operator + + XPath(Parser *parser, const std::string &path); + virtual ~XPath() {} + + // TODO: check for invalid token/rule names, bad syntax + virtual std::vector<std::unique_ptr<XPathElement>> split(const std::string &path); + + static std::vector<ParseTree *> findAll(ParseTree *tree, std::string const& xpath, Parser *parser); + + /// Return a list of all nodes starting at {@code t} as root that satisfy the + /// path. The root {@code /} is relative to the node passed to + /// <seealso cref="#evaluate"/>. 
+ virtual std::vector<ParseTree *> evaluate(ParseTree *t); + + protected: + std::string _path; + Parser *_parser; + + /// Convert word like {@code *} or {@code ID} or {@code expr} to a path + /// element. {@code anywhere} is {@code true} if {@code //} precedes the + /// word. + virtual std::unique_ptr<XPathElement> getXPathElement(Token *wordToken, bool anywhere); + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.cpp new file mode 100644 index 0000000000..64b122df13 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "support/CPPUtils.h" + +#include "XPathElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathElement::XPathElement(const std::string &nodeName) { + _nodeName = nodeName; +} + +XPathElement::~XPathElement() { +} + +std::vector<ParseTree *> XPathElement::evaluate(ParseTree * /*t*/) { + return {}; +} + +std::string XPathElement::toString() const { + std::string inv = _invert ? "!" : ""; + return antlrcpp::toString(*this) + "[" + inv + _nodeName + "]"; +} + +void XPathElement::setInvert(bool value) { + _invert = value; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.h new file mode 100644 index 0000000000..f339117d7f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathElement.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace tree { + class ParseTree; + +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathElement { + public: + /// Construct element like {@code /ID} or {@code ID} or {@code /*} etc... + /// op is null if just node + XPathElement(const std::string &nodeName); + XPathElement(XPathElement const&) = default; + virtual ~XPathElement(); + + XPathElement& operator=(XPathElement const&) = default; + + /// Given tree rooted at {@code t} return all nodes matched by this path + /// element. + virtual std::vector<ParseTree *> evaluate(ParseTree *t); + virtual std::string toString() const; + + void setInvert(bool value); + + protected: + std::string _nodeName; + bool _invert = false; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.cpp new file mode 100644 index 0000000000..506d2e1179 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.cpp @@ -0,0 +1,182 @@ + +// Generated from XPathLexer.g4 by ANTLR 4.9.3 + + +#include "XPathLexer.h" + + +using namespace antlr4; + +namespace { + +struct XPathLexerStaticData final { + XPathLexerStaticData(std::vector<std::string> ruleNames, + std::vector<std::string> channelNames, + std::vector<std::string> modeNames, + std::vector<std::string> literalNames, + std::vector<std::string> symbolicNames) + : ruleNames(std::move(ruleNames)), channelNames(std::move(channelNames)), + modeNames(std::move(modeNames)), literalNames(std::move(literalNames)), + symbolicNames(std::move(symbolicNames)), + vocabulary(this->literalNames, this->symbolicNames) {} + + XPathLexerStaticData(const XPathLexerStaticData&) = delete; + XPathLexerStaticData(XPathLexerStaticData&&) = delete; + XPathLexerStaticData& operator=(const XPathLexerStaticData&) = delete; + XPathLexerStaticData& 
operator=(XPathLexerStaticData&&) = delete; + + std::vector<antlr4::dfa::DFA> decisionToDFA; + antlr4::atn::PredictionContextCache sharedContextCache; + const std::vector<std::string> ruleNames; + const std::vector<std::string> channelNames; + const std::vector<std::string> modeNames; + const std::vector<std::string> literalNames; + const std::vector<std::string> symbolicNames; + const antlr4::dfa::Vocabulary vocabulary; + antlr4::atn::SerializedATNView serializedATN; + std::unique_ptr<antlr4::atn::ATN> atn; +}; + +::antlr4::internal::OnceFlag xpathLexerOnceFlag; +XPathLexerStaticData *xpathLexerStaticData = nullptr; + +void xpathLexerInitialize() { + assert(xpathLexerStaticData == nullptr); + auto staticData = std::make_unique<XPathLexerStaticData>( + std::vector<std::string>{ + "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID", "NameChar", "NameStartChar", + "STRING" + }, + std::vector<std::string>{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN" + }, + std::vector<std::string>{ + "DEFAULT_MODE" + }, + std::vector<std::string>{ + "", "", "", "'//'", "'/'", "'*'", "'!'" + }, + std::vector<std::string>{ + "", "TOKEN_REF", "RULE_REF", "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID", + "STRING" + } + ); + static const int32_t serializedATNSegment[] = { + 0x4, 0x0, 0x8, 0x32, 0x6, -1, 0x2, 0x0, 0x7, 0x0, 0x2, 0x1, 0x7, + 0x1, 0x2, 0x2, 0x7, 0x2, 0x2, 0x3, 0x7, 0x3, 0x2, 0x4, 0x7, 0x4, + 0x2, 0x5, 0x7, 0x5, 0x2, 0x6, 0x7, 0x6, 0x2, 0x7, 0x7, 0x7, 0x1, + 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x2, + 0x1, 0x3, 0x1, 0x3, 0x1, 0x4, 0x1, 0x4, 0x5, 0x4, 0x1d, 0x8, 0x4, + 0xa, 0x4, 0xc, 0x4, 0x20, 0x9, 0x4, 0x1, 0x4, 0x1, 0x4, 0x1, 0x5, + 0x1, 0x5, 0x3, 0x5, 0x26, 0x8, 0x5, 0x1, 0x6, 0x1, 0x6, 0x1, 0x7, + 0x1, 0x7, 0x5, 0x7, 0x2c, 0x8, 0x7, 0xa, 0x7, 0xc, 0x7, 0x2f, 0x9, + 0x7, 0x1, 0x7, 0x1, 0x7, 0x1, 0x2d, 0x0, 0x8, 0x1, 0x3, 0x3, 0x4, + 0x5, 0x5, 0x7, 0x6, 0x9, 0x7, 0xb, 0x0, 0xd, 0x0, 0xf, 0x8, 0x1, + 0x0, 0x2, 0x5, 0x0, 0x30, 0x39, 0x5f, 0x5f, 0xb7, 0xb7, 0x300, 0x36f, 
+ 0x203f, 0x2040, 0xd, 0x0, 0x41, 0x5a, 0x61, 0x7a, 0xc0, 0xd6, 0xd8, + 0xf6, 0xf8, 0x2ff, 0x370, 0x37d, 0x37f, 0x1fff, 0x200c, 0x200d, 0x2070, + 0x218f, 0x2c00, 0x2fef, 0x3001, 0xd7ff, 0xf900, 0xfdcf, 0xfdf0, -1, + 0x0, 0x32, 0x0, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x3, 0x1, 0x0, 0x0, + 0x0, 0x0, 0x5, 0x1, 0x0, 0x0, 0x0, 0x0, 0x7, 0x1, 0x0, 0x0, 0x0, + 0x0, 0x9, 0x1, 0x0, 0x0, 0x0, 0x0, 0xf, 0x1, 0x0, 0x0, 0x0, 0x1, + 0x11, 0x1, 0x0, 0x0, 0x0, 0x3, 0x14, 0x1, 0x0, 0x0, 0x0, 0x5, 0x16, + 0x1, 0x0, 0x0, 0x0, 0x7, 0x18, 0x1, 0x0, 0x0, 0x0, 0x9, 0x1a, 0x1, + 0x0, 0x0, 0x0, 0xb, 0x25, 0x1, 0x0, 0x0, 0x0, 0xd, 0x27, 0x1, 0x0, + 0x0, 0x0, 0xf, 0x29, 0x1, 0x0, 0x0, 0x0, 0x11, 0x12, 0x5, 0x2f, 0x0, + 0x0, 0x12, 0x13, 0x5, 0x2f, 0x0, 0x0, 0x13, 0x2, 0x1, 0x0, 0x0, 0x0, + 0x14, 0x15, 0x5, 0x2f, 0x0, 0x0, 0x15, 0x4, 0x1, 0x0, 0x0, 0x0, 0x16, + 0x17, 0x5, 0x2a, 0x0, 0x0, 0x17, 0x6, 0x1, 0x0, 0x0, 0x0, 0x18, 0x19, + 0x5, 0x21, 0x0, 0x0, 0x19, 0x8, 0x1, 0x0, 0x0, 0x0, 0x1a, 0x1e, 0x3, + 0xd, 0x6, 0x0, 0x1b, 0x1d, 0x3, 0xb, 0x5, 0x0, 0x1c, 0x1b, 0x1, 0x0, + 0x0, 0x0, 0x1d, 0x20, 0x1, 0x0, 0x0, 0x0, 0x1e, 0x1c, 0x1, 0x0, 0x0, + 0x0, 0x1e, 0x1f, 0x1, 0x0, 0x0, 0x0, 0x1f, 0x21, 0x1, 0x0, 0x0, 0x0, + 0x20, 0x1e, 0x1, 0x0, 0x0, 0x0, 0x21, 0x22, 0x6, 0x4, 0x0, 0x0, 0x22, + 0xa, 0x1, 0x0, 0x0, 0x0, 0x23, 0x26, 0x3, 0xd, 0x6, 0x0, 0x24, 0x26, + 0x7, 0x0, 0x0, 0x0, 0x25, 0x23, 0x1, 0x0, 0x0, 0x0, 0x25, 0x24, 0x1, + 0x0, 0x0, 0x0, 0x26, 0xc, 0x1, 0x0, 0x0, 0x0, 0x27, 0x28, 0x7, 0x1, + 0x0, 0x0, 0x28, 0xe, 0x1, 0x0, 0x0, 0x0, 0x29, 0x2d, 0x5, 0x27, 0x0, + 0x0, 0x2a, 0x2c, 0x9, 0x0, 0x0, 0x0, 0x2b, 0x2a, 0x1, 0x0, 0x0, 0x0, + 0x2c, 0x2f, 0x1, 0x0, 0x0, 0x0, 0x2d, 0x2e, 0x1, 0x0, 0x0, 0x0, 0x2d, + 0x2b, 0x1, 0x0, 0x0, 0x0, 0x2e, 0x30, 0x1, 0x0, 0x0, 0x0, 0x2f, 0x2d, + 0x1, 0x0, 0x0, 0x0, 0x30, 0x31, 0x5, 0x27, 0x0, 0x0, 0x31, 0x10, + 0x1, 0x0, 0x0, 0x0, 0x4, 0x0, 0x1e, 0x25, 0x2d, 0x1, 0x1, 0x4, 0x0, + }; + + staticData->serializedATN = antlr4::atn::SerializedATNView(serializedATNSegment, 
sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0])); + + atn::ATNDeserializer deserializer; + staticData->atn = deserializer.deserialize(staticData->serializedATN); + + size_t count = staticData->atn->getNumberOfDecisions(); + staticData->decisionToDFA.reserve(count); + for (size_t i = 0; i < count; i++) { + staticData->decisionToDFA.emplace_back(staticData->atn->getDecisionState(i), i); + } + xpathLexerStaticData = staticData.release(); +} + +} + +XPathLexer::XPathLexer(CharStream *input) : Lexer(input) { + XPathLexer::initialize(); + _interpreter = new atn::LexerATNSimulator(this, *xpathLexerStaticData->atn, xpathLexerStaticData->decisionToDFA, xpathLexerStaticData->sharedContextCache); +} + +XPathLexer::~XPathLexer() { + delete _interpreter; +} + +std::string XPathLexer::getGrammarFileName() const { + return "XPathLexer.g4"; +} + +const std::vector<std::string>& XPathLexer::getRuleNames() const { + return xpathLexerStaticData->ruleNames; +} + +const std::vector<std::string>& XPathLexer::getChannelNames() const { + return xpathLexerStaticData->channelNames; +} + +const std::vector<std::string>& XPathLexer::getModeNames() const { + return xpathLexerStaticData->modeNames; +} + +const dfa::Vocabulary& XPathLexer::getVocabulary() const { + return xpathLexerStaticData->vocabulary; +} + +antlr4::atn::SerializedATNView XPathLexer::getSerializedATN() const { + return xpathLexerStaticData->serializedATN; +} + +const atn::ATN& XPathLexer::getATN() const { + return *xpathLexerStaticData->atn; +} + +void XPathLexer::action(RuleContext *context, size_t ruleIndex, size_t actionIndex) { + switch (ruleIndex) { + case 4: IDAction(antlrcpp::downCast<antlr4::RuleContext *>(context), actionIndex); break; + + default: + break; + } +} + +void XPathLexer::IDAction(antlr4::RuleContext *context, size_t actionIndex) { + switch (actionIndex) { + case 0: + if (isupper(getText()[0])) + setType(TOKEN_REF); + else + setType(RULE_REF); + break; + + default: + break; + } +} + +void 
XPathLexer::initialize() { + ::antlr4::internal::call_once(xpathLexerOnceFlag, xpathLexerInitialize); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.h new file mode 100644 index 0000000000..6926d2161e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexer.h @@ -0,0 +1,47 @@ + +// Generated from XPathLexer.g4 by ANTLR 4.9.3 + +#pragma once + + +#include "antlr4-runtime.h" + + +class XPathLexer : public antlr4::Lexer { +public: + enum { + TOKEN_REF = 1, RULE_REF = 2, ANYWHERE = 3, ROOT = 4, WILDCARD = 5, BANG = 6, + ID = 7, STRING = 8 + }; + + explicit XPathLexer(antlr4::CharStream *input); + + ~XPathLexer() override; + + virtual std::string getGrammarFileName() const override; + + virtual const std::vector<std::string>& getRuleNames() const override; + + virtual const std::vector<std::string>& getChannelNames() const override; + + virtual const std::vector<std::string>& getModeNames() const override; + + virtual const antlr4::dfa::Vocabulary& getVocabulary() const override; + + virtual antlr4::atn::SerializedATNView getSerializedATN() const override; + + virtual const antlr4::atn::ATN& getATN() const override; + + virtual void action(antlr4::RuleContext *context, size_t ruleIndex, size_t actionIndex) override; + + // By default the static state used to implement the lexer is lazily initialized during the first + // call to the constructor. You can call this function if you wish to initialize the static state + // ahead of time. + static void initialize(); +private: + // Individual action functions triggered by action() above. + void IDAction(antlr4::RuleContext *context, size_t actionIndex); + + // Individual semantic predicate functions triggered by sempred() above. 
+}; + diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.cpp new file mode 100644 index 0000000000..2804c8ee3d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.cpp @@ -0,0 +1,13 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "XPathLexerErrorListener.h" + +using namespace antlr4; +using namespace antlr4::tree::xpath; + +void XPathLexerErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, + size_t /*line*/, size_t /*charPositionInLine*/, const std::string &/*msg*/, std::exception_ptr /*e*/) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.h new file mode 100644 index 0000000000..c0c3eaaca7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathLexerErrorListener.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathLexerErrorListener : public BaseErrorListener { + public: + virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.cpp new file mode 100644 index 0000000000..9ca910df2e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.cpp @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "tree/xpath/XPathRuleAnywhereElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathRuleAnywhereElement::XPathRuleAnywhereElement(const std::string &ruleName, int ruleIndex) : XPathElement(ruleName) { + _ruleIndex = ruleIndex; +} + +std::vector<ParseTree *> XPathRuleAnywhereElement::evaluate(ParseTree *t) { + return Trees::findAllRuleNodes(t, _ruleIndex); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.h new file mode 100644 index 0000000000..2ceb75ceed --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleAnywhereElement.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /// Either {@code ID} at start of path or {@code ...//ID} in middle of path. + class ANTLR4CPP_PUBLIC XPathRuleAnywhereElement : public XPathElement { + public: + XPathRuleAnywhereElement(const std::string &ruleName, int ruleIndex); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + + protected: + int _ruleIndex = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.cpp new file mode 100644 index 0000000000..1d145fb575 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.cpp @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathRuleElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathRuleElement::XPathRuleElement(const std::string &ruleName, size_t ruleIndex) : XPathElement(ruleName) { + _ruleIndex = ruleIndex; +} + +std::vector<ParseTree *> XPathRuleElement::evaluate(ParseTree *t) { + // return all children of t that match nodeName + std::vector<ParseTree *> nodes; + for (auto *c : t->children) { + if (antlrcpp::is<ParserRuleContext *>(c)) { + ParserRuleContext *ctx = dynamic_cast<ParserRuleContext *>(c); + if ((ctx->getRuleIndex() == _ruleIndex && !_invert) || (ctx->getRuleIndex() != _ruleIndex && _invert)) { + nodes.push_back(ctx); + } + } + } + return nodes; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.h new file mode 100644 index 0000000000..b57276f033 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathRuleElement.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathRuleElement : public XPathElement { + public: + XPathRuleElement(const std::string &ruleName, size_t ruleIndex); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + + protected: + size_t _ruleIndex = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.cpp new file mode 100644 index 0000000000..c557c9d675 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.cpp @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathTokenAnywhereElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathTokenAnywhereElement::XPathTokenAnywhereElement(const std::string &tokenName, int tokenType) : XPathElement(tokenName) { + this->tokenType = tokenType; +} + +std::vector<ParseTree *> XPathTokenAnywhereElement::evaluate(ParseTree *t) { + return Trees::findAllTokenNodes(t, tokenType); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.h new file mode 100644 index 0000000000..2045d91b32 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenAnywhereElement.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathTokenAnywhereElement : public XPathElement { + protected: + int tokenType = 0; + public: + XPathTokenAnywhereElement(const std::string &tokenName, int tokenType); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.cpp new file mode 100644 index 0000000000..d52fc26afd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "tree/ParseTree.h" +#include "tree/Trees.h" +#include "support/CPPUtils.h" +#include "Token.h" + +#include "XPathTokenElement.h" + +using namespace antlr4; +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +XPathTokenElement::XPathTokenElement(const std::string &tokenName, size_t tokenType) : XPathElement(tokenName) { + _tokenType = tokenType; +} + +std::vector<ParseTree *> XPathTokenElement::evaluate(ParseTree *t) { + // return all children of t that match nodeName + std::vector<ParseTree *> nodes; + for (auto *c : t->children) { + if (antlrcpp::is<TerminalNode *>(c)) { + TerminalNode *tnode = dynamic_cast<TerminalNode *>(c); + if ((tnode->getSymbol()->getType() == _tokenType && !_invert) || (tnode->getSymbol()->getType() != _tokenType && _invert)) { + nodes.push_back(tnode); + } + } + } + return nodes; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.h new file mode 100644 index 
0000000000..7221530ce6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathTokenElement.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + class ANTLR4CPP_PUBLIC XPathTokenElement : public XPathElement { + public: + XPathTokenElement(const std::string &tokenName, size_t tokenType); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + + protected: + size_t _tokenType = 0; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.cpp new file mode 100644 index 0000000000..4ff424f056 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "XPath.h" +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathWildcardAnywhereElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +/** Default constructor; passes XPath::WILDCARD to the XPathElement base constructor. */ +XPathWildcardAnywhereElement::XPathWildcardAnywhereElement() : XPathElement(XPath::WILDCARD) { +} + +/** Returns an empty vector when _invert is set; otherwise returns the result of Trees::getDescendants(t). _invert is not declared in this chunk (presumably inherited from XPathElement). */ +std::vector<ParseTree *> XPathWildcardAnywhereElement::evaluate(ParseTree *t) { + if (_invert) { + return {}; // !* is weird but valid (empty) + } + return Trees::getDescendants(t); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.h new file mode 100644 index 0000000000..dc5d1e5a29 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardAnywhereElement.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /** Wildcard XPath element with a default constructor and an evaluate() override; the definitions live in XPathWildcardAnywhereElement.cpp, where evaluate() returns Trees::getDescendants(t) unless _invert is set. */ + class ANTLR4CPP_PUBLIC XPathWildcardAnywhereElement : public XPathElement { + public: + XPathWildcardAnywhereElement(); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.cpp b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.cpp new file mode 100644 index 0000000000..aabda5a9be --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.cpp @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "XPath.h" +#include "tree/ParseTree.h" +#include "tree/Trees.h" + +#include "XPathWildcardElement.h" + +using namespace antlr4::tree; +using namespace antlr4::tree::xpath; + +/** Default constructor; passes XPath::WILDCARD to the XPathElement base constructor. */ +XPathWildcardElement::XPathWildcardElement() : XPathElement(XPath::WILDCARD) { +} + +/** Returns an empty vector when _invert is set; otherwise returns t->children by value, i.e. only the direct children of t. _invert is not declared in this chunk (presumably inherited from XPathElement). */ +std::vector<ParseTree *> XPathWildcardElement::evaluate(ParseTree *t) { + if (_invert) { + return {}; // !* is weird but valid (empty) + } + + return t->children; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.h b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.h new file mode 100644 index 0000000000..accb461de2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/tree/xpath/XPathWildcardElement.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "XPathElement.h" + +namespace antlr4 { +namespace tree { +namespace xpath { + + /** Wildcard XPath element with a default constructor and an evaluate() override; the definitions live in XPathWildcardElement.cpp, where evaluate() returns t->children unless _invert is set. */ + class ANTLR4CPP_PUBLIC XPathWildcardElement : public XPathElement { + public: + XPathWildcardElement(); + + virtual std::vector<ParseTree *> evaluate(ParseTree *t) override; + }; + +} // namespace xpath +} // namespace tree +} // namespace antlr4 |