diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 01:45:21 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 02:42:50 +0300 |
commit | 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch) | |
tree | 9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/atn | |
parent | 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff) | |
download | ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/atn')
127 files changed, 12289 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp new file mode 100644 index 0000000000..339515cc9c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.cpp @@ -0,0 +1,159 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/LL1Analyzer.h" +#include "Token.h" +#include "atn/RuleTransition.h" +#include "misc/IntervalSet.h" +#include "RuleContext.h" +#include "atn/DecisionState.h" +#include "Recognizer.h" +#include "atn/ATNType.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "atn/ATN.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +ATN::ATN() : ATN(ATNType::LEXER, 0) {} + +ATN::ATN(ATNType grammarType_, size_t maxTokenType_) : grammarType(grammarType_), maxTokenType(maxTokenType_) {} + +ATN::~ATN() { + for (ATNState *state : states) { + delete state; + } +} + +misc::IntervalSet ATN::nextTokens(ATNState *s, RuleContext *ctx) const { + LL1Analyzer analyzer(*this); + return analyzer.LOOK(s, ctx); + +} + +misc::IntervalSet const& ATN::nextTokens(ATNState *s) const { + if (!s->_nextTokenUpdated) { + UniqueLock<Mutex> lock(_mutex); + if (!s->_nextTokenUpdated) { + s->_nextTokenWithinRule = nextTokens(s, nullptr); + s->_nextTokenUpdated = true; + } + } + return s->_nextTokenWithinRule; +} + +void ATN::addState(ATNState *state) { + if (state != nullptr) { + //state->atn = this; + state->stateNumber = static_cast<int>(states.size()); + } + + states.push_back(state); +} + +void ATN::removeState(ATNState *state) { + delete states.at(state->stateNumber);// just free mem, don't shift states in list + states.at(state->stateNumber) = nullptr; +} + +int ATN::defineDecisionState(DecisionState *s) { + decisionToState.push_back(s); + s->decision = static_cast<int>(decisionToState.size() - 1); + return s->decision; +} + +DecisionState *ATN::getDecisionState(size_t decision) const { + if (!decisionToState.empty()) { + return decisionToState[decision]; + } + return nullptr; +} + +size_t ATN::getNumberOfDecisions() const { + return decisionToState.size(); +} + +misc::IntervalSet ATN::getExpectedTokens(size_t stateNumber, RuleContext *context) const { + if (stateNumber == ATNState::INVALID_STATE_NUMBER || stateNumber >= states.size()) { + throw IllegalArgumentException("Invalid state number."); + } + + RuleContext *ctx = context; + ATNState *s = states.at(stateNumber); + misc::IntervalSet following = nextTokens(s); + if (!following.contains(Token::EPSILON)) { + return following; + } + + misc::IntervalSet expected; + expected.addAll(following); + expected.remove(Token::EPSILON); + while (ctx && ctx->invokingState != ATNState::INVALID_STATE_NUMBER && following.contains(Token::EPSILON)) { + ATNState *invokingState = states.at(ctx->invokingState); + const RuleTransition *rt = static_cast<const RuleTransition*>(invokingState->transitions[0].get()); + following = nextTokens(rt->followState); + expected.addAll(following); + expected.remove(Token::EPSILON); + + if (ctx->parent == nullptr) { + break; + } + ctx = static_cast<RuleContext *>(ctx->parent); + } + + if (following.contains(Token::EPSILON)) { + expected.add(Token::EOF); + } + + return expected; +} + +std::string ATN::toString() const { + std::stringstream ss; + std::string type; + switch (grammarType) { + case ATNType::LEXER: + type = "LEXER "; + break; + + case ATNType::PARSER: + type = "PARSER "; + break; + + default: + break; + } + ss << "(" << type << "ATN " << std::hex << this << std::dec << ") maxTokenType: " << maxTokenType << std::endl; + ss << "states (" << states.size() << ") {" << std::endl; + + size_t index = 0; + for (auto *state : states) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + index = 0; + for (auto *state : decisionToState) { + if (state == nullptr) { + ss << " " << index++ << ": nul" << std::endl; + } else { + std::string text = state->toString(); + ss << " " << index++ << ": " << indent(text, " ", false) << std::endl; + } + } + + ss << "}"; + + return ss.str(); +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h new file mode 100644 index 0000000000..f12476358a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATN.h @@ -0,0 +1,133 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "internal/Synchronization.h" + +// GCC generates a warning when forward-declaring ATN if ATN has already been +// declared due to the attributes added by ANTLR4CPP_PUBLIC. +// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159 +// Add constant that can be checked so forward-declarations can be omitted. +#define ANTLR4CPP_ATN_DECLARED + +namespace antlr4 { +namespace atn { + + class LexerATNSimulator; + class ParserATNSimulator; + + class ANTLR4CPP_PUBLIC ATN { + public: + static constexpr size_t INVALID_ALT_NUMBER = 0; + + /// Used for runtime deserialization of ATNs from strings. + ATN(); + + ATN(ATNType grammarType, size_t maxTokenType); + + ATN(const ATN&) = delete; + + ATN(ATN&&) = delete; + + ~ATN(); + + ATN& operator=(const ATN&) = delete; + + ATN& operator=(ATN&&) = delete; + + std::vector<ATNState *> states; + + /// Each subrule/rule is a decision point and we must track them so we + /// can go back later and build DFA predictors for them. This includes + /// all the rules, subrules, optional blocks, ()+, ()* etc... + std::vector<DecisionState *> decisionToState; + + /// Maps from rule index to starting state number. + std::vector<RuleStartState *> ruleToStartState; + + /// Maps from rule index to stop state number. + std::vector<RuleStopState *> ruleToStopState; + + /// The type of the ATN. + ATNType grammarType; + + /// The maximum value for any symbol recognized by a transition in the ATN. + size_t maxTokenType; + + /// <summary> + /// For lexer ATNs, this maps the rule index to the resulting token type. + /// For parser ATNs, this maps the rule index to the generated bypass token + /// type if the + /// <seealso cref="ATNDeserializationOptions#isGenerateRuleBypassTransitions"/> + /// deserialization option was specified; otherwise, this is {@code null}. + /// </summary> + std::vector<size_t> ruleToTokenType; + + /// For lexer ATNs, this is an array of {@link LexerAction} objects which may + /// be referenced by action transitions in the ATN. + std::vector<Ref<const LexerAction>> lexerActions; + + std::vector<TokensStartState *> modeToStartState; + + /// <summary> + /// Compute the set of valid tokens that can occur starting in state {@code s}. + /// If {@code ctx} is null, the set of tokens will not include what can follow + /// the rule surrounding {@code s}. In other words, the set will be + /// restricted to tokens reachable staying within {@code s}'s rule. + /// </summary> + misc::IntervalSet nextTokens(ATNState *s, RuleContext *ctx) const; + + /// <summary> + /// Compute the set of valid tokens that can occur starting in {@code s} and + /// staying in same rule. <seealso cref="Token#EPSILON"/> is in set if we reach end of + /// rule. + /// </summary> + misc::IntervalSet const& nextTokens(ATNState *s) const; + + void addState(ATNState *state); + + void removeState(ATNState *state); + + int defineDecisionState(DecisionState *s); + + DecisionState *getDecisionState(size_t decision) const; + + size_t getNumberOfDecisions() const; + + /// <summary> + /// Computes the set of input symbols which could follow ATN state number + /// {@code stateNumber} in the specified full {@code context}. This method + /// considers the complete parser context, but does not evaluate semantic + /// predicates (i.e. all predicates encountered during the calculation are + /// assumed true). If a path in the ATN exists from the starting state to the + /// <seealso cref="RuleStopState"/> of the outermost context without matching any + /// symbols, <seealso cref="Token#EOF"/> is added to the returned set. + /// <p/> + /// If {@code context} is {@code null}, it is treated as + /// <seealso cref="ParserRuleContext#EMPTY"/>. + /// </summary> + /// <param name="stateNumber"> the ATN state number </param> + /// <param name="context"> the full parse context </param> + /// <returns> The set of potentially valid input symbols which could follow the + /// specified state in the specified context. </returns> + /// <exception cref="IllegalArgumentException"> if the ATN does not contain a state with + /// number {@code stateNumber} </exception> + misc::IntervalSet getExpectedTokens(size_t stateNumber, RuleContext *context) const; + + std::string toString() const; + + private: + friend class LexerATNSimulator; + friend class ParserATNSimulator; + + mutable internal::Mutex _mutex; + mutable internal::SharedMutex _stateMutex; + mutable internal::SharedMutex _edgeMutex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp new file mode 100644 index 0000000000..be4d5bfa8c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.cpp @@ -0,0 +1,106 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" + +#include "atn/ATNConfig.h" + +using namespace antlr4::atn; + +namespace { + +/** + * This field stores the bit mask for implementing the + * {@link #isPrecedenceFilterSuppressed} property as a bit within the + * existing {@link #reachesIntoOuterContext} field. + */ +inline constexpr size_t SUPPRESS_PRECEDENCE_FILTER = 0x40000000; + +} + +ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context) + : ATNConfig(state, alt, std::move(context), 0, SemanticContext::Empty::Instance) {} + +ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext) + : ATNConfig(state, alt, std::move(context), 0, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext) + : ATNConfig(other.state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state) + : ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, other.semanticContext) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext) + : ATNConfig(state, other.alt, other.context, other.reachesIntoOuterContext, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context) + : ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, other.semanticContext) {} + +ATNConfig::ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext) + : ATNConfig(state, other.alt, std::move(context), other.reachesIntoOuterContext, std::move(semanticContext)) {} + +ATNConfig::ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext) + : state(state), alt(alt), context(std::move(context)), reachesIntoOuterContext(reachesIntoOuterContext), semanticContext(std::move(semanticContext)) {} + +size_t ATNConfig::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, alt); + hashCode = misc::MurmurHash::update(hashCode, context); + hashCode = misc::MurmurHash::update(hashCode, semanticContext); + hashCode = misc::MurmurHash::finish(hashCode, 4); + return hashCode; +} + +size_t ATNConfig::getOuterContextDepth() const { + return reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER; +} + +bool ATNConfig::isPrecedenceFilterSuppressed() const { + return (reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER) != 0; +} + +void ATNConfig::setPrecedenceFilterSuppressed(bool value) { + if (value) { + reachesIntoOuterContext |= SUPPRESS_PRECEDENCE_FILTER; + } else { + reachesIntoOuterContext &= ~SUPPRESS_PRECEDENCE_FILTER; + } +} + +bool ATNConfig::operator==(const ATNConfig &other) const { + return state->stateNumber == other.state->stateNumber && alt == other.alt && + ((context == other.context) || (*context == *other.context)) && + *semanticContext == *other.semanticContext && + isPrecedenceFilterSuppressed() == other.isPrecedenceFilterSuppressed(); +} + +std::string ATNConfig::toString() const { + return toString(true); +} + +std::string ATNConfig::toString(bool showAlt) const { + std::stringstream ss; + ss << "("; + + ss << state->toString(); + if (showAlt) { + ss << "," << alt; + } + if (context) { + ss << ",[" << context->toString() << "]"; + } + if (semanticContext != nullptr && semanticContext != SemanticContext::Empty::Instance) { + ss << ",[" << semanticContext->toString() << "]"; + } + if (getOuterContextDepth() > 0) { + ss << ",up=" << getOuterContextDepth(); + } + ss << ")"; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h new file mode 100644 index 0000000000..1d2e7ae163 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfig.h @@ -0,0 +1,157 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cassert> + +#include "antlr4-common.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A tuple: (ATN state, predicted alt, syntactic, semantic context). + /// The syntactic context is a graph-structured stack node whose + /// path(s) to the root is the rule invocation(s) + /// chain used to arrive at the state. The semantic context is + /// the tree of semantic predicates encountered before reaching + /// an ATN state. + /// </summary> + class ANTLR4CPP_PUBLIC ATNConfig { + public: + struct Hasher + { + size_t operator()(Ref<ATNConfig> const& k) const { + return k->hashCode(); + } + + size_t operator()(ATNConfig const& k) const { + return k.hashCode(); + } + }; + + struct Comparer { + bool operator()(Ref<ATNConfig> const& lhs, Ref<ATNConfig> const& rhs) const { + return (lhs == rhs) || (*lhs == *rhs); + } + + bool operator()(ATNConfig const& lhs, ATNConfig const& rhs) const { + return (&lhs == &rhs) || (lhs == rhs); + } + }; + + using Set = std::unordered_set<Ref<ATNConfig>, Hasher, Comparer>; + + /// The ATN state associated with this configuration. + ATNState *state = nullptr; + + /// What alt (or lexer rule) is predicted by this configuration. + const size_t alt = 0; + + /// The stack of invoking states leading to the rule/states associated + /// with this config. We track only those contexts pushed during + /// execution of the ATN simulator. + /// + /// Can be shared between multiple ANTConfig instances. + Ref<const PredictionContext> context; + + /** + * We cannot execute predicates dependent upon local context unless + * we know for sure we are in the correct context. Because there is + * no way to do this efficiently, we simply cannot evaluate + * dependent predicates unless we are in the rule that initially + * invokes the ATN simulator. + * + * <p> + * closure() tracks the depth of how far we dip into the outer context: + * depth > 0. Note that it may not be totally accurate depth since I + * don't ever decrement. TODO: make it a boolean then</p> + * + * <p> + * For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method + * is also backed by this field. Since the field is publicly accessible, the + * highest bit which would not cause the value to become negative is used to + * store this field. This choice minimizes the risk that code which only + * compares this value to 0 would be affected by the new purpose of the + * flag. It also ensures the performance of the existing {@link ATNConfig} + * constructors as well as certain operations like + * {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)} method are + * <em>completely</em> unaffected by the change.</p> + */ + size_t reachesIntoOuterContext = 0; + + /// Can be shared between multiple ATNConfig instances. + Ref<const SemanticContext> semanticContext; + + ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context); + ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext); + + ATNConfig(ATNConfig const& other, Ref<const SemanticContext> semanticContext); + ATNConfig(ATNConfig const& other, ATNState *state); + ATNConfig(ATNConfig const& other, ATNState *state, Ref<const SemanticContext> semanticContext); + ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context); + ATNConfig(ATNConfig const& other, ATNState *state, Ref<const PredictionContext> context, Ref<const SemanticContext> semanticContext); + + ATNConfig(ATNConfig const&) = default; + + ATNConfig(ATNConfig&&) = default; + + virtual ~ATNConfig() = default; + + virtual size_t hashCode() const; + + /** + * This method gets the value of the {@link #reachesIntoOuterContext} field + * as it existed prior to the introduction of the + * {@link #isPrecedenceFilterSuppressed} method. + */ + size_t getOuterContextDepth() const; + bool isPrecedenceFilterSuppressed() const; + void setPrecedenceFilterSuppressed(bool value); + + /// An ATN configuration is equal to another if both have + /// the same state, they predict the same alternative, and + /// syntactic/semantic contexts are the same. + bool operator==(const ATNConfig &other) const; + bool operator!=(const ATNConfig &other) const; + + virtual std::string toString() const; + std::string toString(bool showAlt) const; + + private: + ATNConfig(ATNState *state, size_t alt, Ref<const PredictionContext> context, size_t reachesIntoOuterContext, Ref<const SemanticContext> semanticContext); + }; + +} // namespace atn +} // namespace antlr4 + + +// Hash function for ATNConfig. + +namespace std { + using antlr4::atn::ATNConfig; + + template <> struct hash<ATNConfig> + { + size_t operator() (const ATNConfig &x) const + { + return x.hashCode(); + } + }; + + template <> struct hash<std::vector<Ref<ATNConfig>>> + { + size_t operator() (const std::vector<Ref<ATNConfig>> &vector) const + { + std::size_t seed = 0; + for (const auto &config : vector) { + seed ^= config->hashCode() + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } + }; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp new file mode 100644 index 0000000000..4ebdf8882b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.cpp @@ -0,0 +1,232 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "atn/ATNSimulator.h" +#include "Exceptions.h" +#include "atn/SemanticContext.h" +#include "support/Arrays.h" + +#include "atn/ATNConfigSet.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + +} + +ATNConfigSet::ATNConfigSet() : ATNConfigSet(true) {} + +ATNConfigSet::ATNConfigSet(const ATNConfigSet &other) + : fullCtx(other.fullCtx), _configLookup(other._configLookup.bucket_count(), ATNConfigHasher{this}, ATNConfigComparer{this}) { + addAll(other); + uniqueAlt = other.uniqueAlt; + conflictingAlts = other.conflictingAlts; + hasSemanticContext = other.hasSemanticContext; + dipsIntoOuterContext = other.dipsIntoOuterContext; +} + +ATNConfigSet::ATNConfigSet(bool fullCtx) + : fullCtx(fullCtx), _configLookup(0, ATNConfigHasher{this}, ATNConfigComparer{this}) {} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config) { + return add(config, nullptr); +} + +bool ATNConfigSet::add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache) { + assert(config); + + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (config->semanticContext != SemanticContext::Empty::Instance) { + hasSemanticContext = true; + } + if (config->getOuterContextDepth() > 0) { + dipsIntoOuterContext = true; + } + + auto existing = _configLookup.find(config.get()); + if (existing == _configLookup.end()) { + _configLookup.insert(config.get()); + _cachedHashCode = 0; + configs.push_back(config); // track order here + + return true; + } + + // a previous (s,i,pi,_), merge with it and save result + bool rootIsWildcard = !fullCtx; + Ref<const PredictionContext> merged = PredictionContext::merge((*existing)->context, config->context, rootIsWildcard, mergeCache); + // no need to check for existing.context, config.context in cache + // since only way to create new graphs is "call rule" and here. We + // cache at both places. + (*existing)->reachesIntoOuterContext = std::max((*existing)->reachesIntoOuterContext, config->reachesIntoOuterContext); + + // make sure to preserve the precedence filter suppression during the merge + if (config->isPrecedenceFilterSuppressed()) { + (*existing)->setPrecedenceFilterSuppressed(true); + } + + (*existing)->context = std::move(merged); // replace context; no need to alt mapping + + return true; +} + +bool ATNConfigSet::addAll(const ATNConfigSet &other) { + for (const auto &c : other.configs) { + add(c); + } + return false; +} + +std::vector<ATNState*> ATNConfigSet::getStates() const { + std::vector<ATNState*> states; + states.reserve(configs.size()); + for (const auto &c : configs) { + states.push_back(c->state); + } + return states; +} + +/** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + +BitSet ATNConfigSet::getAlts() const { + BitSet alts; + for (const auto &config : configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<Ref<const SemanticContext>> ATNConfigSet::getPredicates() const { + std::vector<Ref<const SemanticContext>> preds; + preds.reserve(configs.size()); + for (const auto &c : configs) { + if (c->semanticContext != SemanticContext::Empty::Instance) { + preds.push_back(c->semanticContext); + } + } + return preds; +} + +const Ref<ATNConfig>& ATNConfigSet::get(size_t i) const { + return configs[i]; +} + +void ATNConfigSet::optimizeConfigs(ATNSimulator *interpreter) { + assert(interpreter); + + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + if (_configLookup.empty()) + return; + + for (const auto &config : configs) { + config->context = interpreter->getCachedContext(config->context); + } +} + +bool ATNConfigSet::equals(const ATNConfigSet &other) const { + if (&other == this) { + return true; + } + + if (configs.size() != other.configs.size()) + return false; + + if (fullCtx != other.fullCtx || uniqueAlt != other.uniqueAlt || + conflictingAlts != other.conflictingAlts || hasSemanticContext != other.hasSemanticContext || + dipsIntoOuterContext != other.dipsIntoOuterContext) // includes stack context + return false; + + return Arrays::equals(configs, other.configs); +} + +size_t ATNConfigSet::hashCode() const { + size_t cachedHashCode = _cachedHashCode.load(std::memory_order_relaxed); + if (!isReadonly() || cachedHashCode == 0) { + cachedHashCode = 1; + for (const auto &i : configs) { + cachedHashCode = 31 * cachedHashCode + i->hashCode(); // Same as Java's list hashCode impl. + } + _cachedHashCode.store(cachedHashCode, std::memory_order_relaxed); + } + return cachedHashCode; +} + +size_t ATNConfigSet::size() const { + return configs.size(); +} + +bool ATNConfigSet::isEmpty() const { + return configs.empty(); +} + +void ATNConfigSet::clear() { + if (_readonly) { + throw IllegalStateException("This set is readonly"); + } + configs.clear(); + _cachedHashCode = 0; + _configLookup.clear(); +} + +bool ATNConfigSet::isReadonly() const { + return _readonly; +} + +void ATNConfigSet::setReadonly(bool readonly) { + _readonly = readonly; + LookupContainer(0, ATNConfigHasher{this}, ATNConfigComparer{this}).swap(_configLookup); +} + +std::string ATNConfigSet::toString() const { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < configs.size(); i++) { + ss << configs[i]->toString(); + } + ss << "]"; + + if (hasSemanticContext) { + ss << ",hasSemanticContext = " << hasSemanticContext; + } + if (uniqueAlt != ATN::INVALID_ALT_NUMBER) { + ss << ",uniqueAlt = " << uniqueAlt; + } + + if (conflictingAlts.size() > 0) { + ss << ",conflictingAlts = "; + ss << conflictingAlts.toString(); + } + + if (dipsIntoOuterContext) { + ss << ", dipsIntoOuterContext"; + } + return ss.str(); +} + +size_t ATNConfigSet::hashCode(const ATNConfig &other) const { + size_t hashCode = 7; + hashCode = 31 * hashCode + other.state->stateNumber; + hashCode = 31 * hashCode + other.alt; + hashCode = 31 * hashCode + other.semanticContext->hashCode(); + return hashCode; +} + +bool ATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const { + return lhs.state->stateNumber == rhs.state->stateNumber && lhs.alt == rhs.alt && *lhs.semanticContext == *rhs.semanticContext; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h new file mode 100644 index 0000000000..d147f183a0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNConfigSet.h @@ -0,0 +1,157 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cassert> + +#include "support/BitSet.h" +#include "atn/PredictionContext.h" +#include "atn/ATNConfig.h" +#include "FlatHashSet.h" + +namespace antlr4 { +namespace atn { + + /// Specialized set that can track info about the set, with support for combining similar configurations using a + /// graph-structured stack. + class ANTLR4CPP_PUBLIC ATNConfigSet { + public: + /// Track the elements as they are added to the set; supports get(i) + std::vector<Ref<ATNConfig>> configs; + + // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation + // TODO: can we track conflicts as they are added to save scanning configs later? + size_t uniqueAlt = 0; + + /** Currently this is only used when we detect SLL conflict; this does + * not necessarily represent the ambiguous alternatives. In fact, + * I should also point out that this seems to include predicated alternatives + * that have predicates that evaluate to false. Computed in computeTargetState(). + */ + antlrcpp::BitSet conflictingAlts; + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. Don't make a DFA state from this. + bool hasSemanticContext = false; + bool dipsIntoOuterContext = false; + + /// Indicates that this configuration set is part of a full context + /// LL prediction. It will be used to determine how to merge $. With SLL + /// it's a wildcard whereas it is not for LL context merge. + const bool fullCtx = true; + + ATNConfigSet(); + + ATNConfigSet(const ATNConfigSet &other); + + ATNConfigSet(ATNConfigSet&&) = delete; + + explicit ATNConfigSet(bool fullCtx); + + virtual ~ATNConfigSet() = default; + + bool add(const Ref<ATNConfig> &config); + + /// <summary> + /// Adding a new config means merging contexts with existing configs for + /// {@code (s, i, pi, _)}, where {@code s} is the + /// <seealso cref="ATNConfig#state"/>, {@code i} is the <seealso cref="ATNConfig#alt"/>, and + /// {@code pi} is the <seealso cref="ATNConfig#semanticContext"/>. We use + /// {@code (s,i,pi)} as key. + /// <p/> + /// This method updates <seealso cref="#dipsIntoOuterContext"/> and + /// <seealso cref="#hasSemanticContext"/> when necessary. + /// </summary> + bool add(const Ref<ATNConfig> &config, PredictionContextMergeCache *mergeCache); + + bool addAll(const ATNConfigSet &other); + + std::vector<ATNState*> getStates() const; + + /** + * Gets the complete set of represented alternatives for the configuration + * set. + * + * @return the set of represented alternatives in this configuration set + * + * @since 4.3 + */ + antlrcpp::BitSet getAlts() const; + std::vector<Ref<const SemanticContext>> getPredicates() const; + + const Ref<ATNConfig>& get(size_t i) const; + + void optimizeConfigs(ATNSimulator *interpreter); + + size_t size() const; + bool isEmpty() const; + void clear(); + bool isReadonly() const; + void setReadonly(bool readonly); + + virtual size_t hashCode() const; + + virtual bool equals(const ATNConfigSet &other) const; + + virtual std::string toString() const; + + private: + struct ATNConfigHasher final { + const ATNConfigSet* atnConfigSet; + + size_t operator()(const ATNConfig *other) const { + assert(other != nullptr); + return atnConfigSet->hashCode(*other); + } + }; + + struct ATNConfigComparer final { + const ATNConfigSet* atnConfigSet; + + bool operator()(const ATNConfig *lhs, const ATNConfig *rhs) const { + assert(lhs != nullptr); + assert(rhs != nullptr); + return atnConfigSet->equals(*lhs, *rhs); + } + }; + + mutable std::atomic<size_t> _cachedHashCode = 0; + + /// Indicates that the set of configurations is read-only. Do not + /// allow any code to manipulate the set; DFA states will point at + /// the sets and they must not change. This does not protect the other + /// fields; in particular, conflictingAlts is set after + /// we've made this readonly. + bool _readonly = false; + + virtual size_t hashCode(const ATNConfig &atnConfig) const; + + virtual bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const; + + using LookupContainer = FlatHashSet<ATNConfig*, ATNConfigHasher, ATNConfigComparer>; + + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. + LookupContainer _configLookup; + }; + + inline bool operator==(const ATNConfigSet &lhs, const ATNConfigSet &rhs) { return lhs.equals(rhs); } + + inline bool operator!=(const ATNConfigSet &lhs, const ATNConfigSet &rhs) { return !operator==(lhs, rhs); } + +} // namespace atn +} // namespace antlr4 + +namespace std { + +template <> +struct hash<::antlr4::atn::ATNConfigSet> { + size_t operator()(const ::antlr4::atn::ATNConfigSet &atnConfigSet) const { + return atnConfigSet.hashCode(); + } +}; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp new file mode 100644 index 0000000000..e0a7cb2b27 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNDeserializationOptions.h" +#include "Exceptions.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ATNDeserializationOptions::ATNDeserializationOptions(ATNDeserializationOptions *options) + : _readOnly(false), _verifyATN(options->_verifyATN), + _generateRuleBypassTransitions(options->_generateRuleBypassTransitions) {} + +const ATNDeserializationOptions& ATNDeserializationOptions::getDefaultOptions() { + static const ATNDeserializationOptions* const defaultOptions = new ATNDeserializationOptions(); + return *defaultOptions; +} + +void ATNDeserializationOptions::makeReadOnly() { + _readOnly = true; +} + +void ATNDeserializationOptions::setVerifyATN(bool verify) { + throwIfReadOnly(); + _verifyATN = verify; +} + +void ATNDeserializationOptions::setGenerateRuleBypassTransitions(bool generate) { + throwIfReadOnly(); + _generateRuleBypassTransitions = generate; +} + +void ATNDeserializationOptions::throwIfReadOnly() const { + if (isReadOnly()) { + throw IllegalStateException("ATNDeserializationOptions is read only."); + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h new file mode 100644 index 0000000000..595f918649 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializationOptions.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + +class ANTLR4CPP_PUBLIC ATNDeserializationOptions final { +public: + ATNDeserializationOptions() + : _readOnly(false), _verifyATN(true), _generateRuleBypassTransitions(false) {} + + // TODO: Is this useful? If so we should mark it as explicit, otherwise remove it. + ATNDeserializationOptions(ATNDeserializationOptions *options); + + ATNDeserializationOptions(const ATNDeserializationOptions&) = default; + + ATNDeserializationOptions& operator=(const ATNDeserializationOptions&) = default; + + static const ATNDeserializationOptions& getDefaultOptions(); + + bool isReadOnly() const { return _readOnly; } + + void makeReadOnly(); + + bool isVerifyATN() const { return _verifyATN; } + + void setVerifyATN(bool verify); + + bool isGenerateRuleBypassTransitions() const { return _generateRuleBypassTransitions; } + + void setGenerateRuleBypassTransitions(bool generate); + +private: + void throwIfReadOnly() const; + + bool _readOnly; + bool _verifyATN; + bool _generateRuleBypassTransitions; +}; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp new file mode 100644 index 0000000000..2da3c32357 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.cpp @@ -0,0 +1,628 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNDeserializationOptions.h" + +#include "atn/ATNType.h" +#include "atn/ATNState.h" +#include "atn/ATN.h" + +#include "atn/LoopEndState.h" +#include "atn/DecisionState.h" +#include "atn/RuleStartState.h" +#include "atn/RuleStopState.h" +#include "atn/TokensStartState.h" +#include "atn/RuleTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/PlusLoopbackState.h" +#include "atn/PlusBlockStartState.h" +#include "atn/StarLoopbackState.h" +#include "atn/BasicBlockStartState.h" +#include "atn/BasicState.h" +#include "atn/BlockEndState.h" +#include "atn/StarLoopEntryState.h" + +#include "atn/AtomTransition.h" +#include "atn/StarBlockStartState.h" +#include "atn/RangeTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/SetTransition.h" +#include "atn/NotSetTransition.h" +#include "atn/WildcardTransition.h" +#include "atn/TransitionType.h" +#include "Token.h" + +#include "misc/IntervalSet.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerCustomAction.h" +#include "atn/LexerChannelAction.h" +#include "atn/LexerModeAction.h" +#include "atn/LexerMoreAction.h" +#include "atn/LexerPopModeAction.h" +#include "atn/LexerPushModeAction.h" +#include "atn/LexerSkipAction.h" +#include "atn/LexerTypeAction.h" + +#include "atn/ATNDeserializer.h" + +#include <cassert> +#include <string> +#include <vector> + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + void checkCondition(bool condition, std::string_view message) { + if (!condition) { + throw IllegalStateException(std::string(message)); + } + } + + void checkCondition(bool condition) { + checkCondition(condition, ""); + } + + /** + * Analyze the {@link StarLoopEntryState} states in the specified ATN to set + * the {@link StarLoopEntryState#isPrecedenceDecision} field to the + * correct value. + * + * @param atn The ATN. + */ + void markPrecedenceDecisions(const ATN &atn) { + for (ATNState *state : atn.states) { + if (!StarLoopEntryState::is(state)) { + continue; + } + + /* We analyze the ATN to determine if this ATN decision state is the + * decision for the closure block that determines whether a + * precedence rule should continue or complete. + */ + if (atn.ruleToStartState[state->ruleIndex]->isLeftRecursiveRule) { + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (LoopEndState::is(maybeLoopEndState)) { + if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) { + downCast<StarLoopEntryState*>(state)->isPrecedenceDecision = true; + } + } + } + } + } + + Ref<const LexerAction> lexerActionFactory(LexerActionType type, int data1, int data2) { + switch (type) { + case LexerActionType::CHANNEL: + return std::make_shared<LexerChannelAction>(data1); + + case LexerActionType::CUSTOM: + return std::make_shared<LexerCustomAction>(data1, data2); + + case LexerActionType::MODE: + return std::make_shared< LexerModeAction>(data1); + + case LexerActionType::MORE: + return LexerMoreAction::getInstance(); + + case LexerActionType::POP_MODE: + return LexerPopModeAction::getInstance(); + + case LexerActionType::PUSH_MODE: + return std::make_shared<LexerPushModeAction>(data1); + + case LexerActionType::SKIP: + return LexerSkipAction::getInstance(); + + case LexerActionType::TYPE: + return std::make_shared<LexerTypeAction>(data1); + + default: + throw IllegalArgumentException("The specified lexer action type " + std::to_string(static_cast<size_t>(type)) + + " is not valid."); + } + } + + ConstTransitionPtr edgeFactory(const ATN &atn, TransitionType type, size_t trg, size_t arg1, size_t arg2, + size_t arg3, const std::vector<misc::IntervalSet> &sets) { + ATNState *target = atn.states[trg]; + switch (type) { + case TransitionType::EPSILON: + return std::make_unique<EpsilonTransition>(target); + case TransitionType::RANGE: + if (arg3 != 0) { + return std::make_unique<RangeTransition>(target, Token::EOF, arg2); + } else { + return std::make_unique<RangeTransition>(target, arg1, arg2); + } + case TransitionType::RULE: + return std::make_unique<RuleTransition>(downCast<RuleStartState*>(atn.states[arg1]), arg2, (int)arg3, target); + case TransitionType::PREDICATE: + return std::make_unique<PredicateTransition>(target, arg1, arg2, arg3 != 0); + case TransitionType::PRECEDENCE: + return std::make_unique<PrecedencePredicateTransition>(target, (int)arg1); + case TransitionType::ATOM: + if (arg3 != 0) { + return std::make_unique<AtomTransition>(target, Token::EOF); + } else { + return std::make_unique<AtomTransition>(target, arg1); + } + case TransitionType::ACTION: + return std::make_unique<ActionTransition>(target, arg1, arg2, arg3 != 0); + case TransitionType::SET: + return std::make_unique<SetTransition>(target, sets[arg1]); + case TransitionType::NOT_SET: + return std::make_unique<NotSetTransition>(target, sets[arg1]); + case TransitionType::WILDCARD: + return std::make_unique<WildcardTransition>(target); + } + + throw IllegalArgumentException("The specified transition type is not valid."); + } + + /* mem check: all created instances are freed in the d-tor of the ATN. */ + ATNState* stateFactory(ATNStateType type, size_t ruleIndex) { + ATNState *s; + switch (type) { + case ATNStateType::INVALID: + return nullptr; + case ATNStateType::BASIC : + s = new BasicState(); + break; + case ATNStateType::RULE_START : + s = new RuleStartState(); + break; + case ATNStateType::BLOCK_START : + s = new BasicBlockStartState(); + break; + case ATNStateType::PLUS_BLOCK_START : + s = new PlusBlockStartState(); + break; + case ATNStateType::STAR_BLOCK_START : + s = new StarBlockStartState(); + break; + case ATNStateType::TOKEN_START : + s = new TokensStartState(); + break; + case ATNStateType::RULE_STOP : + s = new RuleStopState(); + break; + case ATNStateType::BLOCK_END : + s = new BlockEndState(); + break; + case ATNStateType::STAR_LOOP_BACK : + s = new StarLoopbackState(); + break; + case ATNStateType::STAR_LOOP_ENTRY : + s = new StarLoopEntryState(); + break; + case ATNStateType::PLUS_LOOP_BACK : + s = new PlusLoopbackState(); + break; + case ATNStateType::LOOP_END : + s = new LoopEndState(); + break; + default : + std::string message = "The specified state type " + std::to_string(static_cast<size_t>(type)) + " is not valid."; + throw IllegalArgumentException(message); + } + assert(s->getStateType() == type); + s->ruleIndex = ruleIndex; + return s; + } + + ssize_t readUnicodeInt32(SerializedATNView data, int& p) { + return static_cast<ssize_t>(data[p++]); + } + + void deserializeSets( + SerializedATNView data, + int& p, + std::vector<misc::IntervalSet>& sets) { + size_t nsets = data[p++]; + sets.reserve(sets.size() + nsets); + for (size_t i = 0; i < nsets; i++) { + size_t nintervals = data[p++]; + misc::IntervalSet set; + + bool containsEof = data[p++] != 0; + if (containsEof) { + set.add(-1); + } + + for (size_t j = 0; j < nintervals; j++) { + auto a = readUnicodeInt32(data, p); + auto b = readUnicodeInt32(data, p); + set.add(a, b); + } + sets.push_back(set); + } + } + +} + +ATNDeserializer::ATNDeserializer() : ATNDeserializer(ATNDeserializationOptions::getDefaultOptions()) {} + +ATNDeserializer::ATNDeserializer(ATNDeserializationOptions deserializationOptions) : _deserializationOptions(std::move(deserializationOptions)) {} + +std::unique_ptr<ATN> ATNDeserializer::deserialize(SerializedATNView data) const { + int p = 0; + int version = data[p++]; + if (version != SERIALIZED_VERSION) { + std::string reason = "Could not deserialize ATN with version" + std::to_string(version) + "(expected " + std::to_string(SERIALIZED_VERSION) + ")."; + + throw UnsupportedOperationException(reason); + } + + ATNType grammarType = (ATNType)data[p++]; + size_t maxTokenType = data[p++]; + auto atn = std::make_unique<ATN>(grammarType, maxTokenType); + + // + // STATES + // + { + std::vector<std::pair<LoopEndState*, size_t>> loopBackStateNumbers; + std::vector<std::pair<BlockStartState*, size_t>> endStateNumbers; + size_t nstates = data[p++]; + atn->states.reserve(nstates); + loopBackStateNumbers.reserve(nstates); // Reserve worst case size, its short lived. + endStateNumbers.reserve(nstates); // Reserve worst case size, its short lived. + for (size_t i = 0; i < nstates; i++) { + ATNStateType stype = static_cast<ATNStateType>(data[p++]); + // ignore bad type of states + if (stype == ATNStateType::INVALID) { + atn->addState(nullptr); + continue; + } + + size_t ruleIndex = data[p++]; + ATNState *s = stateFactory(stype, ruleIndex); + if (stype == ATNStateType::LOOP_END) { // special case + int loopBackStateNumber = data[p++]; + loopBackStateNumbers.push_back({ downCast<LoopEndState*>(s), loopBackStateNumber }); + } else if (BlockStartState::is(s)) { + int endStateNumber = data[p++]; + endStateNumbers.push_back({ downCast<BlockStartState*>(s), endStateNumber }); + } + atn->addState(s); + } + + // delay the assignment of loop back and end states until we know all the state instances have been initialized + for (auto &pair : loopBackStateNumbers) { + pair.first->loopBackState = atn->states[pair.second]; + } + + for (auto &pair : endStateNumbers) { + pair.first->endState = downCast<BlockEndState*>(atn->states[pair.second]); + } + } + + size_t numNonGreedyStates = data[p++]; + for (size_t i = 0; i < numNonGreedyStates; i++) { + size_t stateNumber = data[p++]; + // The serialized ATN must be specifying the right states, so that the + // cast below is correct. + downCast<DecisionState*>(atn->states[stateNumber])->nonGreedy = true; + } + + size_t numPrecedenceStates = data[p++]; + for (size_t i = 0; i < numPrecedenceStates; i++) { + size_t stateNumber = data[p++]; + downCast<RuleStartState*>(atn->states[stateNumber])->isLeftRecursiveRule = true; + } + + // + // RULES + // + size_t nrules = data[p++]; + atn->ruleToStartState.reserve(nrules); + for (size_t i = 0; i < nrules; i++) { + size_t s = data[p++]; + // Also here, the serialized atn must ensure to point to the correct class type. + RuleStartState *startState = downCast<RuleStartState*>(atn->states[s]); + atn->ruleToStartState.push_back(startState); + if (atn->grammarType == ATNType::LEXER) { + size_t tokenType = data[p++]; + atn->ruleToTokenType.push_back(tokenType); + } + } + + atn->ruleToStopState.resize(nrules); + for (ATNState *state : atn->states) { + if (!RuleStopState::is(state)) { + continue; + } + + RuleStopState *stopState = downCast<RuleStopState*>(state); + atn->ruleToStopState[state->ruleIndex] = stopState; + atn->ruleToStartState[state->ruleIndex]->stopState = stopState; + } + + // + // MODES + // + size_t nmodes = data[p++]; + atn->modeToStartState.reserve(nmodes); + for (size_t i = 0; i < nmodes; i++) { + size_t s = data[p++]; + atn->modeToStartState.push_back(downCast<TokensStartState*>(atn->states[s])); + } + + // + // SETS + // + { + std::vector<misc::IntervalSet> sets; + + deserializeSets(data, p, sets); + sets.shrink_to_fit(); + + // + // EDGES + // + int nedges = data[p++]; + for (int i = 0; i < nedges; i++) { + size_t src = data[p]; + size_t trg = data[p + 1]; + TransitionType ttype = static_cast<TransitionType>(data[p + 2]); + size_t arg1 = data[p + 3]; + size_t arg2 = data[p + 4]; + size_t arg3 = data[p + 5]; + ConstTransitionPtr trans = edgeFactory(*atn, ttype, trg, arg1, arg2, arg3, sets); + ATNState *srcState = atn->states[src]; + srcState->addTransition(std::move(trans)); + p += 6; + } + } + // edges for rule stop states can be derived, so they aren't serialized + for (ATNState *state : atn->states) { + for (size_t i = 0; i < state->transitions.size(); i++) { + const Transition *t = state->transitions[i].get(); + if (!RuleTransition::is(t)) { + continue; + } + + const RuleTransition *ruleTransition = downCast<const RuleTransition*>(t); + size_t outermostPrecedenceReturn = INVALID_INDEX; + if (atn->ruleToStartState[ruleTransition->target->ruleIndex]->isLeftRecursiveRule) { + if (ruleTransition->precedence == 0) { + outermostPrecedenceReturn = ruleTransition->target->ruleIndex; + } + } + + ConstTransitionPtr returnTransition = std::make_unique<EpsilonTransition>(ruleTransition->followState, outermostPrecedenceReturn); + atn->ruleToStopState[ruleTransition->target->ruleIndex]->addTransition(std::move(returnTransition)); + } + } + + for (ATNState *state : atn->states) { + if (BlockStartState::is(state)) { + BlockStartState *startState = downCast<BlockStartState*>(state); + + // we need to know the end state to set its start state + if (startState->endState == nullptr) { + throw IllegalStateException(); + } + + // block end states can only be associated to a single block start state + if (startState->endState->startState != nullptr) { + throw IllegalStateException(); + } + + startState->endState->startState = downCast<BlockStartState*>(state); + } + + if (PlusLoopbackState::is(state)) { + PlusLoopbackState *loopbackState = downCast<PlusLoopbackState*>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (PlusBlockStartState::is(target)) { + (downCast<PlusBlockStartState*>(target))->loopBackState = loopbackState; + } + } + } else if (StarLoopbackState::is(state)) { + StarLoopbackState *loopbackState = downCast<StarLoopbackState*>(state); + for (size_t i = 0; i < loopbackState->transitions.size(); i++) { + ATNState *target = loopbackState->transitions[i]->target; + if (StarLoopEntryState::is(target)) { + downCast<StarLoopEntryState*>(target)->loopBackState = loopbackState; + } + } + } + } + + // + // DECISIONS + // + size_t ndecisions = data[p++]; + atn->decisionToState.reserve(ndecisions); + for (size_t i = 0; i < ndecisions; i++) { + size_t s = data[p++]; + DecisionState *decState = downCast<DecisionState*>(atn->states[s]); + if (decState == nullptr) + throw IllegalStateException(); + + atn->decisionToState.push_back(decState); + decState->decision = static_cast<int>(i); + } + + // + // LEXER ACTIONS + // + if (atn->grammarType == ATNType::LEXER) { + atn->lexerActions.resize(data[p++]); + for (size_t i = 0; i < atn->lexerActions.size(); i++) { + LexerActionType actionType = static_cast<LexerActionType>(data[p++]); + int data1 = data[p++]; + int data2 = data[p++]; + atn->lexerActions[i] = lexerActionFactory(actionType, data1, data2); + } + } + + markPrecedenceDecisions(*atn); + + if (_deserializationOptions.isVerifyATN()) { + verifyATN(*atn); + } + + if (_deserializationOptions.isGenerateRuleBypassTransitions() && atn->grammarType == ATNType::PARSER) { + atn->ruleToTokenType.resize(atn->ruleToStartState.size()); + for (size_t i = 0; i < atn->ruleToStartState.size(); i++) { + atn->ruleToTokenType[i] = static_cast<int>(atn->maxTokenType + i + 1); + } + + for (std::vector<RuleStartState*>::size_type i = 0; i < atn->ruleToStartState.size(); i++) { + BasicBlockStartState *bypassStart = new BasicBlockStartState(); /* mem check: freed in ATN d-tor */ + bypassStart->ruleIndex = static_cast<int>(i); + atn->addState(bypassStart); + + BlockEndState *bypassStop = new BlockEndState(); /* mem check: freed in ATN d-tor */ + bypassStop->ruleIndex = static_cast<int>(i); + atn->addState(bypassStop); + + bypassStart->endState = bypassStop; + atn->defineDecisionState(bypassStart); + + bypassStop->startState = bypassStart; + + ATNState *endState; + const Transition *excludeTransition = nullptr; + if (atn->ruleToStartState[i]->isLeftRecursiveRule) { + // wrap from the beginning of the rule to the StarLoopEntryState + endState = nullptr; + for (ATNState *state : atn->states) { + if (state->ruleIndex != i) { + continue; + } + + if (!StarLoopEntryState::is(state)) { + continue; + } + + ATNState *maybeLoopEndState = state->transitions[state->transitions.size() - 1]->target; + if (!LoopEndState::is(maybeLoopEndState)) { + continue; + } + + if (maybeLoopEndState->epsilonOnlyTransitions && RuleStopState::is(maybeLoopEndState->transitions[0]->target)) { + endState = state; + break; + } + } + + if (endState == nullptr) { + throw UnsupportedOperationException("Couldn't identify final state of the precedence rule prefix section."); + + } + + excludeTransition = (static_cast<StarLoopEntryState*>(endState))->loopBackState->transitions[0].get(); + } else { + endState = atn->ruleToStopState[i]; + } + + // all non-excluded transitions that currently target end state need to target blockEnd instead + for (ATNState *state : atn->states) { + for (auto &transition : state->transitions) { + if (transition.get() == excludeTransition) { + continue; + } + + if (transition->target == endState) { + const_cast<Transition*>(transition.get())->target = bypassStop; + } + } + } + + // all transitions leaving the rule start state need to leave blockStart instead + while (atn->ruleToStartState[i]->transitions.size() > 0) { + ConstTransitionPtr transition = atn->ruleToStartState[i]->removeTransition(atn->ruleToStartState[i]->transitions.size() - 1); + bypassStart->addTransition(std::move(transition)); + } + + // link the new states + atn->ruleToStartState[i]->addTransition(std::make_unique<EpsilonTransition>(bypassStart)); + bypassStop->addTransition(std::make_unique<EpsilonTransition>(endState)); + + ATNState *matchState = new BasicState(); /* mem check: freed in ATN d-tor */ + atn->addState(matchState); + matchState->addTransition(std::make_unique<AtomTransition>(bypassStop, atn->ruleToTokenType[i])); + bypassStart->addTransition(std::make_unique<EpsilonTransition>(matchState)); + } + + if (_deserializationOptions.isVerifyATN()) { + // reverify after modification + verifyATN(*atn); + } + } + + return atn; +} + +void ATNDeserializer::verifyATN(const ATN &atn) const { + // verify assumptions + for (ATNState *state : atn.states) { + if (state == nullptr) { + continue; + } + + checkCondition(state->epsilonOnlyTransitions || state->transitions.size() <= 1); + + if (PlusBlockStartState::is(state)) { + checkCondition((downCast<PlusBlockStartState*>(state))->loopBackState != nullptr); + } + + if (StarLoopEntryState::is(state)) { + StarLoopEntryState *starLoopEntryState = downCast<StarLoopEntryState*>(state); + checkCondition(starLoopEntryState->loopBackState != nullptr); + checkCondition(starLoopEntryState->transitions.size() == 2); + + if (StarBlockStartState::is(starLoopEntryState->transitions[0]->target)) { + checkCondition(downCast<LoopEndState*>(starLoopEntryState->transitions[1]->target) != nullptr); + checkCondition(!starLoopEntryState->nonGreedy); + } else if (LoopEndState::is(starLoopEntryState->transitions[0]->target)) { + checkCondition(StarBlockStartState::is(starLoopEntryState->transitions[1]->target)); + checkCondition(starLoopEntryState->nonGreedy); + } else { + throw IllegalStateException(); + } + } + + if (StarLoopbackState::is(state)) { + checkCondition(state->transitions.size() == 1); + checkCondition(StarLoopEntryState::is(state->transitions[0]->target)); + } + + if (LoopEndState::is(state)) { + checkCondition((downCast<LoopEndState*>(state))->loopBackState != nullptr); + } + + if (RuleStartState::is(state)) { + checkCondition((downCast<RuleStartState*>(state))->stopState != nullptr); + } + + if (BlockStartState::is(state)) { + checkCondition((downCast<BlockStartState*>(state))->endState != nullptr); + } + + if (BlockEndState::is(state)) { + checkCondition((downCast<BlockEndState*>(state))->startState != nullptr); + } + + if (DecisionState::is(state)) { + DecisionState *decisionState = downCast<DecisionState*>(state); + checkCondition(decisionState->transitions.size() <= 1 || decisionState->decision >= 0); + } else { + checkCondition(state->transitions.size() <= 1 || RuleStopState::is(state)); + } + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h new file mode 100644 index 0000000000..3cd56b9cdf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNDeserializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNDeserializationOptions.h" +#include "atn/SerializedATNView.h" +#include "atn/LexerAction.h" +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNDeserializer final { + public: + static constexpr size_t SERIALIZED_VERSION = 4; + + ATNDeserializer(); + + explicit ATNDeserializer(ATNDeserializationOptions deserializationOptions); + + std::unique_ptr<ATN> deserialize(SerializedATNView input) const; + void verifyATN(const ATN &atn) const; + + private: + const ATNDeserializationOptions _deserializationOptions; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.cpp new file mode 100644 index 0000000000..04e1af992e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNSimulator.h" + +#include "atn/ATNConfigSet.h" +#include "atn/ATNDeserializer.h" +#include "atn/ATNType.h" +#include "dfa/DFAState.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlr4::atn; + +const Ref<DFAState> ATNSimulator::ERROR = std::make_shared<DFAState>(std::numeric_limits<int>::max()); + +ATNSimulator::ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache) + : atn(atn), _sharedContextCache(sharedContextCache) {} + +void ATNSimulator::clearDFA() { + throw UnsupportedOperationException("This ATN simulator does not support clearing the DFA."); +} + +PredictionContextCache& ATNSimulator::getSharedContextCache() const { + return _sharedContextCache; +} + +Ref<const PredictionContext> ATNSimulator::getCachedContext(const Ref<const PredictionContext> &context) { + // This function must only be called with an active state lock, as we are going to change a shared structure. + return PredictionContext::getCachedContext(context, getSharedContextCache()); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h new file mode 100644 index 0000000000..b14939e219 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNSimulator.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATN.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ATNSimulator { + public: + /// Must distinguish between missing edge and edge we know leads nowhere. + static const Ref<dfa::DFAState> ERROR; + const ATN &atn; + + ATNSimulator(const ATN &atn, PredictionContextCache &sharedContextCache); + + virtual ~ATNSimulator() = default; + + virtual void reset() = 0; + + /** + * Clear the DFA cache used by the current instance. Since the DFA cache may + * be shared by multiple ATN simulators, this method may affect the + * performance (but not accuracy) of other parsers which are being used + * concurrently. + * + * @throws UnsupportedOperationException if the current instance does not + * support clearing the DFA. + * + * @since 4.3 + */ + virtual void clearDFA(); + + PredictionContextCache& getSharedContextCache() const; + Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context); + + protected: + /// <summary> + /// The context cache maps all PredictionContext objects that are equals() + /// to a single cached copy. This cache is shared across all contexts + /// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + /// to use only cached nodes/graphs in addDFAState(). We don't want to + /// fill this during closure() since there are lots of contexts that + /// pop up but are not used ever again. It also greatly slows down closure(). + /// <p/> + /// This cache makes a huge difference in memory and a little bit in speed. + /// For the Java grammar on java.*, it dropped the memory requirements + /// at the end from 25M to 16M. We don't store any of the full context + /// graphs in the DFA because they are limited to local context only, + /// but apparently there's a lot of repetition there as well. We optimize + /// the config contexts before storing the config set in the DFA states + /// by literally rebuilding them with cached subgraphs only. + /// <p/> + /// I tried a cache for use during closure operations, that was + /// whacked after each adaptivePredict(). It cost a little bit + /// more time I think and doesn't save on the overall footprint + /// so it's not worth the complexity. + /// </summary> + PredictionContextCache &_sharedContextCache; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp new file mode 100644 index 0000000000..29911901be --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" +#include "atn/Transition.h" +#include "misc/IntervalSet.h" +#include "support/CPPUtils.h" + +#include "atn/ATNState.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +size_t ATNState::hashCode() const { + return stateNumber; +} + +bool ATNState::equals(const ATNState &other) const { + return stateNumber == other.stateNumber; +} + +bool ATNState::isNonGreedyExitState() const { + return false; +} + +std::string ATNState::toString() const { + return std::to_string(stateNumber); +} + +void ATNState::addTransition(ConstTransitionPtr e) { + addTransition(transitions.size(), std::move(e)); +} + +void ATNState::addTransition(size_t index, ConstTransitionPtr e) { + for (const auto &transition : transitions) + if (transition->target->stateNumber == e->target->stateNumber) { + return; + } + + if (transitions.empty()) { + epsilonOnlyTransitions = e->isEpsilon(); + } else if (epsilonOnlyTransitions != e->isEpsilon()) { + std::cerr << "ATN state %d has both epsilon and non-epsilon transitions.\n" << stateNumber; + epsilonOnlyTransitions = false; + } + + transitions.insert(transitions.begin() + index, std::move(e)); +} + +ConstTransitionPtr ATNState::removeTransition(size_t index) { + ConstTransitionPtr result = std::move(transitions[index]); + transitions.erase(transitions.begin() + index); + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h new file mode 100644 index 0000000000..7613f40eee --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNState.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" +#include "atn/ATNStateType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// The following images show the relation of states and + /// <seealso cref="ATNState#transitions"/> for various grammar constructs. + /// + /// <ul> + /// + /// <li>Solid edges marked with an ε indicate a required + /// <seealso cref="EpsilonTransition"/>.</li> + /// + /// <li>Dashed edges indicate locations where any transition derived from + /// <seealso cref="Transition"/> might appear.</li> + /// + /// <li>Dashed nodes are place holders for either a sequence of linked + /// <seealso cref="BasicState"/> states or the inclusion of a block representing a nested + /// construct in one of the forms below.</li> + /// + /// <li>Nodes showing multiple outgoing alternatives with a {@code ...} support + /// any number of alternatives (one or more). Nodes without the {@code ...} only + /// support the exact number of alternatives shown in the diagram.</li> + /// + /// </ul> + /// + /// <h2>Basic Blocks</h2> + /// + /// <h3>Rule</h3> + /// + /// <embed src="images/Rule.svg" type="image/svg+xml"/> + /// + /// <h3>Block of 1 or more alternatives</h3> + /// + /// <embed src="images/Block.svg" type="image/svg+xml"/> + /// + /// <h2>Greedy Loops</h2> + /// + /// <h3>Greedy Closure: {@code (...)*}</h3> + /// + /// <embed src="images/ClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Positive Closure: {@code (...)+}</h3> + /// + /// <embed src="images/PositiveClosureGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Greedy Optional: {@code (...)?}</h3> + /// + /// <embed src="images/OptionalGreedy.svg" type="image/svg+xml"/> + /// + /// <h2>Non-Greedy Loops</h2> + /// + /// <h3>Non-Greedy Closure: {@code (...)*?}</h3> + /// + /// <embed src="images/ClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Positive Closure: {@code (...)+?}</h3> + /// + /// <embed src="images/PositiveClosureNonGreedy.svg" type="image/svg+xml"/> + /// + /// <h3>Non-Greedy Optional: {@code (...)??}</h3> + /// + /// <embed src="images/OptionalNonGreedy.svg" type="image/svg+xml"/> + /// </summary> + +// GCC generates a warning here if ATN has already been declared due to the +// attributes added by ANTLR4CPP_PUBLIC. +// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39159 +// Only forward-declare if it hasn't already been declared. +#ifndef ANTLR4CPP_ATN_DECLARED + class ANTLR4CPP_PUBLIC ATN; +#endif + + class ANTLR4CPP_PUBLIC ATNState { + public: + static constexpr size_t INITIAL_NUM_TRANSITIONS = 4; + static constexpr size_t INVALID_STATE_NUMBER = std::numeric_limits<size_t>::max(); + + size_t stateNumber = INVALID_STATE_NUMBER; + size_t ruleIndex = 0; // at runtime, we don't have Rule objects + bool epsilonOnlyTransitions = false; + + /// Track the transitions emanating from this ATN state. + std::vector<ConstTransitionPtr> transitions; + + ATNState() = delete; + + ATNState(ATNState const&) = delete; + + ATNState(ATNState&&) = delete; + + virtual ~ATNState() = default; + + ATNState& operator=(ATNState const&) = delete; + + ATNState& operator=(ATNState&&) = delete; + + void addTransition(ConstTransitionPtr e); + void addTransition(size_t index, ConstTransitionPtr e); + ConstTransitionPtr removeTransition(size_t index); + + virtual size_t hashCode() const; + virtual bool equals(const ATNState &other) const; + + virtual bool isNonGreedyExitState() const; + virtual std::string toString() const; + + ATNStateType getStateType() const { return _stateType; } + + protected: + explicit ATNState(ATNStateType stateType) : _stateType(stateType) {} + + private: + /// Used to cache lookahead during parsing, not used during construction. + + misc::IntervalSet _nextTokenWithinRule; + std::atomic<bool> _nextTokenUpdated { false }; + + const ATNStateType _stateType; + + friend class ATN; + }; + + inline bool operator==(const ATNState &lhs, const ATNState &rhs) { return lhs.equals(rhs); } + + inline bool operator!=(const ATNState &lhs, const ATNState &rhs) { return !operator==(lhs, rhs); } + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp new file mode 100644 index 0000000000..577e2af87c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.cpp @@ -0,0 +1,33 @@ +#include "atn/ATNStateType.h" + +std::string antlr4::atn::atnStateTypeName(ATNStateType atnStateType) { + switch (atnStateType) { + case ATNStateType::INVALID: + return "INVALID"; + case ATNStateType::BASIC: + return "BASIC"; + case ATNStateType::RULE_START: + return "RULE_START"; + case ATNStateType::BLOCK_START: + return "BLOCK_START"; + case ATNStateType::PLUS_BLOCK_START: + return "PLUS_BLOCK_START"; + case ATNStateType::STAR_BLOCK_START: + return "STAR_BLOCK_START"; + case ATNStateType::TOKEN_START: + return "TOKEN_START"; + case ATNStateType::RULE_STOP: + return "RULE_STOP"; + case ATNStateType::BLOCK_END: + return "BLOCK_END"; + case ATNStateType::STAR_LOOP_BACK: + return "STAR_LOOP_BACK"; + case ATNStateType::STAR_LOOP_ENTRY: + return "STAR_LOOP_ENTRY"; + case ATNStateType::PLUS_LOOP_BACK: + return "PLUS_LOOP_BACK"; + case ATNStateType::LOOP_END: + return "LOOP_END"; + } + return "UNKNOWN"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h new file mode 100644 index 0000000000..e19b2cce92 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNStateType.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <string> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + // Constants for ATNState serialization. + enum class ATNStateType : size_t { + INVALID = 0, + BASIC = 1, + RULE_START = 2, + BLOCK_START = 3, + PLUS_BLOCK_START = 4, + STAR_BLOCK_START = 5, + TOKEN_START = 6, + RULE_STOP = 7, + BLOCK_END = 8, + STAR_LOOP_BACK = 9, + STAR_LOOP_ENTRY = 10, + PLUS_LOOP_BACK = 11, + LOOP_END = 12, + }; + + ANTLR4CPP_PUBLIC std::string atnStateTypeName(ATNStateType atnStateType); + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h new file mode 100644 index 0000000000..3530ef6051 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ATNType.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// Represents the type of recognizer an ATN applies to. + enum class ATNType { + LEXER = 0, + PARSER = 1, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp new file mode 100644 index 0000000000..1886b7e169 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.cpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ActionTransition.h" + +using namespace antlr4::atn; + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex) + : Transition(TransitionType::ACTION, target), ruleIndex(ruleIndex), actionIndex(INVALID_INDEX), isCtxDependent(false) { +} + +ActionTransition::ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent) + : Transition(TransitionType::ACTION, target), ruleIndex(ruleIndex), actionIndex(actionIndex), isCtxDependent(isCtxDependent) { +} + +bool ActionTransition::isEpsilon() const { + return true; // we are to be ignored by analysis 'cept for predicates +} + +bool ActionTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string ActionTransition::toString() const { + return " ACTION " + Transition::toString() + " { ruleIndex: " + std::to_string(ruleIndex) + ", actionIndex: " + + std::to_string(actionIndex) + ", isCtxDependent: " + std::to_string(isCtxDependent) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h new file mode 100644 index 0000000000..1700297a78 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ActionTransition.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ActionTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ACTION; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const size_t ruleIndex; + const size_t actionIndex; + const bool isCtxDependent; // e.g., $i ref in action + + ActionTransition(ATNState *target, size_t ruleIndex); + + ActionTransition(ATNState *target, size_t ruleIndex, size_t actionIndex, bool isCtxDependent); + + virtual bool isEpsilon() const override; + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp new file mode 100644 index 0000000000..72ce922633 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/AmbiguityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +AmbiguityInfo::AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->ambigAlts = ambigAlts; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h new file mode 100644 index 0000000000..db594a1f48 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AmbiguityInfo.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for an ambiguity. + /// Ambiguities are decisions where a particular input resulted in an SLL + /// conflict, followed by LL prediction also reaching a conflict state + /// (indicating a true ambiguity in the grammar). + /// + /// <para> + /// This event may be reported during SLL prediction in cases where the + /// conflicting SLL configuration set provides sufficient information to + /// determine that the SLL conflict is truly an ambiguity. For example, if none + /// of the ATN configurations in the conflicting SLL configuration set have + /// traversed a global follow transition (i.e. + /// <seealso cref="ATNConfig#reachesIntoOuterContext"/> is 0 for all configurations), then + /// the result of SLL prediction for that input is known to be equivalent to the + /// result of LL prediction for that input.</para> + /// + /// <para> + /// In some cases, the minimum represented alternative in the conflicting LL + /// configuration set is not equal to the minimum represented alternative in the + /// conflicting SLL configuration set. Grammars and inputs which result in this + /// scenario are unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means + /// they cannot use the two-stage parsing strategy to improve parsing performance + /// for that input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportAmbiguity </seealso> + /// <seealso cref= ANTLRErrorListener#reportAmbiguity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC AmbiguityInfo : public DecisionEventInfo { + public: + /// The set of alternative numbers for this decision event that lead to a valid parse. + antlrcpp::BitSet ambigAlts; + + /// <summary> + /// Constructs a new instance of the <seealso cref="AmbiguityInfo"/> class with the + /// specified detailed ambiguity information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set identifying the ambiguous + /// alternatives for the current input </param> + /// <param name="ambigAlts"> The set of alternatives in the decision that lead to a valid parse. + /// The predicted alt is the min(ambigAlts) </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the ambiguity was identified during + /// prediction </param> + /// <param name="fullCtx"> {@code true} if the ambiguity was identified during LL + /// prediction; otherwise, {@code false} if the ambiguity was identified + /// during SLL prediction </param> + AmbiguityInfo(size_t decision, ATNConfigSet *configs, const antlrcpp::BitSet &ambigAlts, TokenStream *input, + size_t startIndex, size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.cpp new file mode 100644 index 0000000000..e9478001b4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.cpp @@ -0,0 +1,109 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ArrayPredictionContext.h" + +#include <cstring> + +#include "atn/SingletonPredictionContext.h" +#include "misc/MurmurHash.h" +#include "support/Casts.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + + bool predictionContextEqual(const Ref<const PredictionContext> &lhs, const Ref<const PredictionContext> &rhs) { + return *lhs == *rhs; + } + +} + +ArrayPredictionContext::ArrayPredictionContext(const SingletonPredictionContext &predictionContext) + : ArrayPredictionContext({ predictionContext.parent }, { predictionContext.returnState }) {} + +ArrayPredictionContext::ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents, + std::vector<size_t> returnStates) + : PredictionContext(PredictionContextType::ARRAY), parents(std::move(parents)), returnStates(std::move(returnStates)) { + assert(this->parents.size() > 0); + assert(this->returnStates.size() > 0); + assert(this->parents.size() == this->returnStates.size()); +} + +bool ArrayPredictionContext::isEmpty() const { + // Since EMPTY_RETURN_STATE can only appear in the last position, we don't need to verify that size == 1. + return returnStates[0] == EMPTY_RETURN_STATE; +} + +size_t ArrayPredictionContext::size() const { + return returnStates.size(); +} + +const Ref<const PredictionContext>& ArrayPredictionContext::getParent(size_t index) const { + return parents[index]; +} + +size_t ArrayPredictionContext::getReturnState(size_t index) const { + return returnStates[index]; +} + +size_t ArrayPredictionContext::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getContextType())); + for (const auto &parent : parents) { + hash = MurmurHash::update(hash, parent); + } + for (const auto &returnState : returnStates) { + hash = MurmurHash::update(hash, returnState); + } + return MurmurHash::finish(hash, 1 + parents.size() + returnStates.size()); +} + +bool ArrayPredictionContext::equals(const PredictionContext &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const auto &array = downCast<const ArrayPredictionContext&>(other); + return returnStates.size() == array.returnStates.size() && + parents.size() == array.parents.size() && + cachedHashCodeEqual(cachedHashCode(), array.cachedHashCode()) && + std::memcmp(returnStates.data(), array.returnStates.data(), returnStates.size() * sizeof(decltype(returnStates)::value_type)) == 0 && + std::equal(parents.begin(), parents.end(), array.parents.begin(), predictionContextEqual); +} + +std::string ArrayPredictionContext::toString() const { + if (isEmpty()) { + return "[]"; + } + + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < returnStates.size(); i++) { + if (i > 0) { + ss << ", "; + } + if (returnStates[i] == EMPTY_RETURN_STATE) { + ss << "$"; + continue; + } + ss << returnStates[i]; + if (parents[i] != nullptr) { + ss << " " << parents[i]->toString(); + } else { + ss << "nul"; + } + } + ss << "]"; + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h new file mode 100644 index 0000000000..f43db98a01 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ArrayPredictionContext.h @@ -0,0 +1,51 @@ + +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class SingletonPredictionContext; + + class ANTLR4CPP_PUBLIC ArrayPredictionContext final : public PredictionContext { + public: + static bool is(const PredictionContext &predictionContext) { return predictionContext.getContextType() == PredictionContextType::ARRAY; } + + static bool is(const PredictionContext *predictionContext) { return predictionContext != nullptr && is(*predictionContext); } + + /// Parent can be empty only if full ctx mode and we make an array + /// from EMPTY and non-empty. We merge EMPTY by using null parent and + /// returnState == EMPTY_RETURN_STATE. + // Also here: we use a strong reference to our parents to avoid having them freed prematurely. + // See also SinglePredictionContext. + std::vector<Ref<const PredictionContext>> parents; + + /// Sorted for merge, no duplicates; if present, EMPTY_RETURN_STATE is always last. + std::vector<size_t> returnStates; + + explicit ArrayPredictionContext(const SingletonPredictionContext &predictionContext); + + ArrayPredictionContext(std::vector<Ref<const PredictionContext>> parents, std::vector<size_t> returnStates); + + ArrayPredictionContext(ArrayPredictionContext&&) = default; + + bool isEmpty() const override; + size_t size() const override; + const Ref<const PredictionContext>& getParent(size_t index) const override; + size_t getReturnState(size_t index) const override; + bool equals(const PredictionContext &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp new file mode 100644 index 0000000000..74153bf5cd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.cpp @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/IntervalSet.h" +#include "atn/Transition.h" + +#include "atn/AtomTransition.h" + +using namespace antlr4::misc; +using namespace antlr4::atn; + +AtomTransition::AtomTransition(ATNState *target, size_t label) : Transition(TransitionType::ATOM, target), _label(label) { +} + +IntervalSet AtomTransition::label() const { + return IntervalSet::of((int)_label); +} + +bool AtomTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return _label == symbol; +} + +std::string AtomTransition::toString() const { + return "ATOM " + Transition::toString() + " { label: " + std::to_string(_label) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h new file mode 100644 index 0000000000..db62a7feab --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/AtomTransition.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// TODO: make all transitions sets? no, should remove set edges. + class ANTLR4CPP_PUBLIC AtomTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::ATOM; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + /// The token type or character value; or, signifies special label. + /// TODO: rename this to label + const size_t _label; + + AtomTransition(ATNState *target, size_t label); + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h new file mode 100644 index 0000000000..1c462ec0eb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicBlockStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BasicBlockStartState() : BlockStartState(ATNStateType::BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h new file mode 100644 index 0000000000..7f8a9ef0dd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BasicState.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC BasicState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BASIC; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BasicState() : ATNState(ATNStateType::BASIC) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h new file mode 100644 index 0000000000..11ef5499ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockEndState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Terminal node of a simple {@code (a|b|c)} block. + class ANTLR4CPP_PUBLIC BlockEndState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::BLOCK_END; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BlockStartState *startState = nullptr; + + BlockEndState() : ATNState(ATNStateType::BLOCK_END) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h new file mode 100644 index 0000000000..3475115894 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/BlockStartState.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The start of a regular {@code (...)} block. + class ANTLR4CPP_PUBLIC BlockStartState : public DecisionState { + public: + static bool is(const ATNState &atnState) { + const auto stateType = atnState.getStateType(); + return stateType >= ATNStateType::BLOCK_START && stateType <= ATNStateType::STAR_BLOCK_START; + } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + BlockEndState *endState = nullptr; + + protected: + using DecisionState::DecisionState; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp new file mode 100644 index 0000000000..12442a9bc0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ContextSensitivityInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ContextSensitivityInfo::ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, + size_t startIndex, size_t stopIndex) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, true) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h new file mode 100644 index 0000000000..430ce3b6e8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ContextSensitivityInfo.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a context sensitivity. + /// Context sensitivities are decisions where a particular input resulted in an + /// SLL conflict, but LL prediction produced a single unique alternative. + /// + /// <para> + /// In some cases, the unique alternative identified by LL prediction is not + /// equal to the minimum represented alternative in the conflicting SLL + /// configuration set. Grammars and inputs which result in this scenario are + /// unable to use <seealso cref="PredictionMode#SLL"/>, which in turn means they cannot use + /// the two-stage parsing strategy to improve parsing performance for that + /// input.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#reportContextSensitivity </seealso> + /// <seealso cref= ANTLRErrorListener#reportContextSensitivity + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ContextSensitivityInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ContextSensitivityInfo"/> class + /// with the specified detailed context sensitivity information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the unique + /// alternative identified by full-context prediction </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the context sensitivity was + /// identified during full-context prediction </param> + ContextSensitivityInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp new file mode 100644 index 0000000000..bca6c778c0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.cpp @@ -0,0 +1,14 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/DecisionEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +DecisionEventInfo::DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx) + : decision(decision), configs(configs), input(input), startIndex(startIndex), stopIndex(stopIndex), fullCtx(fullCtx) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h new file mode 100644 index 0000000000..af7f5f4b17 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionEventInfo.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This is the base class for gathering detailed information about prediction + /// events which occur during parsing. + /// + /// Note that we could record the parser call stack at the time this event + /// occurred but in the presence of left recursive rules, the stack is kind of + /// meaningless. It's better to look at the individual configurations for their + /// individual stacks. Of course that is a <seealso cref="PredictionContext"/> object + /// not a parse tree node and so it does not have information about the extent + /// (start...stop) of the various subtrees. Examining the stack tops of all + /// configurations provide the return states for the rule invocations. + /// From there you can get the enclosing rule. + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionEventInfo { + public: + /// <summary> + /// The invoked decision number which this event is related to. + /// </summary> + /// <seealso cref= ATN#decisionToState </seealso> + const size_t decision; + + /// <summary> + /// The configuration set containing additional information relevant to the + /// prediction state when the current event occurred, or {@code null} if no + /// additional information is relevant or available. + /// </summary> + const ATNConfigSet *configs; + + /// <summary> + /// The input token stream which is being parsed. + /// </summary> + const TokenStream *input; + + /// <summary> + /// The token index in the input stream at which the current prediction was + /// originally invoked. + /// </summary> + const size_t startIndex; + + /// <summary> + /// The token index in the input stream at which the current event occurred. + /// </summary> + const size_t stopIndex; + + /// <summary> + /// {@code true} if the current event occurred during LL prediction; + /// otherwise, {@code false} if the input occurred during SLL prediction. + /// </summary> + const bool fullCtx; + + DecisionEventInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp new file mode 100644 index 0000000000..ee9b1aac34 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ErrorInfo.h" +#include "atn/LookaheadEventInfo.h" + +#include "atn/DecisionInfo.h" + +using namespace antlr4::atn; + +DecisionInfo::DecisionInfo(size_t decision) : decision(decision) { +} + +std::string DecisionInfo::toString() const { + std::stringstream ss; + + ss << "{decision=" << decision << ", contextSensitivities=" << contextSensitivities.size() << ", errors="; + ss << errors.size() << ", ambiguities=" << ambiguities.size() << ", SLL_lookahead=" << SLL_TotalLook; + ss << ", SLL_ATNTransitions=" << SLL_ATNTransitions << ", SLL_DFATransitions=" << SLL_DFATransitions; + ss << ", LL_Fallback=" << LL_Fallback << ", LL_lookahead=" << LL_TotalLook << ", LL_ATNTransitions=" << LL_ATNTransitions << '}'; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h new file mode 100644 index 0000000000..2b43ad8be9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionInfo.h @@ -0,0 +1,227 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ContextSensitivityInfo.h" +#include "atn/AmbiguityInfo.h" +#include "atn/PredicateEvalInfo.h" +#include "atn/ErrorInfo.h" + +namespace antlr4 { +namespace atn { + + class LookaheadEventInfo; + + /// <summary> + /// This class contains profiling gathered for a particular decision. + /// + /// <para> + /// Parsing performance in ANTLR 4 is heavily influenced by both static factors + /// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the + /// choice of input and the state of the DFA cache at the time profiling + /// operations are started). For best results, gather and use aggregate + /// statistics from a large sample of inputs representing the inputs expected in + /// production before using the results to make changes in the grammar.</para> + /// + /// @since 4.3 + /// </summary> + class ANTLR4CPP_PUBLIC DecisionInfo { + public: + /// <summary> + /// The decision number, which is an index into <seealso cref="ATN#decisionToState"/>. + /// </summary> + const size_t decision; + + /// <summary> + /// The total number of times <seealso cref="ParserATNSimulator#adaptivePredict"/> was + /// invoked for this decision. + /// </summary> + long long invocations = 0; + + /// <summary> + /// The total time spent in <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision, in nanoseconds. + /// + /// <para> + /// The value of this field contains the sum of differential results obtained + /// by <seealso cref="System#nanoTime()"/>, and is not adjusted to compensate for JIT + /// and/or garbage collection overhead. For best accuracy, use a modern JVM + /// implementation that provides precise results from + /// <seealso cref="System#nanoTime()"/>, and perform profiling in a separate process + /// which is warmed up by parsing the input prior to profiling. If desired, + /// call <seealso cref="ATNSimulator#clearDFA"/> to reset the DFA cache to its initial + /// state before starting the profiling measurement pass.</para> + /// </summary> + long long timeInPrediction = 0; + + /// <summary> + /// The sum of the lookahead required for SLL prediction for this decision. + /// Note that SLL prediction is used before LL prediction for performance + /// reasons even when <seealso cref="PredictionMode#LL"/> or + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/> is used. + /// </summary> + long long SLL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + /// </summary> + long long SLL_MaxLook = 0; + + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#SLL_MaxLook"/> value was set. + Ref<LookaheadEventInfo> SLL_MaxLookEvent; + + /// <summary> + /// The sum of the lookahead required for LL prediction for this decision. + /// Note that LL prediction is only used when SLL prediction reaches a + /// conflict state. + /// </summary> + long long LL_TotalLook = 0; + + /// <summary> + /// Gets the minimum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>, an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>, or a syntax error. + /// </summary> + long long LL_MinLook = 0; + + /// <summary> + /// Gets the maximum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// <seealso cref="PredictionMode#LL"/>, an ambiguity state (for + /// <seealso cref="PredictionMode#LL_EXACT_AMBIG_DETECTION"/>, or a syntax error. + /// </summary> + long long LL_MaxLook = 0; + + /// <summary> + /// Gets the <seealso cref="LookaheadEventInfo"/> associated with the event where the + /// <seealso cref="#LL_MaxLook"/> value was set. + /// </summary> + Ref<LookaheadEventInfo> LL_MaxLookEvent; + + /// <summary> + /// A collection of <seealso cref="ContextSensitivityInfo"/> instances describing the + /// context sensitivities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= ContextSensitivityInfo </seealso> + std::vector<ContextSensitivityInfo> contextSensitivities; + + /// <summary> + /// A collection of <seealso cref="ErrorInfo"/> instances describing the parse errors + /// identified during calls to <seealso cref="ParserATNSimulator#adaptivePredict"/> for + /// this decision. + /// </summary> + /// <seealso cref= ErrorInfo </seealso> + std::vector<ErrorInfo> errors; + + /// <summary> + /// A collection of <seealso cref="AmbiguityInfo"/> instances describing the + /// ambiguities encountered during LL prediction for this decision. + /// </summary> + /// <seealso cref= AmbiguityInfo </seealso> + std::vector<AmbiguityInfo> ambiguities; + + /// <summary> + /// A collection of <seealso cref="PredicateEvalInfo"/> instances describing the + /// results of evaluating individual predicates during prediction for this + /// decision. + /// </summary> + /// <seealso cref= PredicateEvalInfo </seealso> + std::vector<PredicateEvalInfo> predicateEvals; + + /// <summary> + /// The total number of ATN transitions required during SLL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of SLL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the SLL parsing algorithm + /// will use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #SLL_ATNTransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long SLL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during SLL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for SLL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long SLL_DFATransitions = 0; + + /// <summary> + /// Gets the total number of times SLL prediction completed in a conflict + /// state, resulting in fallback to LL prediction. + /// + /// <para>Note that this value is not related to whether or not + /// <seealso cref="PredictionMode#SLL"/> may be used successfully with a particular + /// grammar. If the ambiguity resolution algorithm applied to the SLL + /// conflicts for this decision produce the same result as LL prediction for + /// this decision, <seealso cref="PredictionMode#SLL"/> would produce the same overall + /// parsing result as <seealso cref="PredictionMode#LL"/>.</para> + /// </summary> + long long LL_Fallback = 0; + + /// <summary> + /// The total number of ATN transitions required during LL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + /// <para> + /// If DFA caching of LL transitions is employed by the implementation, ATN + /// computation may cache the computed edge for efficient lookup during + /// future parsing of this decision. Otherwise, the LL parsing algorithm will + /// use ATN transitions exclusively.</para> + /// </summary> + /// <seealso cref= #LL_DFATransitions </seealso> + /// <seealso cref= ParserATNSimulator#computeTargetState </seealso> + /// <seealso cref= LexerATNSimulator#computeTargetState </seealso> + long long LL_ATNTransitions = 0; + + /// <summary> + /// The total number of DFA transitions required during LL prediction for + /// this decision. + /// + /// <para>If the ATN simulator implementation does not use DFA caching for LL + /// transitions, this value will be 0.</para> + /// </summary> + /// <seealso cref= ParserATNSimulator#getExistingTargetState </seealso> + /// <seealso cref= LexerATNSimulator#getExistingTargetState </seealso> + long long LL_DFATransitions = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="DecisionInfo"/> class to contain + /// statistics for a particular decision. + /// </summary> + /// <param name="decision"> The decision number </param> + explicit DecisionInfo(size_t decision); + + std::string toString() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp new file mode 100644 index 0000000000..72adb210f5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.cpp @@ -0,0 +1,12 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/DecisionState.h" + +using namespace antlr4::atn; + +std::string DecisionState::toString() const { + return "DECISION " + ATNState::toString(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h new file mode 100644 index 0000000000..b7341ac6c9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/DecisionState.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC DecisionState : public ATNState { + public: + static bool is(const ATNState &atnState) { + const auto stateType = atnState.getStateType(); + return (stateType >= ATNStateType::BLOCK_START && stateType <= ATNStateType::TOKEN_START) || + stateType == ATNStateType::PLUS_LOOP_BACK || + stateType == ATNStateType::STAR_LOOP_ENTRY; + } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + int decision = -1; + bool nonGreedy = false; + + virtual std::string toString() const override; + + protected: + using ATNState::ATNState; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp new file mode 100644 index 0000000000..503fb1630e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.cpp @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/EpsilonTransition.h" + +using namespace antlr4::atn; + +EpsilonTransition::EpsilonTransition(ATNState *target) : EpsilonTransition(target, INVALID_INDEX) { +} + +EpsilonTransition::EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn) + : Transition(TransitionType::EPSILON, target), _outermostPrecedenceReturn(outermostPrecedenceReturn) { +} + +size_t EpsilonTransition::outermostPrecedenceReturn() const { + return _outermostPrecedenceReturn; +} + +bool EpsilonTransition::isEpsilon() const { + return true; +} + +bool EpsilonTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string EpsilonTransition::toString() const { + return "EPSILON " + Transition::toString() + " {}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h new file mode 100644 index 0000000000..21bc812822 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/EpsilonTransition.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC EpsilonTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::EPSILON; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + explicit EpsilonTransition(ATNState *target); + EpsilonTransition(ATNState *target, size_t outermostPrecedenceReturn); + + /** + * @return the rule index of a precedence rule for which this transition is + * returning from, where the precedence value is 0; otherwise, INVALID_INDEX. + * + * @see ATNConfig#isPrecedenceFilterSuppressed() + * @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet) + * @since 4.4.1 + */ + size_t outermostPrecedenceReturn() const; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + private: + const size_t _outermostPrecedenceReturn; // A rule index. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp new file mode 100644 index 0000000000..efe8507124 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNConfigSet.h" + +#include "atn/ErrorInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +ErrorInfo::ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h new file mode 100644 index 0000000000..d34642a195 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ErrorInfo.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for a syntax error + /// identified during prediction. Syntax errors occur when the prediction + /// algorithm is unable to identify an alternative which would lead to a + /// successful parse. + /// </summary> + /// <seealso cref= Parser#notifyErrorListeners(Token, String, RecognitionException) </seealso> + /// <seealso cref= ANTLRErrorListener#syntaxError + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC ErrorInfo : public DecisionEventInfo { + public: + /// <summary> + /// Constructs a new instance of the <seealso cref="ErrorInfo"/> class with the + /// specified detailed syntax error information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set reached during prediction + /// prior to reaching the <seealso cref="ATNSimulator#ERROR"/> state </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the syntax error was identified </param> + /// <param name="fullCtx"> {@code true} if the syntax error was identified during LL + /// prediction; otherwise, {@code false} if the syntax error was identified + /// during SLL prediction </param> + ErrorInfo(size_t decision, ATNConfigSet *configs, TokenStream *input, size_t startIndex, size_t stopIndex, + bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp new file mode 100644 index 0000000000..1d43697584 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.cpp @@ -0,0 +1,189 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStopState.h" +#include "atn/Transition.h" +#include "atn/RuleTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/WildcardTransition.h" +#include "atn/NotSetTransition.h" +#include "misc/IntervalSet.h" +#include "atn/ATNConfig.h" + +#include "support/CPPUtils.h" + +#include "atn/LL1Analyzer.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + struct ATNConfigHasher final { + size_t operator()(const ATNConfig& atn_config) const { + return atn_config.hashCode(); + } + }; + + struct ATNConfigComparer final { + bool operator()(const ATNConfig& lhs, const ATNConfig& rhs) const { + return lhs == rhs; + } + }; + + class LL1AnalyzerImpl final { + public: + LL1AnalyzerImpl(const ATN& atn, misc::IntervalSet& look, bool seeThruPreds, bool addEOF) : _atn(atn), _look(look), _seeThruPreds(seeThruPreds), _addEOF(addEOF) {} + + /// <summary> + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + /// <p/> + /// If {@code ctx} is {@code null} and {@code stopState} or the end of the + /// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to + /// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is + /// {@code true} and {@code stopState} or the end of the outermost rule is + /// reached, <seealso cref="Token#EOF"/> is added to the result set. + /// </summary> + /// <param name="s"> the ATN state. </param> + /// <param name="stopState"> the ATN state to stop at. This can be a + /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param> + /// <param name="ctx"> The outer context, or {@code null} if the outer context should + /// not be used. </param> + /// <param name="look"> The result lookahead set. </param> + /// <param name="lookBusy"> A set used for preventing epsilon closures in the ATN + /// from causing a stack overflow. Outside code should pass + /// {@code new HashSet<ATNConfig>} for this argument. </param> + /// <param name="calledRuleStack"> A set used for preventing left recursion in the + /// ATN from causing a stack overflow. Outside code should pass + /// {@code new BitSet()} for this argument. </param> + /// <param name="seeThruPreds"> {@code true} to true semantic predicates as + /// implicitly {@code true} and "see through them", otherwise {@code false} + /// to treat semantic predicates as opaque and add <seealso cref="#HIT_PRED"/> to the + /// result if one is encountered. </param> + /// <param name="addEOF"> Add <seealso cref="Token#EOF"/> to the result if the end of the + /// outermost context is reached. This parameter has no effect if {@code ctx} + /// is {@code null}. </param> + void LOOK(ATNState *s, ATNState *stopState, Ref<const PredictionContext> const& ctx) { + if (!_lookBusy.insert(ATNConfig(s, 0, ctx)).second) { + return; + } + + // ml: s can never be null, hence no need to check if stopState is != null. + if (s == stopState) { + if (ctx == nullptr) { + _look.add(Token::EPSILON); + return; + } else if (ctx->isEmpty() && _addEOF) { + _look.add(Token::EOF); + return; + } + } + + if (s->getStateType() == ATNStateType::RULE_STOP) { + if (ctx == nullptr) { + _look.add(Token::EPSILON); + return; + } else if (ctx->isEmpty() && _addEOF) { + _look.add(Token::EOF); + return; + } + + if (ctx != PredictionContext::EMPTY) { + bool removed = _calledRuleStack.test(s->ruleIndex); + _calledRuleStack[s->ruleIndex] = false; + // run thru all possible stack tops in ctx + for (size_t i = 0; i < ctx->size(); i++) { + ATNState *returnState = _atn.states[ctx->getReturnState(i)]; + LOOK(returnState, stopState, ctx->getParent(i)); + } + if (removed) { + _calledRuleStack.set(s->ruleIndex); + } + return; + } + } + + size_t n = s->transitions.size(); + for (size_t i = 0; i < n; i++) { + const Transition *t = s->transitions[i].get(); + const auto tType = t->getTransitionType(); + + if (tType == TransitionType::RULE) { + if (_calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex]) { + continue; + } + + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(ctx, (static_cast<const RuleTransition*>(t))->followState->stateNumber); + + _calledRuleStack.set((static_cast<const RuleTransition*>(t))->target->ruleIndex); + LOOK(t->target, stopState, newContext); + _calledRuleStack[(static_cast<const RuleTransition*>(t))->target->ruleIndex] = false; + + } else if (tType == TransitionType::PREDICATE || tType == TransitionType::PRECEDENCE) { + if (_seeThruPreds) { + LOOK(t->target, stopState, ctx); + } else { + _look.add(LL1Analyzer::HIT_PRED); + } + } else if (t->isEpsilon()) { + LOOK(t->target, stopState, ctx); + } else if (tType == TransitionType::WILDCARD) { + _look.addAll(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType))); + } else { + misc::IntervalSet set = t->label(); + if (!set.isEmpty()) { + if (tType == TransitionType::NOT_SET) { + set = set.complement(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType))); + } + _look.addAll(set); + } + } + } + } + + private: + const ATN& _atn; + misc::IntervalSet& _look; + antlrcpp::BitSet _calledRuleStack; + std::unordered_set<ATNConfig, ATNConfigHasher, ATNConfigComparer> _lookBusy; + bool _seeThruPreds; + bool _addEOF; + }; + +} + +std::vector<misc::IntervalSet> LL1Analyzer::getDecisionLookahead(ATNState *s) const { + std::vector<misc::IntervalSet> look; + + if (s == nullptr) { + return look; + } + + look.resize(s->transitions.size()); // Fills all interval sets with defaults. + for (size_t alt = 0; alt < s->transitions.size(); alt++) { + LL1AnalyzerImpl impl(_atn, look[alt], false, false); + impl.LOOK(s->transitions[alt]->target, nullptr, PredictionContext::EMPTY); + // Wipe out lookahead for this alternative if we found nothing + // or we had a predicate when we !seeThruPreds + if (look[alt].size() == 0 || look[alt].contains(LL1Analyzer::HIT_PRED)) { + look[alt].clear(); + } + } + return look; +} + +misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, RuleContext *ctx) const { + return LOOK(s, nullptr, ctx); +} + +misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const { + Ref<const PredictionContext> lookContext = ctx != nullptr ? PredictionContext::fromRuleContext(_atn, ctx) : nullptr; + misc::IntervalSet r; + LL1AnalyzerImpl impl(_atn, r, true, true); + impl.LOOK(s, stopState, lookContext); + return r; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.h new file mode 100644 index 0000000000..7d47c7610f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LL1Analyzer.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" +#include "atn/ATNConfig.h" +#include "atn/PredictionContext.h" +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC LL1Analyzer final { + public: + /// Special value added to the lookahead sets to indicate that we hit + /// a predicate during analysis if {@code seeThruPreds==false}. + static constexpr size_t HIT_PRED = Token::INVALID_TYPE; + + explicit LL1Analyzer(const atn::ATN &atn) : _atn(atn) {} + + /// <summary> + /// Calculates the SLL(1) expected lookahead set for each outgoing transition + /// of an <seealso cref="ATNState"/>. The returned array has one element for each + /// outgoing transition in {@code s}. If the closure from transition + /// <em>i</em> leads to a semantic predicate before matching a symbol, the + /// element at index <em>i</em> of the result will be {@code null}. + /// </summary> + /// <param name="s"> the ATN state </param> + /// <returns> the expected symbols for each outgoing transition of {@code s}. </returns> + std::vector<misc::IntervalSet> getDecisionLookahead(ATNState *s) const; + + /// <summary> + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + /// <p/> + /// If {@code ctx} is {@code null} and the end of the rule containing + /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set. + /// If {@code ctx} is not {@code null} and the end of the outermost rule is + /// reached, <seealso cref="Token#EOF"/> is added to the result set. + /// </summary> + /// <param name="s"> the ATN state </param> + /// <param name="ctx"> the complete parser context, or {@code null} if the context + /// should be ignored + /// </param> + /// <returns> The set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. </returns> + misc::IntervalSet LOOK(ATNState *s, RuleContext *ctx) const; + + /// <summary> + /// Compute set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. + /// <p/> + /// If {@code ctx} is {@code null} and the end of the rule containing + /// {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to the result set. + /// If {@code ctx} is not {@code null} and the end of the outermost rule is + /// reached, <seealso cref="Token#EOF"/> is added to the result set. + /// </summary> + /// <param name="s"> the ATN state </param> + /// <param name="stopState"> the ATN state to stop at. This can be a + /// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param> + /// <param name="ctx"> the complete parser context, or {@code null} if the context + /// should be ignored + /// </param> + /// <returns> The set of tokens that can follow {@code s} in the ATN in the + /// specified {@code ctx}. </returns> + misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const; + + private: + const atn::ATN &_atn; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.cpp new file mode 100644 index 0000000000..e70cfac2ca --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.cpp @@ -0,0 +1,67 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "atn/DecisionState.h" +#include "atn/PredictionContext.h" +#include "SemanticContext.h" +#include "atn/LexerActionExecutor.h" + +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerATNConfig.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context) + : ATNConfig(state, alt, std::move(context)) {} + +LexerATNConfig::LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context, Ref<const LexerActionExecutor> lexerActionExecutor) + : ATNConfig(state, alt, std::move(context)), _lexerActionExecutor(std::move(lexerActionExecutor)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state) + : ATNConfig(other, state), _lexerActionExecutor(other._lexerActionExecutor), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const LexerActionExecutor> lexerActionExecutor) + : ATNConfig(other, state), _lexerActionExecutor(std::move(lexerActionExecutor)), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +LexerATNConfig::LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const PredictionContext> context) + : ATNConfig(other, state, std::move(context)), _lexerActionExecutor(other._lexerActionExecutor), _passedThroughNonGreedyDecision(checkNonGreedyDecision(other, state)) {} + +size_t LexerATNConfig::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, alt); + hashCode = misc::MurmurHash::update(hashCode, context); + hashCode = misc::MurmurHash::update(hashCode, semanticContext); + hashCode = misc::MurmurHash::update(hashCode, _passedThroughNonGreedyDecision ? 1 : 0); + hashCode = misc::MurmurHash::update(hashCode, _lexerActionExecutor); + hashCode = misc::MurmurHash::finish(hashCode, 6); + return hashCode; +} + +bool LexerATNConfig::operator==(const LexerATNConfig& other) const +{ + if (this == &other) + return true; + + if (_passedThroughNonGreedyDecision != other._passedThroughNonGreedyDecision) + return false; + + if (_lexerActionExecutor == nullptr) + return other._lexerActionExecutor == nullptr; + if (*_lexerActionExecutor != *(other._lexerActionExecutor)) { + return false; + } + + return ATNConfig::operator==(other); +} + +bool LexerATNConfig::checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target) { + return source._passedThroughNonGreedyDecision || + (DecisionState::is(target) && downCast<DecisionState*>(target)->nonGreedy); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h new file mode 100644 index 0000000000..7d1d6b40e2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNConfig.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC LexerATNConfig final : public ATNConfig { + public: + LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context); + LexerATNConfig(ATNState *state, int alt, Ref<const PredictionContext> context, Ref<const LexerActionExecutor> lexerActionExecutor); + + LexerATNConfig(LexerATNConfig const& other, ATNState *state); + LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const LexerActionExecutor> lexerActionExecutor); + LexerATNConfig(LexerATNConfig const& other, ATNState *state, Ref<const PredictionContext> context); + + /** + * Gets the {@link LexerActionExecutor} capable of executing the embedded + * action(s) for the current configuration. + */ + const Ref<const LexerActionExecutor>& getLexerActionExecutor() const { return _lexerActionExecutor; } + bool hasPassedThroughNonGreedyDecision() const { return _passedThroughNonGreedyDecision; } + + virtual size_t hashCode() const override; + + bool operator==(const LexerATNConfig& other) const; + + private: + /** + * This is the backing field for {@link #getLexerActionExecutor}. + */ + const Ref<const LexerActionExecutor> _lexerActionExecutor; + const bool _passedThroughNonGreedyDecision = false; + + static bool checkNonGreedyDecision(LexerATNConfig const& source, ATNState *target); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.cpp new file mode 100644 index 0000000000..ef1b1cf2f1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.cpp @@ -0,0 +1,617 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "IntStream.h" +#include "atn/OrderedATNConfigSet.h" +#include "Token.h" +#include "LexerNoViableAltException.h" +#include "atn/RuleStopState.h" +#include "atn/RuleTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/PredicateTransition.h" +#include "atn/ActionTransition.h" +#include "atn/TokensStartState.h" +#include "misc/Interval.h" +#include "dfa/DFA.h" +#include "Lexer.h" +#include "internal/Synchronization.h" + +#include "dfa/DFAState.h" +#include "atn/LexerATNConfig.h" +#include "atn/LexerActionExecutor.h" + +#include "atn/LexerATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_DFA 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +void LexerATNSimulator::SimState::reset() { + *this = SimState(); +} + +LexerATNSimulator::LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : LexerATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +LexerATNSimulator::LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) + : ATNSimulator(atn, sharedContextCache), _recog(recog), _decisionToDFA(decisionToDFA) { + InitializeInstanceFields(); +} + +void LexerATNSimulator::copyState(LexerATNSimulator *simulator) { + _charPositionInLine = simulator->_charPositionInLine; + _line = simulator->_line; + _mode = simulator->_mode; + _startIndex = simulator->_startIndex; +} + +size_t LexerATNSimulator::match(CharStream *input, size_t mode) { + _mode = mode; + ssize_t mark = input->mark(); + + auto onExit = finally([input, mark] { + input->release(mark); + }); + + _startIndex = input->index(); + _prevAccept.reset(); + const dfa::DFA &dfa = _decisionToDFA[mode]; + dfa::DFAState* s0; + { + SharedLock<SharedMutex> stateLock(atn._stateMutex); + s0 = dfa.s0; + } + if (s0 == nullptr) { + return matchATN(input); + } else { + return execATN(input, s0); + } +} + +void LexerATNSimulator::reset() { + _prevAccept.reset(); + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = Lexer::DEFAULT_MODE; +} + +void LexerATNSimulator::clearDFA() { + size_t size = _decisionToDFA.size(); + _decisionToDFA.clear(); + for (size_t d = 0; d < size; ++d) { + _decisionToDFA.emplace_back(atn.getDecisionState(d), d); + } +} + +size_t LexerATNSimulator::matchATN(CharStream *input) { + ATNState *startState = atn.modeToStartState[_mode]; + + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(input, startState); + + bool suppressEdge = s0_closure->hasSemanticContext; + s0_closure->hasSemanticContext = false; + + dfa::DFAState *next = addDFAState(s0_closure.release(), suppressEdge); + + size_t predict = execATN(input, next); + + return predict; +} + +size_t LexerATNSimulator::execATN(CharStream *input, dfa::DFAState *ds0) { + if (ds0->isAcceptState) { + // allow zero-length tokens + // ml: in Java code this method uses 3 params. The first is a member var of the class anyway (_prevAccept), so why pass it here? + captureSimState(input, ds0); + } + + size_t t = input->LA(1); + dfa::DFAState *s = ds0; // s is current/from DFA state + + while (true) { // while more work + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=null, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=null, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // it's configuration set; there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + dfa::DFAState *target = getExistingTargetState(s, t); + if (target == nullptr) { + target = computeTargetState(input, s, t); + } + + if (target == ERROR.get()) { + break; + } + + // If this is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if (t != Token::EOF) { + consume(input); + } + + if (target->isAcceptState) { + captureSimState(input, target); + if (t == Token::EOF) { + break; + } + } + + t = input->LA(1); + s = target; // flip; current DFA target becomes new src/from state + } + + return failOrAccept(input, s->configs.get(), t); +} + +dfa::DFAState *LexerATNSimulator::getExistingTargetState(dfa::DFAState *s, size_t t) { + dfa::DFAState* retval = nullptr; + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + if (t <= MAX_DFA_EDGE) { + auto iterator = s->edges.find(t - MIN_DFA_EDGE); +#if DEBUG_ATN == 1 + if (iterator != s->edges.end()) { + std::cout << std::string("reuse state ") << s->stateNumber << std::string(" edge to ") << iterator->second->stateNumber << std::endl; + } +#endif + + if (iterator != s->edges.end()) + retval = iterator->second; + } + return retval; +} + +dfa::DFAState *LexerATNSimulator::computeTargetState(CharStream *input, dfa::DFAState *s, size_t t) { + OrderedATNConfigSet *reach = new OrderedATNConfigSet(); /* mem-check: deleted on error or managed by new DFA state. */ + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + getReachableConfigSet(input, s->configs.get(), reach, t); + + if (reach->isEmpty()) { // we got nowhere on t from s + if (!reach->hasSemanticContext) { + // we got nowhere on t, don't throw out this knowledge; it'd + // cause a failover from DFA later. + addDFAEdge(s, t, ERROR.get()); + } + delete reach; + + // stop when we can't match any more char + return ERROR.get(); + } + + // Add an edge from s to target DFA found/created for reach + return addDFAEdge(s, t, reach); +} + +size_t LexerATNSimulator::failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t) { + if (_prevAccept.dfaState != nullptr) { + accept(input, _prevAccept.dfaState->lexerActionExecutor, _startIndex, _prevAccept.index, _prevAccept.line, _prevAccept.charPos); + return _prevAccept.dfaState->prediction; + } else { + // if no accept and EOF is first char, return EOF + if (t == Token::EOF && input->index() == _startIndex) { + return Token::EOF; + } + + throw LexerNoViableAltException(_recog, input, _startIndex, reach); + } +} + +void LexerATNSimulator::getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, ATNConfigSet *reach, size_t t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + size_t skipAlt = ATN::INVALID_ALT_NUMBER; + + for (const auto &c : closure_->configs) { + bool currentAltReachedAcceptState = c->alt == skipAlt; + if (currentAltReachedAcceptState && (std::static_pointer_cast<LexerATNConfig>(c))->hasPassedThroughNonGreedyDecision()) { + continue; + } + +#if DEBUG_ATN == 1 + std::cout << "testing " << getTokenName((int)t) << " at " << c->toString(true) << std::endl; +#endif + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + const Transition *trans = c->state->transitions[ti].get(); + ATNState *target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + auto lexerActionExecutor = downCast<const LexerATNConfig&>(*c).getLexerActionExecutor(); + if (lexerActionExecutor != nullptr) { + lexerActionExecutor = lexerActionExecutor->fixOffsetBeforeMatch((int)input->index() - (int)_startIndex); + } + + bool treatEofAsEpsilon = t == Token::EOF; + Ref<LexerATNConfig> config = std::make_shared<LexerATNConfig>(downCast<const LexerATNConfig&>(*c), + target, std::move(lexerActionExecutor)); + + if (closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. + skipAlt = c->alt; + break; + } + } + } + } +} + +void LexerATNSimulator::accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t /*startIndex*/, + size_t index, size_t line, size_t charPos) { +#if DEBUG_ATN == 1 + std::cout << "ACTION "; + std::cout << toString(lexerActionExecutor) << std::endl; +#endif + + // seek to after last char in token + input->seek(index); + _line = line; + _charPositionInLine = (int)charPos; + + if (lexerActionExecutor != nullptr && _recog != nullptr) { + lexerActionExecutor->execute(_recog, input, _startIndex); + } +} + +atn::ATNState *LexerATNSimulator::getReachableTarget(const Transition *trans, size_t t) { + if (trans->matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + return trans->target; + } + + return nullptr; +} + +std::unique_ptr<ATNConfigSet> LexerATNSimulator::computeStartState(CharStream *input, ATNState *p) { + Ref<const PredictionContext> initialContext = PredictionContext::EMPTY; // ml: the purpose of this assignment is unclear + std::unique_ptr<ATNConfigSet> configs(new OrderedATNConfigSet()); + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(target, (int)(i + 1), initialContext); + closure(input, c, configs.get(), false, false, false); + } + + return configs; +} + +bool LexerATNSimulator::closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon) { +#if DEBUG_ATN == 1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { +#if DEBUG_ATN == 1 + if (_recog != nullptr) { + std::cout << "closure at " << _recog->getRuleNames()[config->state->ruleIndex] << " rule stop " << config << std::endl; + } else { + std::cout << "closure at rule stop " << config << std::endl; + } +#endif + + if (config->context == nullptr || config->context->hasEmptyPath()) { + if (config->context == nullptr || config->context->isEmpty()) { + configs->add(config); + return true; + } else { + configs->add(std::make_shared<LexerATNConfig>(*config, config->state, PredictionContext::EMPTY)); + currentAltReachedAcceptState = true; + } + } + + if (config->context != nullptr && !config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE) { + Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + Ref<LexerATNConfig> c = std::make_shared<LexerATNConfig>(*config, returnState, newContext); + currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + } + + return currentAltReachedAcceptState; + } + + // optimization + if (!config->state->epsilonOnlyTransitions) { + if (!currentAltReachedAcceptState || !config->hasPassedThroughNonGreedyDecision()) { + configs->add(config); + } + } + + ATNState *p = config->state; + for (size_t i = 0; i < p->transitions.size(); i++) { + const Transition *t = p->transitions[i].get(); + Ref<LexerATNConfig> c = getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon); + if (c != nullptr) { + currentAltReachedAcceptState = closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + + return currentAltReachedAcceptState; +} + +Ref<LexerATNConfig> LexerATNSimulator::getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon) { + + Ref<LexerATNConfig> c = nullptr; + switch (t->getTransitionType()) { + case TransitionType::RULE: { + const RuleTransition *ruleTransition = static_cast<const RuleTransition*>(t); + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(config->context, ruleTransition->followState->stateNumber); + c = std::make_shared<LexerATNConfig>(*config, t->target, newContext); + break; + } + + case TransitionType::PRECEDENCE: + throw UnsupportedOperationException("Precedence predicates are not supported in lexers."); + + case TransitionType::PREDICATE: { + /* Track traversing semantic predicates. If we traverse, + we cannot add a DFA state for this "reach" computation + because the DFA would not test the predicate again in the + future. Rather than creating collections of semantic predicates + like v3 and testing them on prediction, v4 will test them on the + fly all the time using the ATN not the DFA. This is slower but + semantically it's not used that often. One of the key elements to + this predicate mechanism is not adding DFA states that see + predicates immediately afterwards in the ATN. For example, + + a : ID {p1}? | ID {p2}? ; + + should create the start state for rule 'a' (to save start state + competition), but should not create target of ID state. The + collection of ATN states the following ID references includes + states reached by traversing predicates. Since this is when we + test them, we cannot cash the DFA state target of ID. + */ + const PredicateTransition *pt = static_cast<const PredicateTransition*>(t); + +#if DEBUG_ATN == 1 + std::cout << "EVAL rule " << pt->getRuleIndex() << ":" << pt->getPredIndex() << std::endl; +#endif + + configs->hasSemanticContext = true; + if (evaluatePredicate(input, pt->getRuleIndex(), pt->getPredIndex(), speculative)) { + c = std::make_shared<LexerATNConfig>(*config, t->target); + } + break; + } + + case TransitionType::ACTION: + if (config->context == nullptr|| config->context->hasEmptyPath()) { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty() is false. In this case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. + auto lexerActionExecutor = LexerActionExecutor::append(config->getLexerActionExecutor(), + atn.lexerActions[static_cast<const ActionTransition *>(t)->actionIndex]); + c = std::make_shared<LexerATNConfig>(*config, t->target, std::move(lexerActionExecutor)); + break; + } + else { + // ignore actions in referenced rules + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + } + + case TransitionType::EPSILON: + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + + case TransitionType::ATOM: + case TransitionType::RANGE: + case TransitionType::SET: + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { + c = std::make_shared<LexerATNConfig>(*config, t->target); + break; + } + } + + break; + + default: // To silence the compiler. Other transition types are not used here. + break; + } + + return c; +} + +bool LexerATNSimulator::evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative) { + // assume true if no recognizer was provided + if (_recog == nullptr) { + return true; + } + + if (!speculative) { + return _recog->sempred(nullptr, ruleIndex, predIndex); + } + + size_t savedCharPositionInLine = _charPositionInLine; + size_t savedLine = _line; + size_t index = input->index(); + ssize_t marker = input->mark(); + + auto onExit = finally([this, input, savedCharPositionInLine, savedLine, index, marker] { + _charPositionInLine = savedCharPositionInLine; + _line = savedLine; + input->seek(index); + input->release(marker); + }); + + consume(input); + return _recog->sempred(nullptr, ruleIndex, predIndex); +} + +void LexerATNSimulator::captureSimState(CharStream *input, dfa::DFAState *dfaState) { + _prevAccept.index = input->index(); + _prevAccept.line = _line; + _prevAccept.charPos = _charPositionInLine; + _prevAccept.dfaState = dfaState; +} + +dfa::DFAState *LexerATNSimulator::addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q) { + /* leading to this call, ATNConfigSet.hasSemanticContext is used as a + * marker indicating dynamic predicate evaluation makes this edge + * dependent on the specific input sequence, so the static edge in the + * DFA should be omitted. The target DFAState is still created since + * execATN has the ability to resynchronize with the DFA state cache + * following the predicate evaluation step. + * + * TJP notes: next time through the DFA, we see a pred again and eval. + * If that gets us to a previously created (but dangling) DFA + * state, we can continue in pure DFA mode from there. + */ + bool suppressEdge = q->hasSemanticContext; + q->hasSemanticContext = false; + + dfa::DFAState *to = addDFAState(q); + + if (suppressEdge) { + return to; + } + + addDFAEdge(from, t, to); + return to; +} + +void LexerATNSimulator::addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q) { + if (/*t < MIN_DFA_EDGE ||*/ t > MAX_DFA_EDGE) { // MIN_DFA_EDGE is 0 + // Only track edges within the DFA bounds + return; + } + + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + p->edges[t - MIN_DFA_EDGE] = q; // connect +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs) { + return addDFAState(configs, true); +} + +dfa::DFAState *LexerATNSimulator::addDFAState(ATNConfigSet *configs, bool suppressEdge) { + /* the lexer evaluates predicates on-the-fly; by this point configs + * should not contain any configurations with unevaluated predicates. + */ + assert(!configs->hasSemanticContext); + + dfa::DFAState *proposed = new dfa::DFAState(std::unique_ptr<ATNConfigSet>(configs)); /* mem-check: managed by the DFA or deleted below */ + Ref<ATNConfig> firstConfigWithRuleStopState = nullptr; + for (const auto &c : configs->configs) { + if (RuleStopState::is(c->state)) { + firstConfigWithRuleStopState = c; + break; + } + } + + if (firstConfigWithRuleStopState != nullptr) { + proposed->isAcceptState = true; + proposed->lexerActionExecutor = downCast<const LexerATNConfig&>(*firstConfigWithRuleStopState).getLexerActionExecutor(); + proposed->prediction = atn.ruleToTokenType[firstConfigWithRuleStopState->state->ruleIndex]; + } + + dfa::DFA &dfa = _decisionToDFA[_mode]; + + { + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + auto [existing, inserted] = dfa.states.insert(proposed); + if (!inserted) { + delete proposed; + proposed = *existing; + } else { + // Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`, + // since we already inserted we need to subtract one. + proposed->stateNumber = static_cast<int>(dfa.states.size() - 1); + proposed->configs->setReadonly(true); + } + if (!suppressEdge) { + dfa.s0 = proposed; + } + } + + return proposed; +} + +dfa::DFA& LexerATNSimulator::getDFA(size_t mode) { + return _decisionToDFA[mode]; +} + +std::string LexerATNSimulator::getText(CharStream *input) { + // index is first lookahead char, don't include. + return input->getText(misc::Interval(_startIndex, input->index() - 1)); +} + +size_t LexerATNSimulator::getLine() const { + return _line; +} + +void LexerATNSimulator::setLine(size_t line) { + _line = line; +} + +size_t LexerATNSimulator::getCharPositionInLine() { + return _charPositionInLine; +} + +void LexerATNSimulator::setCharPositionInLine(size_t charPositionInLine) { + _charPositionInLine = charPositionInLine; +} + +void LexerATNSimulator::consume(CharStream *input) { + size_t curChar = input->LA(1); + if (curChar == '\n') { + _line++; + _charPositionInLine = 0; + } else { + _charPositionInLine++; + } + input->consume(); +} + +std::string LexerATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + return std::string("'") + static_cast<char>(t) + std::string("'"); +} + +void LexerATNSimulator::InitializeInstanceFields() { + _startIndex = 0; + _line = 1; + _charPositionInLine = 0; + _mode = antlr4::Lexer::DEFAULT_MODE; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h new file mode 100644 index 0000000000..304430b04d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h @@ -0,0 +1,199 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <atomic> + +#include "atn/ATNSimulator.h" +#include "atn/LexerATNConfig.h" +#include "atn/ATNConfigSet.h" + +namespace antlr4 { +namespace atn { + + /// "dup" of ParserInterpreter + class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { + protected: + struct ANTLR4CPP_PUBLIC SimState final { + size_t index = INVALID_INDEX; + size_t line = 0; + size_t charPos = INVALID_INDEX; + dfa::DFAState *dfaState = nullptr; + + void reset(); + }; + + public: + static constexpr size_t MIN_DFA_EDGE = 0; + static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN + + protected: + /// <summary> + /// When we hit an accept state in either the DFA or the ATN, we + /// have to notify the character stream to start buffering characters + /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state + /// includes the current index into the input, the current line, + /// and current character position in that line. Note that the Lexer is + /// tracking the starting line and characterization of the token. These + /// variables track the "state" of the simulator when it hits an accept state. + /// <p/> + /// We track these variables separately for the DFA and ATN simulation + /// because the DFA simulation often has to fail over to the ATN + /// simulation. If the ATN simulation fails, we need the DFA to fall + /// back to its previously accepted state, if any. If the ATN succeeds, + /// then the ATN does the accept and the DFA simulator that invoked it + /// can simply return the predicted token type. + /// </summary> + Lexer *const _recog; + + /// The current token's starting index into the character stream. + /// Shared across DFA to ATN simulation in case the ATN fails and the + /// DFA did not have a previous accept state. In this case, we use the + /// ATN-generated exception object. + size_t _startIndex; + + /// line number 1..n within the input. + size_t _line; + + /// The index of the character relative to the beginning of the line 0..n-1. + size_t _charPositionInLine; + + public: + std::vector<dfa::DFA> &_decisionToDFA; + + protected: + size_t _mode; + + /// Used during DFA/ATN exec to record the most recent accept configuration info. + SimState _prevAccept; + + public: + LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); + LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache); + virtual ~LexerATNSimulator() = default; + + virtual void copyState(LexerATNSimulator *simulator); + virtual size_t match(CharStream *input, size_t mode); + virtual void reset() override; + + virtual void clearDFA() override; + + protected: + virtual size_t matchATN(CharStream *input); + virtual size_t execATN(CharStream *input, dfa::DFAState *ds0); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="s"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// </summary> + /// <param name="input"> The input stream </param> + /// <param name="s"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. </returns> + virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t); + + virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t); + + /// <summary> + /// Given a starting configuration set, figure out all ATN configurations + /// we can reach upon input {@code t}. Parameter {@code reach} is a return + /// parameter. + /// </summary> + void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already + ATNConfigSet *reach, size_t t); + + virtual void accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index, + size_t line, size_t charPos); + + virtual ATNState *getReachableTarget(const Transition *trans, size_t t); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p); + + /// <summary> + /// Since the alternatives within any lexer decision are ordered by + /// preference, this method stops pursuing the closure as soon as an accept + /// state is reached. After the first accept state is reached by depth-first + /// search from {@code config}, all other (potentially reachable) states for + /// this rule would have a lower priority. + /// </summary> + /// <returns> {@code true} if an accept state is reached, otherwise + /// {@code false}. </returns> + virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs, + bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon); + + // side-effect: can alter configs.hasSemanticContext + virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t, + ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon); + + /// <summary> + /// Evaluate a predicate specified in the lexer. + /// <p/> + /// If {@code speculative} is {@code true}, this method was called before + /// <seealso cref="#consume"/> for the matched character. This method should call + /// <seealso cref="#consume"/> before evaluating the predicate to ensure position + /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>, + /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current + /// lexer state. This method should restore {@code input} and the simulator + /// to the original state before returning (i.e. undo the actions made by the + /// call to <seealso cref="#consume"/>. + /// </summary> + /// <param name="input"> The input stream. </param> + /// <param name="ruleIndex"> The rule containing the predicate. </param> + /// <param name="predIndex"> The index of the predicate within the rule. </param> + /// <param name="speculative"> {@code true} if the current index in {@code input} is + /// one character before the predicate's location. + /// </param> + /// <returns> {@code true} if the specified predicate evaluates to + /// {@code true}. </returns> + virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative); + + virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState); + virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); + virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); + + /// <summary> + /// Add a new DFA state if there isn't one with this set of + /// configurations already. This method also detects the first + /// configuration containing an ATN rule stop state. Later, when + /// traversing the DFA, we will know which rule to accept. + /// </summary> + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs); + + virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge); + + public: + dfa::DFA& getDFA(size_t mode); + + /// Get the text matched so far for the current token. + virtual std::string getText(CharStream *input); + virtual size_t getLine() const; + virtual void setLine(size_t line); + virtual size_t getCharPositionInLine(); + virtual void setCharPositionInLine(size_t charPositionInLine); + virtual void consume(CharStream *input); + virtual std::string getTokenName(size_t t); + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp new file mode 100644 index 0000000000..a9d9a6771b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.cpp @@ -0,0 +1,15 @@ +#include "LexerAction.h" + +using namespace antlr4::atn; + +size_t LexerAction::hashCode() const { + auto hash = cachedHashCode(); + if (hash == 0) { + hash = hashCodeImpl(); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h new file mode 100644 index 0000000000..5c30a89608 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerAction.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents a single action which can be executed following the successful + /// match of a lexer rule. Lexer actions are used for both embedded action syntax + /// and ANTLR 4's new lexer command syntax. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerAction { + public: + virtual ~LexerAction() = default; + + /// <summary> + /// Gets the serialization type of the lexer action. + /// </summary> + /// <returns> The serialization type of the lexer action. </returns> + /// + /// IMPORTANT: Unlike Java, this returns LexerActionType::INDEXED_CUSTOM for instances of + /// LexerIndexedCustomAction. If you need the wrapped action type, use + /// LexerIndexedCustomAction::getAction()->getActionType(). + LexerActionType getActionType() const { return _actionType; } + + /// <summary> + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the <seealso cref="CharStream"/> + /// index at the time the action is executed. + /// + /// <para>Many lexer commands, including {@code type}, {@code skip}, and + /// {@code more}, do not check the input index during their execution. + /// Actions like this are position-independent, and may be stored more + /// efficiently as part of the <seealso cref="LexerATNConfig#lexerActionExecutor"/>.</para> + /// </summary> + /// <returns> {@code true} if the lexer action semantics can be affected by the + /// position of the input <seealso cref="CharStream"/> at the time it is executed; + /// otherwise, {@code false}. </returns> + bool isPositionDependent() const { return _positionDependent; } + + /// <summary> + /// Execute the lexer action in the context of the specified <seealso cref="Lexer"/>. + /// + /// <para>For position-dependent actions, the input stream must already be + /// positioned correctly prior to calling this method.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. </param> + virtual void execute(Lexer *lexer) const = 0; + + size_t hashCode() const; + + virtual bool equals(const LexerAction &other) const = 0; + + virtual std::string toString() const = 0; + + protected: + LexerAction(LexerActionType actionType, bool positionDependent) + : _actionType(actionType), _hashCode(0), _positionDependent(positionDependent) {} + + virtual size_t hashCodeImpl() const = 0; + + size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); } + + private: + const LexerActionType _actionType; + mutable std::atomic<size_t> _hashCode; + const bool _positionDependent; + }; + + inline bool operator==(const LexerAction &lhs, const LexerAction &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const LexerAction &lhs, const LexerAction &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::LexerAction> { + size_t operator()(const ::antlr4::atn::LexerAction &lexerAction) const { + return lexerAction.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp new file mode 100644 index 0000000000..490351b892 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.cpp @@ -0,0 +1,111 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "atn/LexerIndexedCustomAction.h" +#include "support/CPPUtils.h" +#include "support/Arrays.h" +#include "support/Casts.h" + +#include "atn/LexerActionExecutor.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + + bool lexerActionEqual(const Ref<const LexerAction> &lhs, const Ref<const LexerAction> &rhs) { + return *lhs == *rhs; + } + +} + +LexerActionExecutor::LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions) + : _lexerActions(std::move(lexerActions)), _hashCode(0) {} + +Ref<const LexerActionExecutor> LexerActionExecutor::append(const Ref<const LexerActionExecutor> &lexerActionExecutor, + Ref<const LexerAction> lexerAction) { + if (lexerActionExecutor == nullptr) { + return std::make_shared<LexerActionExecutor>(std::vector<Ref<const LexerAction>>{ std::move(lexerAction) }); + } + std::vector<Ref<const LexerAction>> lexerActions; + lexerActions.reserve(lexerActionExecutor->_lexerActions.size() + 1); + lexerActions.insert(lexerActions.begin(), lexerActionExecutor->_lexerActions.begin(), lexerActionExecutor->_lexerActions.end()); + lexerActions.push_back(std::move(lexerAction)); + return std::make_shared<LexerActionExecutor>(std::move(lexerActions)); +} + +Ref<const LexerActionExecutor> LexerActionExecutor::fixOffsetBeforeMatch(int offset) const { + std::vector<Ref<const LexerAction>> updatedLexerActions; + for (size_t i = 0; i < _lexerActions.size(); i++) { + if (_lexerActions[i]->isPositionDependent() && !LexerIndexedCustomAction::is(*_lexerActions[i])) { + if (updatedLexerActions.empty()) { + updatedLexerActions = _lexerActions; // Make a copy. + } + updatedLexerActions[i] = std::make_shared<LexerIndexedCustomAction>(offset, _lexerActions[i]); + } + } + if (updatedLexerActions.empty()) { + return shared_from_this(); + } + return std::make_shared<LexerActionExecutor>(std::move(updatedLexerActions)); +} + +const std::vector<Ref<const LexerAction>>& LexerActionExecutor::getLexerActions() const { + return _lexerActions; +} + +void LexerActionExecutor::execute(Lexer *lexer, CharStream *input, size_t startIndex) const { + bool requiresSeek = false; + size_t stopIndex = input->index(); + + auto onExit = finally([requiresSeek, input, stopIndex]() { + if (requiresSeek) { + input->seek(stopIndex); + } + }); + for (const auto &lexerAction : _lexerActions) { + if (LexerIndexedCustomAction::is(*lexerAction)) { + int offset = downCast<const LexerIndexedCustomAction&>(*lexerAction).getOffset(); + input->seek(startIndex + offset); + requiresSeek = (startIndex + offset) != stopIndex; + } else if (lexerAction->isPositionDependent()) { + input->seek(stopIndex); + requiresSeek = false; + } + lexerAction->execute(lexer); + } +} + +size_t LexerActionExecutor::hashCode() const { + auto hash = _hashCode.load(std::memory_order_relaxed); + if (hash == 0) { + hash = MurmurHash::initialize(); + for (const auto &lexerAction : _lexerActions) { + hash = MurmurHash::update(hash, lexerAction); + } + hash = MurmurHash::finish(hash, _lexerActions.size()); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} + +bool LexerActionExecutor::equals(const LexerActionExecutor &other) const { + if (this == std::addressof(other)) { + return true; + } + return cachedHashCodeEqual(_hashCode.load(std::memory_order_relaxed), other._hashCode.load(std::memory_order_relaxed)) && + _lexerActions.size() == other._lexerActions.size() && + std::equal(_lexerActions.begin(), _lexerActions.end(), other._lexerActions.begin(), lexerActionEqual); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h new file mode 100644 index 0000000000..28bb1e28ec --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h @@ -0,0 +1,128 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Represents an executor for a sequence of lexer actions which traversed during + /// the matching operation of a lexer rule (token). + /// + /// <para>The executor tracks position information for position-dependent lexer actions + /// efficiently, ensuring that actions appearing only at the end of the rule do + /// not cause bloating of the <seealso cref="DFA"/> created for the lexer.</para> + class ANTLR4CPP_PUBLIC LexerActionExecutor final : public std::enable_shared_from_this<LexerActionExecutor> { + public: + /// <summary> + /// Constructs an executor for a sequence of <seealso cref="LexerAction"/> actions. </summary> + /// <param name="lexerActions"> The lexer actions to execute. </param> + explicit LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which executes the actions for + /// the input {@code lexerActionExecutor} followed by a specified + /// {@code lexerAction}. + /// </summary> + /// <param name="lexerActionExecutor"> The executor for actions already traversed by + /// the lexer while matching a token within a particular + /// <seealso cref="LexerATNConfig"/>. If this is {@code null}, the method behaves as + /// though it were an empty executor. </param> + /// <param name="lexerAction"> The lexer action to execute after the actions + /// specified in {@code lexerActionExecutor}. + /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> for executing the combine actions + /// of {@code lexerActionExecutor} and {@code lexerAction}. </returns> + static Ref<const LexerActionExecutor> append(const Ref<const LexerActionExecutor> &lexerActionExecutor, + Ref<const LexerAction> lexerAction); + + /// <summary> + /// Creates a <seealso cref="LexerActionExecutor"/> which encodes the current offset + /// for position-dependent lexer actions. + /// + /// <para>Normally, when the executor encounters lexer actions where + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}, it calls + /// <seealso cref="IntStream#seek"/> on the input <seealso cref="CharStream"/> to set the input + /// position to the <em>end</em> of the current token. This behavior provides + /// for efficient DFA representation of lexer actions which appear at the end + /// of a lexer rule, even when the lexer rule matches a variable number of + /// characters.</para> + /// + /// <para>Prior to traversing a match transition in the ATN, the current offset + /// from the token start index is assigned to all position-dependent lexer + /// actions which have not already been assigned a fixed offset. By storing + /// the offsets relative to the token start index, the DFA representation of + /// lexer actions which appear in the middle of tokens remains efficient due + /// to sharing among tokens of the same length, regardless of their absolute + /// position in the input stream.</para> + /// + /// <para>If the current executor already has offsets assigned to all + /// position-dependent lexer actions, the method returns {@code this}.</para> + /// </summary> + /// <param name="offset"> The current offset to assign to all position-dependent + /// lexer actions which do not already have offsets assigned. + /// </param> + /// <returns> A <seealso cref="LexerActionExecutor"/> which stores input stream offsets + /// for all position-dependent lexer actions. </returns> + Ref<const LexerActionExecutor> fixOffsetBeforeMatch(int offset) const; + + /// <summary> + /// Gets the lexer actions to be executed by this executor. </summary> + /// <returns> The lexer actions to be executed by this executor. </returns> + const std::vector<Ref<const LexerAction>>& getLexerActions() const; + + /// <summary> + /// Execute the actions encapsulated by this executor within the context of a + /// particular <seealso cref="Lexer"/>. + /// + /// <para>This method calls <seealso cref="IntStream#seek"/> to set the position of the + /// {@code input} <seealso cref="CharStream"/> prior to calling + /// <seealso cref="LexerAction#execute"/> on a position-dependent action. Before the + /// method returns, the input position will be restored to the same position + /// it was in when the method was invoked.</para> + /// </summary> + /// <param name="lexer"> The lexer instance. </param> + /// <param name="input"> The input stream which is the source for the current token. + /// When this method is called, the current <seealso cref="IntStream#index"/> for + /// {@code input} should be the start of the following token, i.e. 1 + /// character past the end of the current token. </param> + /// <param name="startIndex"> The token start index. This value may be passed to + /// <seealso cref="IntStream#seek"/> to set the {@code input} position to the beginning + /// of the token. </param> + void execute(Lexer *lexer, CharStream *input, size_t startIndex) const; + + size_t hashCode() const; + + bool equals(const LexerActionExecutor &other) const; + + private: + const std::vector<Ref<const LexerAction>> _lexerActions; + mutable std::atomic<size_t> _hashCode; + }; + + inline bool operator==(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::LexerActionExecutor> { + size_t operator()(const ::antlr4::atn::LexerActionExecutor &lexerActionExecutor) const { + return lexerActionExecutor.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h new file mode 100644 index 0000000000..aab4033415 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionType.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Represents the serialization type of a <seealso cref="LexerAction"/>. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + enum class LexerActionType : size_t { + /// <summary> + /// The type of a <seealso cref="LexerChannelAction"/> action. + /// </summary> + CHANNEL = 0, + /// <summary> + /// The type of a <seealso cref="LexerCustomAction"/> action. + /// </summary> + CUSTOM, + /// <summary> + /// The type of a <seealso cref="LexerModeAction"/> action. + /// </summary> + MODE, + /// <summary> + /// The type of a <seealso cref="LexerMoreAction"/> action. + /// </summary> + MORE, + /// <summary> + /// The type of a <seealso cref="LexerPopModeAction"/> action. + /// </summary> + POP_MODE, + /// <summary> + /// The type of a <seealso cref="LexerPushModeAction"/> action. + /// </summary> + PUSH_MODE, + /// <summary> + /// The type of a <seealso cref="LexerSkipAction"/> action. + /// </summary> + SKIP, + /// <summary> + /// The type of a <seealso cref="LexerTypeAction"/> action. + /// </summary> + TYPE, + + INDEXED_CUSTOM, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp new file mode 100644 index 0000000000..b6cda6cff0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerChannelAction.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerChannelAction::LexerChannelAction(int channel) + : LexerAction(LexerActionType::CHANNEL, false), _channel(channel) {} + +void LexerChannelAction::execute(Lexer *lexer) const { + lexer->setChannel(getChannel()); +} + +size_t LexerChannelAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getChannel()); + return MurmurHash::finish(hash, 2); +} + +bool LexerChannelAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerChannelAction&>(other); + return getChannel() == lexerAction.getChannel(); +} + +std::string LexerChannelAction::toString() const { + return "channel(" + std::to_string(getChannel()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h new file mode 100644 index 0000000000..1a5c53efef --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerChannelAction.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + using antlr4::Lexer; + + /// <summary> + /// Implements the {@code channel} lexer action by calling + /// <seealso cref="Lexer#setChannel"/> with the assigned channel. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerChannelAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CHANNEL; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code channel} action with the specified channel value. </summary> + /// <param name="channel"> The channel value to pass to <seealso cref="Lexer#setChannel"/>. </param> + explicit LexerChannelAction(int channel); + + /// <summary> + /// Gets the channel to use for the <seealso cref="Token"/> created by the lexer. + /// </summary> + /// <returns> The channel to use for the <seealso cref="Token"/> created by the lexer. </returns> + int getChannel() const { return _channel; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setChannel"/> with the + /// value provided by <seealso cref="#getChannel"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _channel; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp new file mode 100644 index 0000000000..b6edd89ea1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.cpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerCustomAction::LexerCustomAction(size_t ruleIndex, size_t actionIndex) + : LexerAction(LexerActionType::CUSTOM, true), _ruleIndex(ruleIndex), _actionIndex(actionIndex) {} + +void LexerCustomAction::execute(Lexer *lexer) const { + lexer->action(nullptr, getRuleIndex(), getActionIndex()); +} + +size_t LexerCustomAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getRuleIndex()); + hash = MurmurHash::update(hash, getActionIndex()); + return MurmurHash::finish(hash, 3); +} + +bool LexerCustomAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerCustomAction&>(other); + return getRuleIndex() == lexerAction.getRuleIndex() && getActionIndex() == lexerAction.getActionIndex(); +} + +std::string LexerCustomAction::toString() const { + return "custom(" + std::to_string(getRuleIndex()) + ", " + std::to_string(getActionIndex()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h new file mode 100644 index 0000000000..7973271c62 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerCustomAction.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Executes a custom lexer action by calling <seealso cref="Recognizer#action"/> with the + /// rule and action indexes assigned to the custom action. The implementation of + /// a custom action is added to the generated code for the lexer in an override + /// of <seealso cref="Recognizer#action"/> when the grammar is compiled. + /// + /// <para>This class may represent embedded actions created with the <code>{...}</code> + /// syntax in ANTLR 4, as well as actions created for lexer commands where the + /// command argument could not be evaluated when the grammar was compiled.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerCustomAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::CUSTOM; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a custom lexer action with the specified rule and action + /// indexes. + /// </summary> + /// <param name="ruleIndex"> The rule index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + /// <param name="actionIndex"> The action index to use for calls to + /// <seealso cref="Recognizer#action"/>. </param> + LexerCustomAction(size_t ruleIndex, size_t actionIndex); + + /// <summary> + /// Gets the rule index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The rule index for the custom action. </returns> + size_t getRuleIndex() const { return _ruleIndex; } + + /// <summary> + /// Gets the action index to use for calls to <seealso cref="Recognizer#action"/>. + /// </summary> + /// <returns> The action index for the custom action. </returns> + size_t getActionIndex() const { return _actionIndex; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>Custom actions are implemented by calling <seealso cref="Lexer#action"/> with the + /// appropriate rule and action indexes.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const size_t _ruleIndex; + const size_t _actionIndex; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp new file mode 100644 index 0000000000..114863702c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/LexerIndexedCustomAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + +} + +LexerIndexedCustomAction::LexerIndexedCustomAction(int offset, Ref<const LexerAction> action) + : LexerAction(LexerActionType::INDEXED_CUSTOM, true), _action(std::move(action)), _offset(offset) {} + +void LexerIndexedCustomAction::execute(Lexer *lexer) const { + // assume the input stream position was properly set by the calling code + getAction()->execute(lexer); +} + +size_t LexerIndexedCustomAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getOffset()); + hash = MurmurHash::update(hash, getAction()); + return MurmurHash::finish(hash, 3); +} + +bool LexerIndexedCustomAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerIndexedCustomAction&>(other); + return getOffset() == lexerAction.getOffset() && + cachedHashCodeEqual(cachedHashCode(), lexerAction.cachedHashCode()) && + *getAction() == *lexerAction.getAction(); +} + +std::string LexerIndexedCustomAction::toString() const { + return "indexedCustom(" + std::to_string(getOffset()) + ", " + getAction()->toString() + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h new file mode 100644 index 0000000000..5693bac62b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerIndexedCustomAction.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RuleContext.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This implementation of <seealso cref="LexerAction"/> is used for tracking input offsets + /// for position-dependent actions within a <seealso cref="LexerActionExecutor"/>. + /// + /// <para>This action is not serialized as part of the ATN, and is only required for + /// position-dependent lexer actions which appear at a location other than the + /// end of a rule. For more information about DFA optimizations employed for + /// lexer actions, see <seealso cref="LexerActionExecutor#append"/> and + /// <seealso cref="LexerActionExecutor#fixOffsetBeforeMatch"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerIndexedCustomAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::INDEXED_CUSTOM; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new indexed custom action by associating a character offset + /// with a <seealso cref="LexerAction"/>. + /// + /// <para>Note: This class is only required for lexer actions for which + /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}.</para> + /// </summary> + /// <param name="offset"> The offset into the input <seealso cref="CharStream"/>, relative to + /// the token start index, at which the specified lexer action should be + /// executed. </param> + /// <param name="action"> The lexer action to execute at a particular offset in the + /// input <seealso cref="CharStream"/>. </param> + LexerIndexedCustomAction(int offset, Ref<const LexerAction> action); + + /// <summary> + /// Gets the location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. The value is interpreted as an offset relative + /// to the token start index. + /// </summary> + /// <returns> The location in the input <seealso cref="CharStream"/> at which the lexer + /// action should be executed. </returns> + int getOffset() const { return _offset; } + + /// <summary> + /// Gets the lexer action to execute. + /// </summary> + /// <returns> A <seealso cref="LexerAction"/> object which executes the lexer action. </returns> + const Ref<const LexerAction>& getAction() const { return _action; } + + void execute(Lexer *lexer) const override; + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const Ref<const LexerAction> _action; + const int _offset; + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp new file mode 100644 index 0000000000..a4ca3b3d79 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerModeAction::LexerModeAction(int mode) : LexerAction(LexerActionType::MODE, false), _mode(mode) {} + +void LexerModeAction::execute(Lexer *lexer) const { + lexer->setMode(getMode()); +} + +size_t LexerModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getMode()); + return MurmurHash::finish(hash, 2); +} + +bool LexerModeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerModeAction&>(other); + return getMode() == lexerAction.getMode(); +} + +std::string LexerModeAction::toString() const { + return "mode(" + std::to_string(getMode()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h new file mode 100644 index 0000000000..6fa61a2e67 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code mode} lexer action by calling <seealso cref="Lexer#mode"/> with + /// the assigned mode. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code mode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#mode"/>. </param> + explicit LexerModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code mode} command. </returns> + int getMode() const { return _mode; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#mode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp new file mode 100644 index 0000000000..30df87b7b6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerMoreAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerMoreAction>& LexerMoreAction::getInstance() { + static const Ref<const LexerMoreAction> instance(new LexerMoreAction()); + return instance; +} + +void LexerMoreAction::execute(Lexer *lexer) const { + lexer->more(); +} + +size_t LexerMoreAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerMoreAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerMoreAction::toString() const { + return "more"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h new file mode 100644 index 0000000000..fc4b8fcbfc --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerMoreAction.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code more} lexer action by calling <seealso cref="Lexer#more"/>. + /// + /// <para>The {@code more} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#INSTANCE"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerMoreAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::MORE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<const LexerMoreAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#more"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code more} command. + LexerMoreAction() : LexerAction(LexerActionType::MORE, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp new file mode 100644 index 0000000000..5192049348 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerPopModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerPopModeAction>& LexerPopModeAction::getInstance() { + static const Ref<const LexerPopModeAction> instance(new LexerPopModeAction()); + return instance; +} + +void LexerPopModeAction::execute(Lexer *lexer) const { + lexer->popMode(); +} + +size_t LexerPopModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerPopModeAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerPopModeAction::toString() const { + return "popMode"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h new file mode 100644 index 0000000000..8d712cad8c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPopModeAction.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code popMode} lexer action by calling <seealso cref="Lexer#popMode"/>. + /// + /// <para>The {@code popMode} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#INSTANCE"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPopModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::POP_MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Provides a singleton instance of this parameterless lexer action. + /// </summary> + static const Ref<const LexerPopModeAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#popMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code popMode} command. + LexerPopModeAction() : LexerAction(LexerActionType::POP_MODE, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp new file mode 100644 index 0000000000..3ebd21fab2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerPushModeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerPushModeAction::LexerPushModeAction(int mode) : LexerAction(LexerActionType::PUSH_MODE, false), _mode(mode) {} + +void LexerPushModeAction::execute(Lexer *lexer) const { + lexer->pushMode(getMode()); +} + +size_t LexerPushModeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getMode()); + return MurmurHash::finish(hash, 2); +} + +bool LexerPushModeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerPushModeAction&>(other); + return getMode() == lexerAction.getMode(); +} + +std::string LexerPushModeAction::toString() const { + return "pushMode(" + std::to_string(getMode()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h new file mode 100644 index 0000000000..32b706b583 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerPushModeAction.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code pushMode} lexer action by calling + /// <seealso cref="Lexer#pushMode"/> with the assigned mode. + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerPushModeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::PUSH_MODE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code pushMode} action with the specified mode value. </summary> + /// <param name="mode"> The mode value to pass to <seealso cref="Lexer#pushMode"/>. </param> + explicit LexerPushModeAction(int mode); + + /// <summary> + /// Get the lexer mode this action should transition the lexer to. + /// </summary> + /// <returns> The lexer mode for this {@code pushMode} command. </returns> + int getMode() const { return _mode; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#pushMode"/> with the + /// value provided by <seealso cref="#getMode"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _mode; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp new file mode 100644 index 0000000000..72f9de3e1f --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" + +#include "atn/LexerSkipAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; + +const Ref<const LexerSkipAction>& LexerSkipAction::getInstance() { + static const Ref<const LexerSkipAction> instance(new LexerSkipAction()); + return instance; +} + +void LexerSkipAction::execute(Lexer *lexer) const { + lexer->skip(); +} + +size_t LexerSkipAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + return MurmurHash::finish(hash, 1); +} + +bool LexerSkipAction::equals(const LexerAction &other) const { + return this == std::addressof(other); +} + +std::string LexerSkipAction::toString() const { + return "skip"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h new file mode 100644 index 0000000000..afdf4702f2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerSkipAction.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerAction.h" +#include "atn/LexerActionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// Implements the {@code skip} lexer action by calling <seealso cref="Lexer#skip"/>. + /// + /// <para>The {@code skip} command does not have any parameters, so this action is + /// implemented as a singleton instance exposed by <seealso cref="#INSTANCE"/>.</para> + /// + /// @author Sam Harwell + /// @since 4.2 + /// </summary> + class ANTLR4CPP_PUBLIC LexerSkipAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::SKIP; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// Provides a singleton instance of this parameterless lexer action. + static const Ref<const LexerSkipAction>& getInstance(); + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#skip"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + /// Constructs the singleton instance of the lexer {@code skip} command. + LexerSkipAction() : LexerAction(LexerActionType::SKIP, false) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp new file mode 100644 index 0000000000..55ccf358ba --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "support/Casts.h" + +#include "atn/LexerTypeAction.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::misc; +using namespace antlrcpp; + +LexerTypeAction::LexerTypeAction(int type) : LexerAction(LexerActionType::TYPE, false), _type(type) {} + +void LexerTypeAction::execute(Lexer *lexer) const { + lexer->setType(getType()); +} + +size_t LexerTypeAction::hashCodeImpl() const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, static_cast<size_t>(getActionType())); + hash = MurmurHash::update(hash, getType()); + return MurmurHash::finish(hash, 2); +} + +bool LexerTypeAction::equals(const LexerAction &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getActionType() != other.getActionType()) { + return false; + } + const auto &lexerAction = downCast<const LexerTypeAction&>(other); + return getType() == lexerAction.getType(); +} + +std::string LexerTypeAction::toString() const { + return "type(" + std::to_string(getType()) + ")"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h new file mode 100644 index 0000000000..1cd7d71fd3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LexerTypeAction.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/LexerActionType.h" +#include "atn/LexerAction.h" + +namespace antlr4 { +namespace atn { + + /// Implements the {@code type} lexer action by calling <seealso cref="Lexer#setType"/> + /// with the assigned type. + class ANTLR4CPP_PUBLIC LexerTypeAction final : public LexerAction { + public: + static bool is(const LexerAction &lexerAction) { return lexerAction.getActionType() == LexerActionType::TYPE; } + + static bool is(const LexerAction *lexerAction) { return lexerAction != nullptr && is(*lexerAction); } + + /// <summary> + /// Constructs a new {@code type} action with the specified token type value. </summary> + /// <param name="type"> The type to assign to the token using <seealso cref="Lexer#setType"/>. </param> + explicit LexerTypeAction(int type); + + /// <summary> + /// Gets the type to assign to a token created by the lexer. </summary> + /// <returns> The type to assign to a token created by the lexer. </returns> + int getType() const { return _type; } + + /// <summary> + /// {@inheritDoc} + /// + /// <para>This action is implemented by calling <seealso cref="Lexer#setType"/> with the + /// value provided by <seealso cref="#getType"/>.</para> + /// </summary> + void execute(Lexer *lexer) const override; + + bool equals(const LexerAction &obj) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + + private: + const int _type; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp new file mode 100644 index 0000000000..aa3f9124c7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/LookaheadEventInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +LookaheadEventInfo::LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, + TokenStream *input, size_t startIndex, size_t stopIndex, bool fullCtx) + : DecisionEventInfo(decision, configs, input, startIndex, stopIndex, fullCtx) { + + this->predictedAlt = predictedAlt; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h new file mode 100644 index 0000000000..f5fc24fde2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LookaheadEventInfo.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// This class represents profiling event information for tracking the lookahead + /// depth required in order to make a prediction. + class ANTLR4CPP_PUBLIC LookaheadEventInfo : public DecisionEventInfo { + public: + /// The alternative chosen by adaptivePredict(), not necessarily + /// the outermost alt shown for a rule; left-recursive rules have + /// user-level alts that differ from the rewritten rule with a (...) block + /// and a (..)* loop. + size_t predictedAlt = 0; + + /// <summary> + /// Constructs a new instance of the <seealso cref="LookaheadEventInfo"/> class with + /// the specified detailed lookahead information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="configs"> The final configuration set containing the necessary + /// information to determine the result of a prediction, or {@code null} if + /// the final configuration set is not available </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the prediction was finally made </param> + /// <param name="fullCtx"> {@code true} if the current lookahead is part of an LL + /// prediction; otherwise, {@code false} if the current lookahead is part of + /// an SLL prediction </param> + LookaheadEventInfo(size_t decision, ATNConfigSet *configs, size_t predictedAlt, TokenStream *input, size_t startIndex, + size_t stopIndex, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h new file mode 100644 index 0000000000..2616b1c4b8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/LoopEndState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// Mark the end of a * or + loop. + class ANTLR4CPP_PUBLIC LoopEndState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::LOOP_END; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + ATNState *loopBackState = nullptr; + + LoopEndState() : ATNState(ATNStateType::LOOP_END) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp new file mode 100644 index 0000000000..ba796d7188 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.cpp @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/NotSetTransition.h" +#include "atn/ATNState.h" +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::atn; + +NotSetTransition::NotSetTransition(ATNState *target, misc::IntervalSet set) : SetTransition(TransitionType::NOT_SET, target, std::move(set)) {} + +bool NotSetTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol + && !SetTransition::matches(symbol, minVocabSymbol, maxVocabSymbol); +} + +std::string NotSetTransition::toString() const { + return "NOT_SET " + Transition::toString() + " { " + SetTransition::toString() + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h new file mode 100644 index 0000000000..ef937a60fe --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/NotSetTransition.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/SetTransition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC NotSetTransition final : public SetTransition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::NOT_SET; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + NotSetTransition(ATNState *target, misc::IntervalSet set); + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp new file mode 100644 index 0000000000..48655424d8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.cpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/OrderedATNConfigSet.h" + +using namespace antlr4::atn; + +size_t OrderedATNConfigSet::hashCode(const ATNConfig &atnConfig) const { + return atnConfig.hashCode(); +} + +bool OrderedATNConfigSet::equals(const ATNConfig &lhs, const ATNConfig &rhs) const { + return lhs == rhs; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h new file mode 100644 index 0000000000..18bf6bcb21 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/OrderedATNConfigSet.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC OrderedATNConfigSet final : public ATNConfigSet { + public: + OrderedATNConfigSet() = default; + + private: + size_t hashCode(const ATNConfig &atnConfig) const override; + + bool equals(const ATNConfig &lhs, const ATNConfig &rhs) const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp new file mode 100644 index 0000000000..95a89ac855 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.cpp @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ProfilingATNSimulator.h" +#include "dfa/DFA.h" + +#include "atn/ParseInfo.h" + +using namespace antlr4::atn; + +ParseInfo::ParseInfo(ProfilingATNSimulator *atnSimulator) : _atnSimulator(atnSimulator) { +} + +ParseInfo::~ParseInfo() { +} + +std::vector<DecisionInfo> ParseInfo::getDecisionInfo() { + return _atnSimulator->getDecisionInfo(); +} + +std::vector<size_t> ParseInfo::getLLDecisions() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + std::vector<size_t> LL; + for (size_t i = 0; i < decisions.size(); ++i) { + long long fallBack = decisions[i].LL_Fallback; + if (fallBack > 0) { + LL.push_back(i); + } + } + return LL; +} + +long long ParseInfo::getTotalTimeInPrediction() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long t = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + t += decisions[i].timeInPrediction; + } + return t; +} + +long long ParseInfo::getTotalSLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalLLLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); i++) { + k += decisions[i].LL_TotalLook; + } + return k; +} + +long long ParseInfo::getTotalSLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalLLATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +long long ParseInfo::getTotalATNLookaheadOps() { + std::vector<DecisionInfo> decisions = _atnSimulator->getDecisionInfo(); + long long k = 0; + for (size_t i = 0; i < decisions.size(); ++i) { + k += decisions[i].SLL_ATNTransitions; + k += decisions[i].LL_ATNTransitions; + } + return k; +} + +size_t ParseInfo::getDFASize() { + size_t n = 0; + std::vector<dfa::DFA> &decisionToDFA = _atnSimulator->decisionToDFA; + for (size_t i = 0; i < decisionToDFA.size(); ++i) { + n += getDFASize(i); + } + return n; +} + +size_t ParseInfo::getDFASize(size_t decision) { + dfa::DFA &decisionToDFA = _atnSimulator->decisionToDFA[decision]; + return decisionToDFA.states.size(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h new file mode 100644 index 0000000000..7ced7de433 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParseInfo.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ProfilingATNSimulator; + + /// This class provides access to specific and aggregate statistics gathered + /// during profiling of a parser. + class ANTLR4CPP_PUBLIC ParseInfo { + public: + ParseInfo(ProfilingATNSimulator *atnSimulator); + ParseInfo(ParseInfo const&) = default; + virtual ~ParseInfo(); + + ParseInfo& operator=(ParseInfo const&) = default; + + /// <summary> + /// Gets an array of <seealso cref="DecisionInfo"/> instances containing the profiling + /// information gathered for each decision in the ATN. + /// </summary> + /// <returns> An array of <seealso cref="DecisionInfo"/> instances, indexed by decision + /// number. </returns> + virtual std::vector<DecisionInfo> getDecisionInfo(); + + /// <summary> + /// Gets the decision numbers for decisions that required one or more + /// full-context predictions during parsing. These are decisions for which + /// <seealso cref="DecisionInfo#LL_Fallback"/> is non-zero. + /// </summary> + /// <returns> A list of decision numbers which required one or more + /// full-context predictions during parsing. </returns> + virtual std::vector<size_t> getLLDecisions(); + + /// <summary> + /// Gets the total time spent during prediction across all decisions made + /// during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#timeInPrediction"/> for all decisions. + /// </summary> + virtual long long getTotalTimeInPrediction(); + + /// <summary> + /// Gets the total number of SLL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#SLL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalSLLLookaheadOps(); + + /// <summary> + /// Gets the total number of LL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// <seealso cref="DecisionInfo#LL_TotalLook"/> for all decisions. + /// </summary> + virtual long long getTotalLLLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL prediction + /// across all decisions made during parsing. + /// </summary> + virtual long long getTotalSLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for LL prediction + /// across all decisions made during parsing. + /// </summary> + virtual long long getTotalLLATNLookaheadOps(); + + /// <summary> + /// Gets the total number of ATN lookahead operations for SLL and LL + /// prediction across all decisions made during parsing. + /// + /// <para> + /// This value is the sum of <seealso cref="#getTotalSLLATNLookaheadOps"/> and + /// <seealso cref="#getTotalLLATNLookaheadOps"/>.</para> + /// </summary> + virtual long long getTotalATNLookaheadOps(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for all + /// decisions in the ATN. + /// </summary> + virtual size_t getDFASize(); + + /// <summary> + /// Gets the total number of DFA states stored in the DFA cache for a + /// particular decision. + /// </summary> + virtual size_t getDFASize(size_t decision); + + protected: + const ProfilingATNSimulator *_atnSimulator; // non-owning, we are created by this simulator. + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp new file mode 100644 index 0000000000..ad1da03570 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.cpp @@ -0,0 +1,1387 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "NoViableAltException.h" +#include "atn/DecisionState.h" +#include "ParserRuleContext.h" +#include "misc/IntervalSet.h" +#include "Parser.h" +#include "CommonTokenStream.h" +#include "atn/NotSetTransition.h" +#include "atn/AtomTransition.h" +#include "atn/RuleTransition.h" +#include "atn/PredicateTransition.h" +#include "atn/PrecedencePredicateTransition.h" +#include "atn/SingletonPredictionContext.h" +#include "atn/ActionTransition.h" +#include "atn/EpsilonTransition.h" +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "internal/Synchronization.h" + +#include "atn/StarLoopEntryState.h" +#include "atn/BlockStartState.h" +#include "atn/BlockEndState.h" + +#include "misc/Interval.h" +#include "ANTLRErrorListener.h" + +#include "Vocabulary.h" +#include "support/Arrays.h" +#include "support/Casts.h" + +#include "atn/ParserATNSimulator.h" + +#define DEBUG_ATN 0 +#define DEBUG_LIST_ATN_DECISIONS 0 +#define DEBUG_DFA 0 +#define RETRY_DEBUG 0 + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::internal; +using namespace antlrcpp; + +const bool ParserATNSimulator::TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = ParserATNSimulator::getLrLoopSetting(); + +ParserATNSimulator::ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(nullptr, atn, decisionToDFA, sharedContextCache) { +} + +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache) +: ParserATNSimulator(parser, atn, decisionToDFA, sharedContextCache, ParserATNSimulatorOptions()) {} + +ParserATNSimulator::ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache, + const ParserATNSimulatorOptions &options) +: ATNSimulator(atn, sharedContextCache), decisionToDFA(decisionToDFA), parser(parser), + mergeCache(options.getPredictionContextMergeCacheOptions()) { + InitializeInstanceFields(); +} + +void ParserATNSimulator::reset() { +} + +void ParserATNSimulator::clearDFA() { + int size = (int)decisionToDFA.size(); + decisionToDFA.clear(); + for (int d = 0; d < size; ++d) { + decisionToDFA.push_back(dfa::DFA(atn.getDecisionState(d), d)); + } +} + +size_t ParserATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "adaptivePredict decision " << decision << " exec LA(1)==" << getLookaheadName(input) << " line " + << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + _input = input; + _startIndex = input->index(); + _outerContext = outerContext; + dfa::DFA &dfa = decisionToDFA[decision]; + _dfa = &dfa; + + ssize_t m = input->mark(); + size_t index = _startIndex; + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + auto onExit = finally([this, input, index, m] { + if (mergeCache.getOptions().getClearEveryN() != 0) { + if (++_mergeCacheCounter == mergeCache.getOptions().getClearEveryN()) { + mergeCache.clear(); + _mergeCacheCounter = 0; + } + } + _dfa = nullptr; + input->seek(index); + input->release(m); + }); + + dfa::DFAState *s0; + { + SharedLock<SharedMutex> stateLock(atn._stateMutex); + if (dfa.isPrecedenceDfa()) { + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + s0 = dfa.getPrecedenceStartState(parser->getPrecedence()); + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.s0; + } + } + + if (s0 == nullptr) { + auto s0_closure = computeStartState(dfa.atnStartState, &ParserRuleContext::EMPTY, false); + std::unique_ptr<dfa::DFAState> newState; + std::unique_ptr<dfa::DFAState> oldState; + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + dfa::DFAState* ds0 = dfa.s0; + if (dfa.isPrecedenceDfa()) { + /* If this is a precedence DFA, we use applyPrecedenceFilter + * to convert the computed start state to a precedence start + * state. We then use DFA.setPrecedenceStartState to set the + * appropriate start state for the precedence level rather + * than simply setting DFA.s0. + */ + ds0->configs = std::move(s0_closure); // not used for prediction but useful to know start configs anyway + newState = std::make_unique<dfa::DFAState>(applyPrecedenceFilter(ds0->configs.get())); + s0 = addDFAState(dfa, newState.get()); + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + dfa.setPrecedenceStartState(parser->getPrecedence(), s0); + } else { + newState = std::make_unique<dfa::DFAState>(std::move(s0_closure)); + s0 = addDFAState(dfa, newState.get()); + if (ds0 != s0) { + oldState.reset(ds0); + dfa.s0 = s0; + } + } + if (s0 == newState.get()) { + newState.release(); + } + } + + // We can start with an existing DFA. + size_t alt = execATN(dfa, s0, input, index, outerContext != nullptr ? outerContext : &ParserRuleContext::EMPTY); + + return alt; +} + +size_t ParserATNSimulator::execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext) { + +#if DEBUG_ATN == 1 || DEBUG_LIST_ATN_DECISIONS == 1 + std::cout << "execATN decision " << dfa.decision << " exec LA(1)==" << getLookaheadName(input) << + " line " << input->LT(1)->getLine() << ":" << input->LT(1)->getCharPositionInLine() << std::endl; +#endif + + dfa::DFAState *previousD = s0; + +#if DEBUG_ATN == 1 + std::cout << "s0 = " << s0 << std::endl; +#endif + + size_t t = input->LA(1); + + while (true) { // while more work + dfa::DFAState *D = getExistingTargetState(previousD, t); + if (D == nullptr) { + D = computeTargetState(dfa, previousD, t); + } + + if (D == ERROR.get()) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + NoViableAltException e = noViableAlt(input, outerContext, previousD->configs.get(), startIndex, false); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD->configs.get(), outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + + throw e; + } + + if (D->requiresFullContext && _mode != PredictionMode::SLL) { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + BitSet conflictingAlts; + if (D->predicates.size() != 0) { +#if DEBUG_ATN == 1 + std::cout << "DFA state has preds in DFA sim LL failover" << std::endl; +#endif + + size_t conflictIndex = input->index(); + if (conflictIndex != startIndex) { + input->seek(startIndex); + } + + conflictingAlts = evalSemanticContext(D->predicates, outerContext, true); + if (conflictingAlts.count() == 1) { +#if DEBUG_ATN == 1 + std::cout << "Full LL avoided" << std::endl; +#endif + + return conflictingAlts.nextSetBit(0); + } + + if (conflictIndex != startIndex) { + // restore the index so reporting the fallback to full + // context occurs with the index at the correct spot + input->seek(conflictIndex); + } + } + +#if DEBUG_DFA == 1 + std::cout << "ctx sensitive state " << outerContext << " in " << D << std::endl; +#endif + + bool fullCtx = true; + std::unique_ptr<ATNConfigSet> s0_closure = computeStartState(dfa.atnStartState, outerContext, fullCtx); + reportAttemptingFullContext(dfa, conflictingAlts, D->configs.get(), startIndex, input->index()); + size_t alt = execATNWithFullContext(dfa, D, s0_closure.get(), input, startIndex, outerContext); + return alt; + } + + if (D->isAcceptState) { + if (D->predicates.empty()) { + return D->prediction; + } + + size_t stopIndex = input->index(); + input->seek(startIndex); + BitSet alts = evalSemanticContext(D->predicates, outerContext, true); + switch (alts.count()) { + case 0: + throw noViableAlt(input, outerContext, D->configs.get(), startIndex, false); + + case 1: + return alts.nextSetBit(0); + + default: + // report ambiguity after predicate evaluation to make sure the correct + // set of ambig alts is reported. + reportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D->configs.get()); + return alts.nextSetBit(0); + } + } + + previousD = D; + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } +} + +dfa::DFAState *ParserATNSimulator::getExistingTargetState(dfa::DFAState *previousD, size_t t) { + dfa::DFAState* retval; + SharedLock<SharedMutex> edgeLock(atn._edgeMutex); + auto iterator = previousD->edges.find(t); + retval = (iterator == previousD->edges.end()) ? nullptr : iterator->second; + return retval; +} + +dfa::DFAState *ParserATNSimulator::computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) { + std::unique_ptr<ATNConfigSet> reach = computeReachSet(previousD->configs.get(), t, false); + if (reach == nullptr) { + addDFAEdge(dfa, previousD, t, ERROR.get()); + return ERROR.get(); + } + + // create new target state; we'll add to DFA after it's complete + dfa::DFAState *D = new dfa::DFAState(std::move(reach)); /* mem-check: managed by the DFA or deleted below, "reach" is no longer valid now. */ + size_t predictedAlt = getUniqueAlt(D->configs.get()); + + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D->isAcceptState = true; + D->configs->uniqueAlt = predictedAlt; + D->prediction = predictedAlt; + } else if (PredictionModeClass::hasSLLConflictTerminatingPrediction(_mode, D->configs.get())) { + // MORE THAN ONE VIABLE ALTERNATIVE + D->configs->conflictingAlts = getConflictingAlts(D->configs.get()); + D->requiresFullContext = true; + // in SLL-only mode, we will stop at this state and return the minimum alt + D->isAcceptState = true; + D->prediction = D->configs->conflictingAlts.nextSetBit(0); + } + + if (D->isAcceptState && D->configs->hasSemanticContext) { + predicateDFAState(D, atn.getDecisionState(dfa.decision)); + if (D->predicates.size() != 0) { + D->prediction = ATN::INVALID_ALT_NUMBER; + } + } + + // all adds to dfa are done after we've created full D state + dfa::DFAState *state = addDFAEdge(dfa, previousD, t, D); + if (state != D) { + delete D; // If the new state exists already we don't need it and use the existing one instead. + } + return state; +} + +void ParserATNSimulator::predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. + size_t nalts = decisionState->transitions.size(); + + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + BitSet altsToCollectPredsFrom = getConflictingAltsOrUniqueAlt(dfaState->configs.get()); + std::vector<Ref<const SemanticContext>> altToPred = getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState->configs.get(), nalts); + if (!altToPred.empty()) { + dfaState->predicates = getPredicatePredictions(altsToCollectPredsFrom, altToPred); + dfaState->prediction = ATN::INVALID_ALT_NUMBER; // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState->prediction = altsToCollectPredsFrom.nextSetBit(0); + } +} + +size_t ParserATNSimulator::execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext) { + + bool fullCtx = true; + bool foundExactAmbig = false; + + std::unique_ptr<ATNConfigSet> reach; + ATNConfigSet *previous = s0; + input->seek(startIndex); + size_t t = input->LA(1); + size_t predictedAlt; + + while (true) { + reach = computeReachSet(previous, t, fullCtx); + if (reach == nullptr) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + NoViableAltException e = noViableAlt(input, outerContext, previous, startIndex, previous != s0); + input->seek(startIndex); + size_t alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext); + if (alt != ATN::INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + if (previous != s0) // Don't delete the start set. + delete previous; + previous = nullptr; + + std::vector<BitSet> altSubSets = PredictionModeClass::getConflictingAltSubsets(reach.get()); + reach->uniqueAlt = getUniqueAlt(reach.get()); + // unique prediction? + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + predictedAlt = reach->uniqueAlt; + break; + } + if (_mode != PredictionMode::LL_EXACT_AMBIG_DETECTION) { + predictedAlt = PredictionModeClass::resolvesToJustOneViableAlt(altSubSets); + if (predictedAlt != ATN::INVALID_ALT_NUMBER) { + break; + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if (PredictionModeClass::allSubsetsConflict(altSubSets) && PredictionModeClass::allSubsetsEqual(altSubSets)) { + foundExactAmbig = true; + predictedAlt = PredictionModeClass::getSingleViableAlt(altSubSets); + break; + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + previous = reach.release(); + + if (t != Token::EOF) { + input->consume(); + t = input->LA(1); + } + } + + if (previous != s0) // Don't delete the start set + delete previous; + + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if (reach->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + reportContextSensitivity(dfa, predictedAlt, reach.get(), startIndex, input->index()); + return predictedAlt; + } + + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. + + /* + In non-exact ambiguity detection mode, we might actually be able to + detect an exact ambiguity, but I'm not going to spend the cycles + needed to check. We only emit ambiguity warnings in exact ambiguity + mode. + + For example, we might know that we have conflicting configurations. + But, that does not mean that there is no way forward without a + conflict. It's possible to have nonconflicting alt subsets as in: + + LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + from + + [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + + In this case, (17,1,[5 $]) indicates there is some next sequence that + would resolve this without conflict to alternative 1. Any other viable + next sequence, however, is associated with a conflict. We stop + looking for input because no amount of further lookahead will alter + the fact that we should predict alternative 1. We just can't say for + sure that there is an ambiguity without looking further. + */ + reportAmbiguity(dfa, D, startIndex, input->index(), foundExactAmbig, reach->getAlts(), reach.get()); + + return predictedAlt; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeReachSet(ATNConfigSet *closure_, size_t t, bool fullCtx) { + + std::unique_ptr<ATNConfigSet> intermediate(new ATNConfigSet(fullCtx)); + + /* Configurations already in a rule stop state indicate reaching the end + * of the decision rule (local context) or end of the start rule (full + * context). Once reached, these configurations are never updated by a + * closure operation, so they are handled separately for the performance + * advantage of having a smaller intermediate set when calling closure. + * + * For full-context reach operations, separate handling is required to + * ensure that the alternative matching the longest overall sequence is + * chosen when multiple such configurations can match the input. + */ + std::vector<Ref<ATNConfig>> skippedStopStates; + + // First figure out where we can reach on input t + for (const auto &c : closure_->configs) { + if (RuleStopState::is(c->state)) { + assert(c->context->isEmpty()); + + if (fullCtx || t == Token::EOF) { + skippedStopStates.push_back(c); + } + + continue; + } + + size_t n = c->state->transitions.size(); + for (size_t ti = 0; ti < n; ti++) { // for each transition + const Transition *trans = c->state->transitions[ti].get(); + ATNState *target = getReachableTarget(trans, (int)t); + if (target != nullptr) { + intermediate->add(std::make_shared<ATNConfig>(*c, target), &mergeCache); + } + } + } + + // Now figure out where the reach operation can take us... + std::unique_ptr<ATNConfigSet> reach; + + /* This block optimizes the reach operation for intermediate sets which + * trivially indicate a termination state for the overall + * adaptivePredict operation. + * + * The conditions assume that intermediate + * contains all configurations relevant to the reach set, but this + * condition is not true when one or more configurations have been + * withheld in skippedStopStates, or when the current symbol is EOF. + */ + if (skippedStopStates.empty() && t != Token::EOF) { + if (intermediate->size() == 1) { + // Don't pursue the closure if there is just one state. + // It can only have one alternative; just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } else if (getUniqueAlt(intermediate.get()) != ATN::INVALID_ALT_NUMBER) { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = std::move(intermediate); + } + } + + /* If the reach set could not be trivially determined, perform a closure + * operation on the intermediate set to compute its initial value. + */ + if (reach == nullptr) { + reach.reset(new ATNConfigSet(fullCtx)); + ATNConfig::Set closureBusy; + + bool treatEofAsEpsilon = t == Token::EOF; + for (const auto &c : intermediate->configs) { + closure(c, reach.get(), closureBusy, false, fullCtx, treatEofAsEpsilon); + } + } + + if (t == IntStream::EOF) { + /* After consuming EOF no additional input is possible, so we are + * only interested in configurations which reached the end of the + * decision rule (local context) or end of the start rule (full + * context). Update reach to contain only these configurations. This + * handles both explicit EOF transitions in the grammar and implicit + * EOF transitions following the end of the decision or start rule. + * + * When reach==intermediate, no closure operation was performed. In + * this case, removeAllConfigsNotInRuleStopState needs to check for + * reachable rule stop states as well as configurations already in + * a rule stop state. + * + * This is handled before the configurations in skippedStopStates, + * because any configurations potentially added from that list are + * already guaranteed to meet this condition whether or not it's + * required. + */ + ATNConfigSet *temp = removeAllConfigsNotInRuleStopState(reach.get(), *reach == *intermediate); + if (temp != reach.get()) + reach.reset(temp); // We got a new set, so use that. + } + + /* If skippedStopStates is not null, then it contains at least one + * configuration. For full-context reach operations, these + * configurations reached the end of the start rule, in which case we + * only add them back to reach if no configuration during the current + * closure operation reached such a state. This ensures adaptivePredict + * chooses an alternative matching the longest overall sequence when + * multiple alternatives are viable. + */ + if (skippedStopStates.size() > 0 && (!fullCtx || !PredictionModeClass::hasConfigInRuleStopState(reach.get()))) { + assert(!skippedStopStates.empty()); + + for (const auto &c : skippedStopStates) { + reach->add(c, &mergeCache); + } + } + + if (reach->isEmpty()) { + return nullptr; + } + return reach; +} + +ATNConfigSet* ParserATNSimulator::removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, + bool lookToEndOfRule) { + if (PredictionModeClass::allConfigsInRuleStopStates(configs)) { + return configs; + } + + ATNConfigSet *result = new ATNConfigSet(configs->fullCtx); /* mem-check: released by caller */ + + for (const auto &config : configs->configs) { + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + result->add(config, &mergeCache); + continue; + } + + if (lookToEndOfRule && config->state->epsilonOnlyTransitions) { + misc::IntervalSet nextTokens = atn.nextTokens(config->state); + if (nextTokens.contains(Token::EPSILON)) { + ATNState *endOfRuleState = atn.ruleToStopState[config->state->ruleIndex]; + result->add(std::make_shared<ATNConfig>(*config, endOfRuleState), &mergeCache); + } + } + } + + return result; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx) { + // always at least the implicit call to start rule + Ref<const PredictionContext> initialContext = PredictionContext::fromRuleContext(atn, ctx); + std::unique_ptr<ATNConfigSet> configs(new ATNConfigSet(fullCtx)); + + for (size_t i = 0; i < p->transitions.size(); i++) { + ATNState *target = p->transitions[i]->target; + Ref<ATNConfig> c = std::make_shared<ATNConfig>(target, (int)i + 1, initialContext); + ATNConfig::Set closureBusy; + closure(c, configs.get(), closureBusy, true, fullCtx, false); + } + + return configs; +} + +std::unique_ptr<ATNConfigSet> ParserATNSimulator::applyPrecedenceFilter(ATNConfigSet *configs) { + std::map<size_t, Ref<const PredictionContext>> statesFromAlt1; + std::unique_ptr<ATNConfigSet> configSet(new ATNConfigSet(configs->fullCtx)); + for (const auto &config : configs->configs) { + // handle alt 1 first + if (config->alt != 1) { + continue; + } + + Ref<const SemanticContext> updatedContext = config->semanticContext->evalPrecedence(parser, _outerContext); + if (updatedContext == nullptr) { + // the configuration was eliminated + continue; + } + + statesFromAlt1[config->state->stateNumber] = config->context; + if (updatedContext != config->semanticContext) { + configSet->add(std::make_shared<ATNConfig>(*config, updatedContext), &mergeCache); + } + else { + configSet->add(config, &mergeCache); + } + } + + for (const auto &config : configs->configs) { + if (config->alt == 1) { + // already handled + continue; + } + + if (!config->isPrecedenceFilterSuppressed()) { + /* In the future, this elimination step could be updated to also + * filter the prediction context for alternatives predicting alt>1 + * (basically a graph subtraction algorithm). + */ + auto iterator = statesFromAlt1.find(config->state->stateNumber); + if (iterator != statesFromAlt1.end() && *iterator->second == *config->context) { + // eliminated + continue; + } + } + + configSet->add(config, &mergeCache); + } + + return configSet; +} + +atn::ATNState* ParserATNSimulator::getReachableTarget(const Transition *trans, size_t ttype) { + if (trans->matches(ttype, 0, atn.maxTokenType)) { + return trans->target; + } + + return nullptr; +} + +// Note that caller must memory manage the returned value from this function +std::vector<Ref<const SemanticContext>> ParserATNSimulator::getPredsForAmbigAlts(const BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts) { + // REACH=[1|1|[]|0:0, 1|2|[]|0:1] + /* altToPred starts as an array of all null contexts. The entry at index i + * corresponds to alternative i. altToPred[i] may have one of three values: + * 1. null: no ATNConfig c is found such that c.alt==i + * 2. SemanticContext.NONE: At least one ATNConfig c exists such that + * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, + * alt i has at least one un-predicated config. + * 3. Non-NONE Semantic Context: There exists at least one, and for all + * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + * + * From this, it is clear that NONE||anything==NONE. + */ + std::vector<Ref<const SemanticContext>> altToPred(nalts + 1); + + for (const auto &c : configs->configs) { + if (ambigAlts.test(c->alt)) { + altToPred[c->alt] = SemanticContext::Or(altToPred[c->alt], c->semanticContext); + } + } + + size_t nPredAlts = 0; + for (size_t i = 1; i <= nalts; i++) { + if (altToPred[i] == nullptr) { + altToPred[i] = SemanticContext::Empty::Instance; + } else if (altToPred[i] != SemanticContext::Empty::Instance) { + nPredAlts++; + } + } + + // nonambig alts are null in altToPred + if (nPredAlts == 0) { + altToPred.clear(); + } +#if DEBUG_ATN == 1 + std::cout << "getPredsForAmbigAlts result " << Arrays::toString(altToPred) << std::endl; +#endif + + return altToPred; +} + +std::vector<dfa::DFAState::PredPrediction> ParserATNSimulator::getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + const std::vector<Ref<const SemanticContext>> &altToPred) { + bool containsPredicate = std::find_if(altToPred.begin(), altToPred.end(), [](const Ref<const SemanticContext> &context) { + return context != SemanticContext::Empty::Instance; + }) != altToPred.end(); + std::vector<dfa::DFAState::PredPrediction> pairs; + if (containsPredicate) { + for (size_t i = 1; i < altToPred.size(); i++) { + const auto &pred = altToPred[i]; + assert(pred != nullptr); // unpredicted is indicated by SemanticContext.NONE + if (ambigAlts.test(i)) { + pairs.emplace_back(pred, static_cast<int>(i)); + } + } + } + return pairs; +} + +size_t ParserATNSimulator::getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext) +{ + std::pair<ATNConfigSet *, ATNConfigSet *> sets = splitAccordingToSemanticValidity(configs, outerContext); + std::unique_ptr<ATNConfigSet> semValidConfigs(sets.first); + std::unique_ptr<ATNConfigSet> semInvalidConfigs(sets.second); + size_t alt = getAltThatFinishedDecisionEntryRule(semValidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // semantically/syntactically viable path exists + return alt; + } + // Is there a syntactically valid path with a failed pred? + if (!semInvalidConfigs->configs.empty()) { + alt = getAltThatFinishedDecisionEntryRule(semInvalidConfigs.get()); + if (alt != ATN::INVALID_ALT_NUMBER) { // syntactically viable path exists + return alt; + } + } + return ATN::INVALID_ALT_NUMBER; +} + +size_t ParserATNSimulator::getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs) { + misc::IntervalSet alts; + for (const auto &c : configs->configs) { + if (c->getOuterContextDepth() > 0 || (c->state != nullptr && c->state->getStateType() == ATNStateType::RULE_STOP && c->context->hasEmptyPath())) { + alts.add(c->alt); + } + } + if (alts.size() == 0) { + return ATN::INVALID_ALT_NUMBER; + } + return alts.getMinElement(); +} + +std::pair<ATNConfigSet *, ATNConfigSet *> ParserATNSimulator::splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext) { + + // mem-check: both pointers must be freed by the caller. + ATNConfigSet *succeeded(new ATNConfigSet(configs->fullCtx)); + ATNConfigSet *failed(new ATNConfigSet(configs->fullCtx)); + for (const auto &c : configs->configs) { + if (c->semanticContext != SemanticContext::Empty::Instance) { + bool predicateEvaluationResult = evalSemanticContext(c->semanticContext, outerContext, c->alt, configs->fullCtx); + if (predicateEvaluationResult) { + succeeded->add(c); + } else { + failed->add(c); + } + } else { + succeeded->add(c); + } + } + return { succeeded, failed }; +} + +BitSet ParserATNSimulator::evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions, + ParserRuleContext *outerContext, bool complete) { + BitSet predictions; + for (const auto &prediction : predPredictions) { + if (prediction.pred == SemanticContext::Empty::Instance) { + predictions.set(prediction.alt); + if (!complete) { + break; + } + continue; + } + + bool fullCtx = false; // in dfa + bool predicateEvaluationResult = evalSemanticContext(prediction.pred, outerContext, prediction.alt, fullCtx); +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "eval pred " << prediction.toString() << " = " << predicateEvaluationResult << std::endl; +#endif + + if (predicateEvaluationResult) { +#if DEBUG_ATN == 1 || DEBUG_DFA == 1 + std::cout << "PREDICT " << prediction.alt << std::endl; +#endif + + predictions.set(prediction.alt); + if (!complete) { + break; + } + } + } + + return predictions; +} + +bool ParserATNSimulator::evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t /*alt*/, bool /*fullCtx*/) { + return pred->eval(parser, parserCallStack); +} + +void ParserATNSimulator::closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon) { + const int initialDepth = 0; + closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, initialDepth, treatEofAsEpsilon); + + assert(!fullCtx || !configs->dipsIntoOuterContext); +} + +void ParserATNSimulator::closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, + ATNConfig::Set &closureBusy, bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + +#if DEBUG_ATN == 1 + std::cout << "closure(" << config->toString(true) << ")" << std::endl; +#endif + + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if (!config->context->isEmpty()) { + for (size_t i = 0; i < config->context->size(); i++) { + if (config->context->getReturnState(i) == PredictionContext::EMPTY_RETURN_STATE) { + if (fullCtx) { + configs->add(std::make_shared<ATNConfig>(*config, config->state, PredictionContext::EMPTY), &mergeCache); + continue; + } else { + // we have no context info, just chase follow links (if greedy) +#if DEBUG_ATN == 1 + std::cout << "FALLING off rule " << getRuleName(config->state->ruleIndex) << std::endl; +#endif + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); + } + continue; + } + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + Ref<const PredictionContext> newContext = config->context->getParent(i); // "pop" return state + Ref<ATNConfig> c = std::make_shared<ATNConfig>(returnState, config->alt, newContext, config->semanticContext); + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + // + // This assignment also propagates the + // isPrecedenceFilterSuppressed() value to the new + // configuration. + c->reachesIntoOuterContext = config->reachesIntoOuterContext; + assert(depth > INT_MIN); + + closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth - 1, treatEofAsEpsilon); + } + return; + } else if (fullCtx) { + // reached end of start rule + configs->add(config, &mergeCache); + return; + } else { + // else if we have no context info, just chase follow links (if greedy) + } + } + + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon); +} + +void ParserATNSimulator::closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon) { + ATNState *p = config->state; + // optimization + if (!p->epsilonOnlyTransitions) { + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. + configs->add(config, &mergeCache); + } + + for (size_t i = 0; i < p->transitions.size(); i++) { + if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config.get())) + continue; + + const Transition *t = p->transitions[i].get(); + bool continueCollecting = !(t != nullptr && t->getTransitionType() == TransitionType::ACTION) && collectPredicates; + Ref<ATNConfig> c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon); + if (c != nullptr) { + int newDepth = depth; + if (config->state != nullptr && config->state->getStateType() == ATNStateType::RULE_STOP) { + assert(!fullCtx); + + // target fell off end of rule; mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if this is > 0. + + if (closureBusy.count(c) > 0) { + // avoid infinite recursion for right-recursive rules + continue; + } + closureBusy.insert(c); + + if (_dfa != nullptr && _dfa->isPrecedenceDfa()) { + size_t outermostPrecedenceReturn = downCast<const EpsilonTransition *>(t)->outermostPrecedenceReturn(); + if (outermostPrecedenceReturn == _dfa->atnStartState->ruleIndex) { + c->setPrecedenceFilterSuppressed(true); + } + } + + c->reachesIntoOuterContext++; + + if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + configs->dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method + assert(newDepth > INT_MIN); + + newDepth--; +#if DEBUG_DFA == 1 + std::cout << "dips into outer ctx: " << c << std::endl; +#endif + + } else if (!t->isEpsilon()) { + // avoid infinite recursion for EOF* and EOF+ + if (closureBusy.count(c) == 0) { + closureBusy.insert(c); + } else { + continue; + } + } + + if (t != nullptr && t->getTransitionType() == TransitionType::RULE) { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if (newDepth >= 0) { + newDepth++; + } + } + + closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon); + } + } +} + +bool ParserATNSimulator::canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const { + if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) + return false; + + ATNState *p = config->state; + + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if (p->getStateType() != ATNStateType::STAR_LOOP_ENTRY || + !((StarLoopEntryState *)p)->isPrecedenceDecision || // Are we the special loop entry/exit state? + config->context->isEmpty() || // If SLL wildcard + config->context->hasEmptyPath()) + { + return false; + } + + // Require all return states to return back to the same rule + // that p is in. + size_t numCtxs = config->context->size(); + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + ATNState *returnState = atn.states[config->context->getReturnState(i)]; + if (returnState->ruleIndex != p->ruleIndex) + return false; + } + + BlockStartState *decisionStartState = (BlockStartState *)p->transitions[0]->target; + size_t blockEndStateNum = decisionStartState->endState->stateNumber; + BlockEndState *blockEndState = (BlockEndState *)atn.states[blockEndStateNum]; + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + for (size_t i = 0; i < numCtxs; i++) { // for each stack context + size_t returnStateNumber = config->context->getReturnState(i); + ATNState *returnState = atn.states[returnStateNumber]; + // All states must have single outgoing epsilon edge. + if (returnState->transitions.size() != 1 || !returnState->transitions[0]->isEpsilon()) + { + return false; + } + + // Look for prefix op case like 'not expr', (' type ')' expr + ATNState *returnStateTarget = returnState->transitions[0]->target; + if (returnState->getStateType() == ATNStateType::BLOCK_END && returnStateTarget == p) { + continue; + } + + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if (returnState == blockEndState) { + continue; + } + + // Look for ternary expr ? expr : expr. The return state points at block end, + // which points at loop entry state + if (returnStateTarget == blockEndState) { + continue; + } + + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if (returnStateTarget->getStateType() == ATNStateType::BLOCK_END && + returnStateTarget->transitions.size() == 1 && + returnStateTarget->transitions[0]->isEpsilon() && + returnStateTarget->transitions[0]->target == p) + { + continue; + } + + // Anything else ain't conforming. + return false; + } + + return true; +} + +std::string ParserATNSimulator::getRuleName(size_t index) { + if (parser != nullptr) { + return parser->getRuleNames()[index]; + } + return "<rule " + std::to_string(index) + ">"; +} + +Ref<ATNConfig> ParserATNSimulator::getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon) { + switch (t->getTransitionType()) { + case TransitionType::RULE: + return ruleTransition(config, static_cast<const RuleTransition*>(t)); + + case TransitionType::PRECEDENCE: + return precedenceTransition(config, static_cast<const PrecedencePredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case TransitionType::PREDICATE: + return predTransition(config, static_cast<const PredicateTransition*>(t), collectPredicates, inContext, fullCtx); + + case TransitionType::ACTION: + return actionTransition(config, static_cast<const ActionTransition*>(t)); + + case TransitionType::EPSILON: + return std::make_shared<ATNConfig>(*config, t->target); + + case TransitionType::ATOM: + case TransitionType::RANGE: + case TransitionType::SET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if (treatEofAsEpsilon) { + if (t->matches(Token::EOF, 0, 1)) { + return std::make_shared<ATNConfig>(*config, t->target); + } + } + + return nullptr; + + default: + return nullptr; + } +} + +Ref<ATNConfig> ParserATNSimulator::actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "ACTION edge " << t->ruleIndex << ":" << t->actionIndex << std::endl; +#endif + + return std::make_shared<ATNConfig>(*config, t->target); +} + +Ref<ATNConfig> ParserATNSimulator::precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->getPrecedence() << ">=_p" << ", ctx dependent=true" << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c; + if (collectPredicates && inContext) { + const auto &predicate = pt->getPredicate(); + + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(predicate, _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(*config, pt->target); // no pred context + } + } else { + Ref<const SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(*config, pt->target, std::move(newSemCtx)); + } + } else { + c = std::make_shared<ATNConfig>(*config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx) { +#if DEBUG_DFA == 1 + std::cout << "PRED (collectPredicates=" << collectPredicates << ") " << pt->getRuleIndex() << ":" << pt->getPredIndex() << ", ctx dependent=" << pt->isCtxDependent() << std::endl; + if (parser != nullptr) { + std::cout << "context surrounding pred is " << Arrays::listToString(parser->getRuleInvocationStack(), ", ") << std::endl; + } +#endif + + Ref<ATNConfig> c = nullptr; + if (collectPredicates && (!pt->isCtxDependent() || (pt->isCtxDependent() && inContext))) { + const auto &predicate = pt->getPredicate(); + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + size_t currentPosition = _input->index(); + _input->seek(_startIndex); + bool predSucceeds = evalSemanticContext(predicate, _outerContext, config->alt, fullCtx); + _input->seek(currentPosition); + if (predSucceeds) { + c = std::make_shared<ATNConfig>(*config, pt->target); // no pred context + } + } else { + Ref<const SemanticContext> newSemCtx = SemanticContext::And(config->semanticContext, predicate); + c = std::make_shared<ATNConfig>(*config, pt->target, std::move(newSemCtx)); + } + } else { + c = std::make_shared<ATNConfig>(*config, pt->target); + } + +#if DEBUG_DFA == 1 + std::cout << "config from pred transition=" << c << std::endl; +#endif + + return c; +} + +Ref<ATNConfig> ParserATNSimulator::ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t) { +#if DEBUG_DFA == 1 + std::cout << "CALL rule " << getRuleName(t->target->ruleIndex) << ", ctx=" << config->context << std::endl; +#endif + + atn::ATNState *returnState = t->followState; + Ref<const PredictionContext> newContext = SingletonPredictionContext::create(config->context, returnState->stateNumber); + return std::make_shared<ATNConfig>(*config, t->target, newContext); +} + +BitSet ParserATNSimulator::getConflictingAlts(ATNConfigSet *configs) { + std::vector<BitSet> altsets = PredictionModeClass::getConflictingAltSubsets(configs); + return PredictionModeClass::getAlts(altsets); +} + +BitSet ParserATNSimulator::getConflictingAltsOrUniqueAlt(ATNConfigSet *configs) { + BitSet conflictingAlts; + if (configs->uniqueAlt != ATN::INVALID_ALT_NUMBER) { + conflictingAlts.set(configs->uniqueAlt); + } else { + conflictingAlts = configs->conflictingAlts; + } + return conflictingAlts; +} + +std::string ParserATNSimulator::getTokenName(size_t t) { + if (t == Token::EOF) { + return "EOF"; + } + + const dfa::Vocabulary &vocabulary = parser != nullptr ? parser->getVocabulary() : dfa::Vocabulary(); + std::string displayName = vocabulary.getDisplayName(t); + if (displayName == std::to_string(t)) { + return displayName; + } + + return displayName + "<" + std::to_string(t) + ">"; +} + +std::string ParserATNSimulator::getLookaheadName(TokenStream *input) { + return getTokenName(input->LA(1)); +} + +void ParserATNSimulator::dumpDeadEndConfigs(NoViableAltException &nvae) { + std::cerr << "dead end configs: "; + for (const auto &c : nvae.getDeadEndConfigs()->configs) { + std::string trans = "no edges"; + if (c->state->transitions.size() > 0) { + const Transition *t = c->state->transitions[0].get(); + if (t != nullptr && t->getTransitionType() == TransitionType::ATOM) { + const AtomTransition *at = static_cast<const AtomTransition*>(t); + trans = "Atom " + getTokenName(at->_label); + } else if (t != nullptr && t->getTransitionType() == TransitionType::SET) { + const SetTransition *st = static_cast<const SetTransition*>(t); + trans = "Set "; + trans += st->set.toString(); + } else if (t != nullptr && t->getTransitionType() == TransitionType::NOT_SET) { + const SetTransition *st = static_cast<const NotSetTransition*>(t); + trans = "~Set "; + trans += st->set.toString(); + } + } + std::cerr << c->toString(true) + ":" + trans; + } +} + +NoViableAltException ParserATNSimulator::noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs) { + return NoViableAltException(parser, input, input->get(startIndex), input->LT(1), configs, outerContext, deleteConfigs); +} + +size_t ParserATNSimulator::getUniqueAlt(ATNConfigSet *configs) { + size_t alt = ATN::INVALID_ALT_NUMBER; + for (const auto &c : configs->configs) { + if (alt == ATN::INVALID_ALT_NUMBER) { + alt = c->alt; // found first alt + } else if (c->alt != alt) { + return ATN::INVALID_ALT_NUMBER; + } + } + return alt; +} + +dfa::DFAState *ParserATNSimulator::addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to) { +#if DEBUG_DFA == 1 + std::cout << "EDGE " << from << " -> " << to << " upon " << getTokenName(t) << std::endl; +#endif + + if (to == nullptr) { + return nullptr; + } + + { + UniqueLock<SharedMutex> stateLock(atn._stateMutex); + to = addDFAState(dfa, to); // used existing if possible not incoming + } + if (from == nullptr || t > (int)atn.maxTokenType) { + return to; + } + + { + UniqueLock<SharedMutex> edgeLock(atn._edgeMutex); + from->edges[t] = to; // connect + } + +#if DEBUG_DFA == 1 + std::string dfaText; + if (parser != nullptr) { + dfaText = dfa.toString(parser->getVocabulary()); + } else { + dfaText = dfa.toString(dfa::Vocabulary()); + } + std::cout << "DFA=\n" << dfaText << std::endl; +#endif + + return to; +} + +dfa::DFAState *ParserATNSimulator::addDFAState(dfa::DFA &dfa, dfa::DFAState *D) { + if (D == ERROR.get()) { + return D; + } + + // Optimizing the configs below should not alter the hash code. Thus we can just do an insert + // which will only succeed if an equivalent DFAState does not already exist. + auto [existing, inserted] = dfa.states.insert(D); + if (!inserted) { + return *existing; + } + + // Previously we did a lookup, then set fields, then inserted. It was `dfa.states.size()`, since + // we already inserted we need to subtract one. + D->stateNumber = static_cast<int>(dfa.states.size() - 1); + if (!D->configs->isReadonly()) { + D->configs->optimizeConfigs(this); + D->configs->setReadonly(true); + } + +#if DEBUG_DFA == 1 + std::cout << "adding new DFA state: " << D << std::endl; +#endif + + return D; +} + +void ParserATNSimulator::reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAttemptingFullContext decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportAttemptingFullContext(parser, dfa, startIndex, stopIndex, conflictingAlts, configs); + } +} + +void ParserATNSimulator::reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval(startIndex, stopIndex); + std::cout << "reportContextSensitivity decision=" << dfa.decision << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs); + } +} + +void ParserATNSimulator::reportAmbiguity(dfa::DFA &dfa, dfa::DFAState * /*D*/, size_t startIndex, size_t stopIndex, + bool exact, const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) { +#if DEBUG_DFA == 1 || RETRY_DEBUG == 1 + misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex); + std::cout << "reportAmbiguity " << ambigAlts << ":" << configs << ", input=" << parser->getTokenStream()->getText(interval) << std::endl; +#endif + + if (parser != nullptr) { + parser->getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } +} + +void ParserATNSimulator::setPredictionMode(PredictionMode newMode) { + _mode = newMode; +} + +atn::PredictionMode ParserATNSimulator::getPredictionMode() { + return _mode; +} + +Parser* ParserATNSimulator::getParser() { + return parser; +} + +#ifdef _MSC_VER +#pragma warning (disable:4996) // 'getenv': This function or variable may be unsafe. Consider using _dupenv_s instead. +#endif + +bool ParserATNSimulator::getLrLoopSetting() { + char *var = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT"); + if (var == nullptr) + return false; + std::string value(var); + return value == "true" || value == "1"; +} + +#ifdef _MSC_VER +#pragma warning (default:4996) +#endif + +void ParserATNSimulator::InitializeInstanceFields() { + _mode = PredictionMode::LL; + _startIndex = 0; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h new file mode 100644 index 0000000000..28fd059dd2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulator.h @@ -0,0 +1,911 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "PredictionMode.h" +#include "dfa/DFAState.h" +#include "atn/ATNSimulator.h" +#include "atn/PredictionContext.h" +#include "atn/PredictionContextMergeCache.h" +#include "atn/ParserATNSimulatorOptions.h" +#include "SemanticContext.h" +#include "atn/ATNConfig.h" + +namespace antlr4 { +namespace atn { + + /** + * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. + * + * <p> + * The basic complexity of the adaptive strategy makes it harder to understand. + * We begin with ATN simulation to build paths in a DFA. Subsequent prediction + * requests go through the DFA first. If they reach a state without an edge for + * the current symbol, the algorithm fails over to the ATN simulation to + * complete the DFA path for the current input (until it finds a conflict state + * or uniquely predicting state).</p> + * + * <p> + * All of that is done without using the outer context because we want to create + * a DFA that is not dependent upon the rule invocation stack when we do a + * prediction. One DFA works in all contexts. We avoid using context not + * necessarily because it's slower, although it can be, but because of the DFA + * caching problem. The closure routine only considers the rule invocation stack + * created during prediction beginning in the decision rule. For example, if + * prediction occurs without invoking another rule's ATN, there are no context + * stacks in the configurations. When lack of context leads to a conflict, we + * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing + * strategy (versus full LL(*)).</p> + * + * <p> + * When SLL yields a configuration set with conflict, we rewind the input and + * retry the ATN simulation, this time using full outer context without adding + * to the DFA. Configuration context stacks will be the full invocation stacks + * from the start rule. If we get a conflict using full context, then we can + * definitively say we have a true ambiguity for that input sequence. If we + * don't get a conflict, it implies that the decision is sensitive to the outer + * context. (It is not context-sensitive in the sense of context-sensitive + * grammars.)</p> + * + * <p> + * The next time we reach this DFA state with an SLL conflict, through DFA + * simulation, we will again retry the ATN simulation using full context mode. + * This is slow because we can't save the results and have to "interpret" the + * ATN each time we get that input.</p> + * + * <p> + * <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p> + * + * <p> + * We could cache results from full context to predicted alternative easily and + * that saves a lot of time but doesn't work in presence of predicates. The set + * of visible predicates from the ATN start state changes depending on the + * context, because closure can fall off the end of a rule. I tried to cache + * tuples (stack context, semantic context, predicted alt) but it was slower + * than interpreting and much more complicated. Also required a huge amount of + * memory. The goal is not to create the world's fastest parser anyway. I'd like + * to keep this algorithm simple. By launching multiple threads, we can improve + * the speed of parsing across a large number of files.</p> + * + * <p> + * There is no strict ordering between the amount of input used by SLL vs LL, + * which makes it really hard to build a cache for full context. Let's say that + * we have input A B C that leads to an SLL conflict with full context X. That + * implies that using X we might only use A B but we could also use A B C D to + * resolve conflict. Input A B C D could predict alternative 1 in one position + * in the input and A B C E could predict alternative 2 in another position in + * input. The conflicting SLL configurations could still be non-unique in the + * full context prediction, which would lead us to requiring more input than the + * original A B C. To make a prediction cache work, we have to track the exact + * input used during the previous prediction. That amounts to a cache that maps + * X to a specific DFA for that context.</p> + * + * <p> + * Something should be done for left-recursive expression predictions. They are + * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry + * with full LL thing Sam does.</p> + * + * <p> + * <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p> + * + * <p> + * We avoid doing full context retry when the outer context is empty, we did not + * dip into the outer context by falling off the end of the decision state rule, + * or when we force SLL mode.</p> + * + * <p> + * As an example of the not dip into outer context case, consider as super + * constructor calls versus function calls. One grammar might look like + * this:</p> + * + * <pre> + * ctorBody + * : '{' superCall? stat* '}' + * ; + * </pre> + * + * <p> + * Or, you might see something like</p> + * + * <pre> + * stat + * : superCall ';' + * | expression ';' + * | ... + * ; + * </pre> + * + * <p> + * In both cases I believe that no closure operations will dip into the outer + * context. In the first case ctorBody in the worst case will stop at the '}'. + * In the 2nd case it should stop at the ';'. Both cases should stay within the + * entry rule and not dip into the outer context.</p> + * + * <p> + * <strong>PREDICATES</strong></p> + * + * <p> + * Predicates are always evaluated if present in either SLL or LL both. SLL and + * LL simulation deals with predicates differently. SLL collects predicates as + * it performs closure operations like ANTLR v3 did. It delays predicate + * evaluation until it reaches and accept state. This allows us to cache the SLL + * ATN simulation whereas, if we had evaluated predicates on-the-fly during + * closure, the DFA state configuration sets would be different and we couldn't + * build up a suitable DFA.</p> + * + * <p> + * When building a DFA accept state during ATN simulation, we evaluate any + * predicates and return the sole semantically valid alternative. If there is + * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, + * we throw an exception. Alternatives without predicates act like they have + * true predicates. The simple way to think about it is to strip away all + * alternatives with false predicates and choose the minimum alternative that + * remains.</p> + * + * <p> + * When we start in the DFA and reach an accept state that's predicated, we test + * those and return the minimum semantically viable alternative. If no + * alternatives are viable, we throw an exception.</p> + * + * <p> + * During full LL ATN simulation, closure always evaluates predicates and + * on-the-fly. This is crucial to reducing the configuration set size during + * closure. It hits a landmine when parsing with the Java grammar, for example, + * without this on-the-fly evaluation.</p> + * + * <p> + * <strong>SHARING DFA</strong></p> + * + * <p> + * All instances of the same parser share the same decision DFAs through a + * static field. Each instance gets its own ATN simulator but they share the + * same {@link #decisionToDFA} field. They also share a + * {@link PredictionContextCache} object that makes sure that all + * {@link PredictionContext} objects are shared among the DFA states. This makes + * a big size difference.</p> + * + * <p> + * <strong>THREAD SAFETY</strong></p> + * + * <p> + * The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when + * it adds a new DFA object to that array. {@link #addDFAEdge} + * locks on the DFA for the current decision when setting the + * {@link DFAState#edges} field. {@link #addDFAState} locks on + * the DFA for the current decision when looking up a DFA state to see if it + * already exists. We must make sure that all requests to add DFA states that + * are equivalent result in the same shared DFA object. This is because lots of + * threads will be trying to update the DFA at once. The + * {@link #addDFAState} method also locks inside the DFA lock + * but this time on the shared context cache when it rebuilds the + * configurations' {@link PredictionContext} objects using cached + * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is + * safe as long as we can guarantee that all threads referencing + * {@code s.edge[t]} get the same physical target {@link DFAState}, or + * {@code null}. Once into the DFA, the DFA simulation does not reference the + * {@link DFA#states} map. It follows the {@link DFAState#edges} field to new + * targets. The DFA simulator will either find {@link DFAState#edges} to be + * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or + * {@code dfa.edges[t]} to be non-null. The + * {@link #addDFAEdge} method could be racing to set the field + * but in either case the DFA simulator works; if {@code null}, and requests ATN + * simulation. It could also race trying to get {@code dfa.edges[t]}, but either + * way it will work because it's not doing a test and set operation.</p> + * + * <p> + * <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage + * Parsing)</strong></p> + * + * <p> + * Sam pointed out that if SLL does not give a syntax error, then there is no + * point in doing full LL, which is slower. We only have to try LL if we get a + * syntax error. For maximum speed, Sam starts the parser set to pure SLL + * mode with the {@link BailErrorStrategy}:</p> + * + * <pre> + * parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )}; + * parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); + * </pre> + * + * <p> + * If it does not get a syntax error, then we're done. If it does get a syntax + * error, we need to retry with the combined SLL/LL strategy.</p> + * + * <p> + * The reason this works is as follows. If there are no SLL conflicts, then the + * grammar is SLL (at least for that input set). If there is an SLL conflict, + * the full LL analysis must yield a set of viable alternatives which is a + * subset of the alternatives reported by SLL. If the LL set is a singleton, + * then the grammar is LL but not SLL. If the LL set is the same size as the SLL + * set, the decision is SLL. If the LL set has size > 1, then that decision + * is truly ambiguous on the current input. If the LL set is smaller, then the + * SLL conflict resolution might choose an alternative that the full LL would + * rule out as a possibility based upon better context information. If that's + * the case, then the SLL parse will definitely get an error because the full LL + * analysis says it's not viable. If SLL conflict resolution chooses an + * alternative within the LL set, them both SLL and LL would choose the same + * alternative because they both choose the minimum of multiple conflicting + * alternatives.</p> + * + * <p> + * Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and + * a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL + * parsing will get an error because SLL will pursue alternative 1. If + * <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will + * choose the same alternative because alternative one is the minimum of either + * set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax + * error. If <em>s</em> is {@code {1}} then SLL will succeed.</p> + * + * <p> + * Of course, if the input is invalid, then we will get an error for sure in + * both SLL and LL parsing. Erroneous input will therefore require 2 passes over + * the input.</p> + */ + class ANTLR4CPP_PUBLIC ParserATNSimulator : public ATNSimulator { + public: + /// Testing only! + ParserATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache); + + ParserATNSimulator(Parser *parser, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, + PredictionContextCache &sharedContextCache, + const ParserATNSimulatorOptions &options); + + virtual void reset() override; + virtual void clearDFA() override; + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext); + + static const bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT; + + std::vector<dfa::DFA> &decisionToDFA; + + /** Implements first-edge (loop entry) elimination as an optimization + * during closure operations. See antlr/antlr4#1398. + * + * The optimization is to avoid adding the loop entry config when + * the exit path can only lead back to the same + * StarLoopEntryState after popping context at the rule end state + * (traversing only epsilon edges, so we're still in closure, in + * this same rule). + * + * We need to detect any state that can reach loop entry on + * epsilon w/o exiting rule. We don't have to look at FOLLOW + * links, just ensure that all stack tops for config refer to key + * states in LR rule. + * + * To verify we are in the right situation we must first check + * closure is at a StarLoopEntryState generated during LR removal. + * Then we check that each stack top of context is a return state + * from one of these cases: + * + * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + * 2. expr op expr. The return state is the block end of internal block of (...)* + * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + * That state points at block end of internal block of (...)*. + * 4. expr '?' expr ':' expr. The return state points at block end, + * which points at loop entry state. + * + * If any is true for each stack top, then closure does not add a + * config to the current config set for edge[0], the loop entry branch. + * + * Conditions fail if any context for the current config is: + * + * a. empty (we'd fall out of expr to do a global FOLLOW which could + * even be to some weird spot in expr) or, + * b. lies outside of expr or, + * c. lies within expr but at a state not the BlockEndState + * generated during LR removal + * + * Do we need to evaluate predicates ever in closure for this case? + * + * No. Predicates, including precedence predicates, are only + * evaluated when computing a DFA start state. I.e., only before + * the lookahead (but not parser) consumes a token. + * + * There are no epsilon edges allowed in LR rule alt blocks or in + * the "primary" part (ID here). If closure is in + * StarLoopEntryState any lookahead operation will have consumed a + * token as there are no epsilon-paths that lead to + * StarLoopEntryState. We do not have to evaluate predicates + * therefore if we are in the generated StarLoopEntryState of a LR + * rule. Note that when making a prediction starting at that + * decision point, decision d=2, compute-start-state performs + * closure starting at edges[0], edges[1] emanating from + * StarLoopEntryState. That means it is not performing closure on + * StarLoopEntryState during compute-start-state. + * + * How do we know this always gives same prediction answer? + * + * Without predicates, loop entry and exit paths are ambiguous + * upon remaining input +b (in, say, a+b). Either paths lead to + * valid parses. Closure can lead to consuming + immediately or by + * falling out of this call to expr back into expr and loop back + * again to StarLoopEntryState to match +b. In this special case, + * we choose the more efficient path, which is to take the bypass + * path. + * + * The lookahead language has not changed because closure chooses + * one path over the other. Both paths lead to consuming the same + * remaining input during a lookahead operation. If the next token + * is an operator, lookahead will enter the choice block with + * operators. If it is not, lookahead will exit expr. Same as if + * closure had chosen to enter the choice block immediately. + * + * Closure is examining one config (some loopentrystate, some alt, + * context) which means it is considering exactly one alt. Closure + * always copies the same alt to any derived configs. + * + * How do we know this optimization doesn't mess up precedence in + * our parse trees? + * + * Looking through expr from left edge of stat only has to confirm + * that an input, say, a+b+c; begins with any valid interpretation + * of an expression. The precedence actually doesn't matter when + * making a decision in stat seeing through expr. It is only when + * parsing rule expr that we must use the precedence to get the + * right interpretation and, hence, parse tree. + */ + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig *config) const; + virtual std::string getRuleName(size_t index); + + virtual Ref<ATNConfig> precedenceTransition(Ref<ATNConfig> const& config, const PrecedencePredicateTransition *pt, + bool collectPredicates, bool inContext, bool fullCtx); + + void setPredictionMode(PredictionMode newMode); + PredictionMode getPredictionMode(); + + Parser* getParser(); + + virtual std::string getTokenName(size_t t); + + virtual std::string getLookaheadName(TokenStream *input); + + /// <summary> + /// Used for debugging in adaptivePredict around execATN but I cut + /// it out for clarity now that alg. works well. We can leave this + /// "dead" code for a bit. + /// </summary> + virtual void dumpDeadEndConfigs(NoViableAltException &nvae); + + protected: + Parser *const parser; + + /// <summary> + /// Each prediction operation uses a cache for merge of prediction contexts. + /// Don't keep around as it wastes huge amounts of memory. The merge cache + /// isn't synchronized but we're ok since two threads shouldn't reuse same + /// parser/atnsim object because it can only handle one input at a time. + /// This maps graphs a and b to merged result c. (a,b)->c. We can avoid + /// the merge if we ever see a and b again. Note that (b,a)->c should + /// also be examined during cache lookup. + /// </summary> + PredictionContextMergeCache mergeCache; + size_t _mergeCacheCounter = 0; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream *_input; + size_t _startIndex; + ParserRuleContext *_outerContext; + dfa::DFA *_dfa; // Reference into the decisionToDFA vector. + + /// <summary> + /// Performs ATN simulation to compute a predicted alternative based + /// upon the remaining input, but also updates the DFA cache to avoid + /// having to traverse the ATN again for the same input sequence. + /// + /// There are some key conditions we're looking for after computing a new + /// set of ATN configs (proposed DFA state): + /// if the set is empty, there is no viable alternative for current symbol + /// does the state uniquely predict an alternative? + /// does the state have a conflict that would prevent us from + /// putting it on the work list? + /// + /// We also have some key operations to do: + /// add an edge from previous DFA state to potentially new DFA state, D, + /// upon current symbol but only if adding to work list, which means in all + /// cases except no viable alternative (and possibly non-greedy decisions?) + /// collecting predicates and adding semantic context to DFA accept states + /// adding rule context to context-sensitive DFA accept states + /// consuming an input symbol + /// reporting a conflict + /// reporting an ambiguity + /// reporting a context sensitivity + /// reporting insufficient predicates + /// + /// cover these cases: + /// dead end + /// single alt + /// single alt + preds + /// conflict + /// conflict + preds + /// </summary> + virtual size_t execATN(dfa::DFA &dfa, dfa::DFAState *s0, TokenStream *input, size_t startIndex, + ParserRuleContext *outerContext); + + /// <summary> + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns {@code null}. + /// </summary> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol </param> + /// <returns> The existing target DFA state for the given input symbol + /// {@code t}, or {@code null} if the target state for this edge is not + /// already cached </returns> + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t); + + /// <summary> + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="previousD"> The current DFA state </param> + /// <param name="t"> The next input symbol + /// </param> + /// <returns> The computed target DFA state for the given input symbol + /// {@code t}. If {@code t} does not lead to a valid DFA state, this method + /// returns <seealso cref="#ERROR"/>. </returns> + virtual dfa::DFAState *computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t); + + virtual void predicateDFAState(dfa::DFAState *dfaState, DecisionState *decisionState); + + // comes back with reach.uniqueAlt set to a valid alt + virtual size_t execATNWithFullContext(dfa::DFA &dfa, dfa::DFAState *D, ATNConfigSet *s0, + TokenStream *input, size_t startIndex, ParserRuleContext *outerContext); // how far we got before failing over + + virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx); + + /// <summary> + /// Return a configuration set containing only the configurations from + /// {@code configs} which are in a <seealso cref="RuleStopState"/>. If all + /// configurations in {@code configs} are already in a rule stop state, this + /// method simply returns {@code configs}. + /// <p/> + /// When {@code lookToEndOfRule} is true, this method uses + /// <seealso cref="ATN#nextTokens"/> for each configuration in {@code configs} which is + /// not already in a rule stop state to see if a rule stop state is reachable + /// from the configuration via epsilon-only transitions. + /// </summary> + /// <param name="configs"> the configuration set to update </param> + /// <param name="lookToEndOfRule"> when true, this method checks for rule stop states + /// reachable by epsilon-only transitions from each configuration in + /// {@code configs}. + /// </param> + /// <returns> {@code configs} if all configurations in {@code configs} are in a + /// rule stop state, otherwise return a new configuration set containing only + /// the configurations from {@code configs} which are in a rule stop state </returns> + virtual ATNConfigSet* removeAllConfigsNotInRuleStopState(ATNConfigSet *configs, bool lookToEndOfRule); + + virtual std::unique_ptr<ATNConfigSet> computeStartState(ATNState *p, RuleContext *ctx, bool fullCtx); + + /* parrt internal source braindump that doesn't mess up + * external API spec. + + applyPrecedenceFilter is an optimization to avoid highly + nonlinear prediction of expressions and other left recursive + rules. The precedence predicates such as {3>=prec}? Are highly + context-sensitive in that they can only be properly evaluated + in the context of the proper prec argument. Without pruning, + these predicates are normal predicates evaluated when we reach + conflict state (or unique prediction). As we cannot evaluate + these predicates out of context, the resulting conflict leads + to full LL evaluation and nonlinear prediction which shows up + very clearly with fairly large expressions. + + Example grammar: + + e : e '*' e + | e '+' e + | INT + ; + + We convert that to the following: + + e[int prec] + : INT + ( {3>=prec}? '*' e[4] + | {2>=prec}? '+' e[3] + )* + ; + + The (..)* loop has a decision for the inner block as well as + an enter or exit decision, which is what concerns us here. At + the 1st + of input 1+2+3, the loop entry sees both predicates + and the loop exit also sees both predicates by falling off the + edge of e. This is because we have no stack information with + SLL and find the follow of e, which will hit the return states + inside the loop after e[4] and e[3], which brings it back to + the enter or exit decision. In this case, we know that we + cannot evaluate those predicates because we have fallen off + the edge of the stack and will in general not know which prec + parameter is the right one to use in the predicate. + + Because we have special information, that these are precedence + predicates, we can resolve them without failing over to full + LL despite their context sensitive nature. We make an + assumption that prec[-1] <= prec[0], meaning that the current + precedence level is greater than or equal to the precedence + level of recursive invocations above us in the stack. For + example, if predicate {3>=prec}? is true of the current prec, + then one option is to enter the loop to match it now. The + other option is to exit the loop and the left recursive rule + to match the current operator in rule invocation further up + the stack. But, we know that all of those prec are lower or + the same value and so we can decide to enter the loop instead + of matching it later. That means we can strip out the other + configuration for the exit branch. + + So imagine we have (14,1,$,{2>=prec}?) and then + (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization + allows us to collapse these two configurations. We know that + if {2>=prec}? is true for the current prec parameter, it will + also be true for any prec from an invoking e call, indicated + by dipsIntoOuterContext. As the predicates are both true, we + have the option to evaluate them early in the decision start + state. We do this by stripping both predicates and choosing to + enter the loop as it is consistent with the notion of operator + precedence. It's also how the full LL conflict resolution + would work. + + The solution requires a different DFA start state for each + precedence level. + + The basic filter mechanism is to remove configurations of the + form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. In + other words, for the same ATN state and predicate context, + remove any configuration associated with an exit branch if + there is a configuration associated with the enter branch. + + It's also the case that the filter evaluates precedence + predicates and resolves conflicts according to precedence + levels. For example, for input 1+2+3 at the first +, we see + prediction filtering + + [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), + (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext + + to + + [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext + + This filters because {3>=prec}? evals to true and collapses + (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict + resolution based upon rules of operator precedence fits with + our usual match first alt upon conflict. + + We noticed a problem where a recursive call resets precedence + to 0. Sam's fix: each config has flag indicating if it has + returned from an expr[0] call. then just don't filter any + config with that flag set. flag is carried along in + closure(). so to avoid adding field, set bit just under sign + bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). + With the change you filter "unless (p, 2, pi) was reached + after leaving the rule stop state of the LR rule containing + state p, corresponding to a rule invocation with precedence + level 0" + */ + + /** + * This method transforms the start state computed by + * {@link #computeStartState} to the special start state used by a + * precedence DFA for a particular precedence value. The transformation + * process applies the following changes to the start state's configuration + * set. + * + * <ol> + * <li>Evaluate the precedence predicates for each configuration using + * {@link SemanticContext#evalPrecedence}.</li> + * <li>When {@link ATNConfig#isPrecedenceFilterSuppressed} is {@code false}, + * remove all configurations which predict an alternative greater than 1, + * for which another configuration that predicts alternative 1 is in the + * same ATN state with the same prediction context. This transformation is + * valid for the following reasons: + * <ul> + * <li>The closure block cannot contain any epsilon transitions which bypass + * the body of the closure, so all states reachable via alternative 1 are + * part of the precedence alternatives of the transformed left-recursive + * rule.</li> + * <li>The "primary" portion of a left recursive rule cannot contain an + * epsilon transition, so the only way an alternative other than 1 can exist + * in a state that is also reachable via alternative 1 is by nesting calls + * to the left-recursive rule, with the outer calls not being at the + * preferred precedence level. The + * {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN + * configurations which do not meet this condition, and therefore are not + * eligible for elimination during the filtering process.</li> + * </ul> + * </li> + * </ol> + * + * <p> + * The prediction context must be considered by this filter to address + * situations like the following. + * </p> + * <code> + * <pre> + * grammar TA; + * prog: statement* EOF; + * statement: letterA | statement letterA 'b' ; + * letterA: 'a'; + * </pre> + * </code> + * <p> + * If the above grammar, the ATN state immediately before the token + * reference {@code 'a'} in {@code letterA} is reachable from the left edge + * of both the primary and closure blocks of the left-recursive rule + * {@code statement}. The prediction context associated with each of these + * configurations distinguishes between them, and prevents the alternative + * which stepped out to {@code prog} (and then back in to {@code statement} + * from being eliminated by the filter. + * </p> + * + * @param configs The configuration set computed by + * {@link #computeStartState} as the start state for the DFA. + * @return The transformed configuration set representing the start state + * for a precedence DFA at a particular precedence level (determined by + * calling {@link Parser#getPrecedence}). + */ + std::unique_ptr<ATNConfigSet> applyPrecedenceFilter(ATNConfigSet *configs); + + virtual ATNState *getReachableTarget(const Transition *trans, size_t ttype); + + virtual std::vector<Ref<const SemanticContext>> getPredsForAmbigAlts(const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs, size_t nalts); + + std::vector<dfa::DFAState::PredPrediction> getPredicatePredictions(const antlrcpp::BitSet &ambigAlts, + const std::vector<Ref<const SemanticContext>> &altToPred); + + /** + * This method is used to improve the localization of error messages by + * choosing an alternative rather than throwing a + * {@link NoViableAltException} in particular prediction scenarios where the + * {@link #ERROR} state was reached during ATN simulation. + * + * <p> + * The default implementation of this method uses the following + * algorithm to identify an ATN configuration which successfully parsed the + * decision entry rule. Choosing such an alternative ensures that the + * {@link ParserRuleContext} returned by the calling rule will be complete + * and valid, and the syntax error will be reported later at a more + * localized location.</p> + * + * <ul> + * <li>If a syntactically valid path or paths reach the end of the decision rule and + * they are semantically valid if predicated, return the min associated alt.</li> + * <li>Else, if a semantically invalid but syntactically valid path exist + * or paths exist, return the minimum associated alt. + * </li> + * <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li> + * </ul> + * + * <p> + * In some scenarios, the algorithm described above could predict an + * alternative which will result in a {@link FailedPredicateException} in + * the parser. Specifically, this could occur if the <em>only</em> configuration + * capable of successfully parsing to the end of the decision rule is + * blocked by a semantic predicate. By choosing this alternative within + * {@link #adaptivePredict} instead of throwing a + * {@link NoViableAltException}, the resulting + * {@link FailedPredicateException} in the parser will identify the specific + * predicate which is preventing the parser from successfully parsing the + * decision rule, which helps developers identify and correct logic errors + * in semantic predicates. + * </p> + * + * @param configs The ATN configurations which were valid immediately before + * the {@link #ERROR} state was reached + * @param outerContext The is the \gamma_0 initial parser context from the paper + * or the parser stack at the instant before prediction commences. + * + * @return The value to return from {@link #adaptivePredict}, or + * {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + * identified and {@link #adaptivePredict} should report an error instead. + */ + size_t getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + virtual size_t getAltThatFinishedDecisionEntryRule(ATNConfigSet *configs); + + /** Walk the list of configurations and split them according to + * those that have preds evaluating to true/false. If no pred, assume + * true pred and include in succeeded set. Returns Pair of sets. + * + * Create a new set so as not to alter the incoming parameter. + * + * Assumption: the input stream has been restored to the starting point + * prediction, which is where predicates need to evaluate. + */ + std::pair<ATNConfigSet *, ATNConfigSet *> splitAccordingToSemanticValidity(ATNConfigSet *configs, + ParserRuleContext *outerContext); + + /// <summary> + /// Look through a list of predicate/alt pairs, returning alts for the + /// pairs that win. A {@code NONE} predicate indicates an alt containing an + /// unpredicated config which behaves as "always true." If !complete + /// then we stop at the first predicate that evaluates to true. This + /// includes pairs with null predicates. + /// </summary> + antlrcpp::BitSet evalSemanticContext(const std::vector<dfa::DFAState::PredPrediction> &predPredictions, + ParserRuleContext *outerContext, bool complete); + + /** + * Evaluate a semantic context within a specific parser context. + * + * <p> + * This method might not be called for every semantic context evaluated + * during the prediction process. In particular, we currently do not + * evaluate the following but it may change in the future:</p> + * + * <ul> + * <li>Precedence predicates (represented by + * {@link SemanticContext.PrecedencePredicate}) are not currently evaluated + * through this method.</li> + * <li>Operator predicates (represented by {@link SemanticContext.AND} and + * {@link SemanticContext.OR}) are evaluated as a single semantic + * context, rather than evaluating the operands individually. + * Implementations which require evaluation results from individual + * predicates should override this method to explicitly handle evaluation of + * the operands within operator predicates.</li> + * </ul> + * + * @param pred The semantic context to evaluate + * @param parserCallStack The parser context in which to evaluate the + * semantic context + * @param alt The alternative which is guarded by {@code pred} + * @param fullCtx {@code true} if the evaluation is occurring during LL + * prediction; otherwise, {@code false} if the evaluation is occurring + * during SLL prediction + * + * @since 4.3 + */ + virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx); + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. Might have to advance when we do + ambig detection thought :( + */ + virtual void closure(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, bool treatEofAsEpsilon); + + virtual void closureCheckingStopState(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + /// Do the actual work of walking epsilon edges. + virtual void closure_(Ref<ATNConfig> const& config, ATNConfigSet *configs, ATNConfig::Set &closureBusy, + bool collectPredicates, bool fullCtx, int depth, bool treatEofAsEpsilon); + + virtual Ref<ATNConfig> getEpsilonTarget(Ref<ATNConfig> const& config, const Transition *t, bool collectPredicates, + bool inContext, bool fullCtx, bool treatEofAsEpsilon); + virtual Ref<ATNConfig> actionTransition(Ref<ATNConfig> const& config, const ActionTransition *t); + + virtual Ref<ATNConfig> predTransition(Ref<ATNConfig> const& config, const PredicateTransition *pt, bool collectPredicates, + bool inContext, bool fullCtx); + + virtual Ref<ATNConfig> ruleTransition(Ref<ATNConfig> const& config, const RuleTransition *t); + + /** + * Gets a {@link BitSet} containing the alternatives in {@code configs} + * which are part of one or more conflicting alternative subsets. + * + * @param configs The {@link ATNConfigSet} to analyze. + * @return The alternatives in {@code configs} which are part of one or more + * conflicting alternative subsets. If {@code configs} does not contain any + * conflicting subsets, this method returns an empty {@link BitSet}. + */ + virtual antlrcpp::BitSet getConflictingAlts(ATNConfigSet *configs); + + /// <summary> + /// Sam pointed out a problem with the previous definition, v3, of + /// ambiguous states. If we have another state associated with conflicting + /// alternatives, we should keep going. For example, the following grammar + /// + /// s : (ID | ID ID?) ';' ; + /// + /// When the ATN simulation reaches the state before ';', it has a DFA + /// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + /// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + /// because alternative to has another way to continue, via [6|2|[]]. + /// The key is that we have a single state that has config's only associated + /// with a single alternative, 2, and crucially the state transitions + /// among the configurations are all non-epsilon transitions. That means + /// we don't consider any conflicts that include alternative 2. So, we + /// ignore the conflict between alts 1 and 2. We ignore a set of + /// conflicting alts when there is an intersection with an alternative + /// associated with a single alt state in the state->config-list map. + /// + /// It's also the case that we might have two conflicting configurations but + /// also a 3rd nonconflicting configuration for a different alternative: + /// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + /// + /// a : A | A | A B ; + /// + /// After matching input A, we reach the stop state for rule A, state 1. + /// State 8 is the state right before B. Clearly alternatives 1 and 2 + /// conflict and no amount of further lookahead will separate the two. + /// However, alternative 3 will be able to continue and so we do not + /// stop working on this state. In the previous example, we're concerned + /// with states associated with the conflicting alternatives. Here alt + /// 3 is not associated with the conflicting configs, but since we can continue + /// looking for input reasonably, I don't declare the state done. We + /// ignore a set of conflicting alts when we have an alternative + /// that we still need to pursue. + /// </summary> + + virtual antlrcpp::BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet *configs); + + virtual NoViableAltException noViableAlt(TokenStream *input, ParserRuleContext *outerContext, + ATNConfigSet *configs, size_t startIndex, bool deleteConfigs); + + static size_t getUniqueAlt(ATNConfigSet *configs); + + /// <summary> + /// Add an edge to the DFA, if possible. This method calls + /// <seealso cref="#addDFAState"/> to ensure the {@code to} state is present in the + /// DFA. If {@code from} is {@code null}, or if {@code t} is outside the + /// range of edges that can be represented in the DFA tables, this method + /// returns without adding the edge to the DFA. + /// <p/> + /// If {@code to} is {@code null}, this method returns {@code null}. + /// Otherwise, this method returns the <seealso cref="DFAState"/> returned by calling + /// <seealso cref="#addDFAState"/> for the {@code to} state. + /// </summary> + /// <param name="dfa"> The DFA </param> + /// <param name="from"> The source state for the edge </param> + /// <param name="t"> The input symbol </param> + /// <param name="to"> The target state for the edge + /// </param> + /// <returns> If {@code to} is {@code null}, this method returns {@code null}; + /// otherwise this method returns the result of calling <seealso cref="#addDFAState"/> + /// on {@code to} </returns> + virtual dfa::DFAState *addDFAEdge(dfa::DFA &dfa, dfa::DFAState *from, ssize_t t, dfa::DFAState *to); + + /// <summary> + /// Add state {@code D} to the DFA if it is not already present, and return + /// the actual instance stored in the DFA. If a state equivalent to {@code D} + /// is already in the DFA, the existing state is returned. Otherwise this + /// method returns {@code D} after adding it to the DFA. + /// <p/> + /// If {@code D} is <seealso cref="#ERROR"/>, this method returns <seealso cref="#ERROR"/> and + /// does not change the DFA. + /// </summary> + /// <param name="dfa"> The dfa </param> + /// <param name="D"> The DFA state to add </param> + /// <returns> The state stored in the DFA. This will be either the existing + /// state if {@code D} is already in the DFA, or {@code D} itself if the + /// state was not already present. </returns> + virtual dfa::DFAState *addDFAState(dfa::DFA &dfa, dfa::DFAState *D); + + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, + ATNConfigSet *configs, size_t startIndex, size_t stopIndex); + + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex); + + /// If context sensitive parsing, we know it's ambiguity not conflict. + virtual void reportAmbiguity(dfa::DFA &dfa, + dfa::DFAState *D, // the DFA state from execATN() that had SLL conflicts + size_t startIndex, size_t stopIndex, + bool exact, + const antlrcpp::BitSet &ambigAlts, + ATNConfigSet *configs); // configs that LL not SLL considered conflicting + + private: + // SLL, LL, or LL + exact ambig detection? + PredictionMode _mode; + + static bool getLrLoopSetting(); + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 + diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h new file mode 100644 index 0000000000..ea31226d25 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ParserATNSimulatorOptions.h @@ -0,0 +1,50 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "atn/PredictionContextMergeCacheOptions.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ParserATNSimulatorOptions final { + public: + ParserATNSimulatorOptions& setPredictionContextMergeCacheOptions( + PredictionContextMergeCacheOptions predictionContextMergeCacheOptions) { + _predictionContextMergeCacheOptions = std::move(predictionContextMergeCacheOptions); + return *this; + } + + const PredictionContextMergeCacheOptions& getPredictionContextMergeCacheOptions() const { + return _predictionContextMergeCacheOptions; + } + + private: + PredictionContextMergeCacheOptions _predictionContextMergeCacheOptions; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h new file mode 100644 index 0000000000..b6103dc4d0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusBlockStartState.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// Start of {@code (A|B|...)+} loop. Technically a decision state, but + /// we don't use for code generation; somebody might need it, so I'm defining + /// it for completeness. In reality, the <seealso cref="PlusLoopbackState"/> node is the + /// real decision-making note for {@code A+}. + class ANTLR4CPP_PUBLIC PlusBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + PlusLoopbackState *loopBackState = nullptr; + + PlusBlockStartState() : BlockStartState(ATNStateType::PLUS_BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h new file mode 100644 index 0000000000..07f25aa0c9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PlusLoopbackState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: + /// one to the loop back to start of the block and one to exit. + class ANTLR4CPP_PUBLIC PlusLoopbackState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::PLUS_LOOP_BACK; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + PlusLoopbackState() : DecisionState(ATNStateType::PLUS_LOOP_BACK) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp new file mode 100644 index 0000000000..b8685e9516 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PrecedencePredicateTransition.h" + +using namespace antlr4::atn; + +PrecedencePredicateTransition::PrecedencePredicateTransition(ATNState *target, int precedence) + : Transition(TransitionType::PRECEDENCE, target), _predicate(std::make_shared<SemanticContext::PrecedencePredicate>(precedence)) {} + +bool PrecedencePredicateTransition::isEpsilon() const { + return true; +} + +bool PrecedencePredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string PrecedencePredicateTransition::toString() const { + return "PRECEDENCE " + Transition::toString() + " { precedence: " + std::to_string(getPrecedence()) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h new file mode 100644 index 0000000000..3db79a9b73 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PrecedencePredicateTransition.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PrecedencePredicateTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PRECEDENCE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + PrecedencePredicateTransition(ATNState *target, int precedence); + + int getPrecedence() const { return _predicate->precedence; } + + bool isEpsilon() const override; + bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + std::string toString() const override; + + const Ref<const SemanticContext::PrecedencePredicate>& getPredicate() const { return _predicate; } + + private: + const std::shared_ptr<const SemanticContext::PrecedencePredicate> _predicate; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp new file mode 100644 index 0000000000..73ee2a2b97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "SemanticContext.h" + +#include "atn/PredicateEvalInfo.h" + +using namespace antlr4; +using namespace antlr4::atn; + +PredicateEvalInfo::PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx) + : DecisionEventInfo(decision, nullptr, input, startIndex, stopIndex, fullCtx), + semctx(std::move(semctx)), predictedAlt(predictedAlt), evalResult(evalResult) { +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h new file mode 100644 index 0000000000..f343f541cb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateEvalInfo.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionEventInfo.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// This class represents profiling event information for semantic predicate + /// evaluations which occur during prediction. + /// </summary> + /// <seealso cref= ParserATNSimulator#evalSemanticContext + /// + /// @since 4.3 </seealso> + class ANTLR4CPP_PUBLIC PredicateEvalInfo : public DecisionEventInfo { + public: + /// The semantic context which was evaluated. + const Ref<const SemanticContext> semctx; + + /// <summary> + /// The alternative number for the decision which is guarded by the semantic + /// context <seealso cref="#semctx"/>. Note that other ATN + /// configurations may predict the same alternative which are guarded by + /// other semantic contexts and/or <seealso cref="SemanticContext#NONE"/>. + /// </summary> + const size_t predictedAlt; + + /// The result of evaluating the semantic context <seealso cref="#semctx"/>. + const bool evalResult; + + /// <summary> + /// Constructs a new instance of the <seealso cref="PredicateEvalInfo"/> class with the + /// specified detailed predicate evaluation information. + /// </summary> + /// <param name="decision"> The decision number </param> + /// <param name="input"> The input token stream </param> + /// <param name="startIndex"> The start index for the current prediction </param> + /// <param name="stopIndex"> The index at which the predicate evaluation was + /// triggered. Note that the input stream may be reset to other positions for + /// the actual evaluation of individual predicates. </param> + /// <param name="semctx"> The semantic context which was evaluated </param> + /// <param name="evalResult"> The results of evaluating the semantic context </param> + /// <param name="predictedAlt"> The alternative number for the decision which is + /// guarded by the semantic context {@code semctx}. See <seealso cref="#predictedAlt"/> + /// for more information. </param> + /// <param name="fullCtx"> {@code true} if the semantic context was + /// evaluated during LL prediction; otherwise, {@code false} if the semantic + /// context was evaluated during SLL prediction + /// </param> + /// <seealso cref= ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean) </seealso> + /// <seealso cref= SemanticContext#eval(Recognizer, RuleContext) </seealso> + PredicateEvalInfo(size_t decision, TokenStream *input, size_t startIndex, size_t stopIndex, + Ref<const SemanticContext> semctx, bool evalResult, size_t predictedAlt, bool fullCtx); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp new file mode 100644 index 0000000000..d76dbd203a --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.cpp @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PredicateTransition.h" + +using namespace antlr4::atn; + +PredicateTransition::PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent) + : Transition(TransitionType::PREDICATE, target), _predicate(std::make_shared<SemanticContext::Predicate>(ruleIndex, predIndex, isCtxDependent)) {} + +bool PredicateTransition::isEpsilon() const { + return true; +} + +bool PredicateTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string PredicateTransition::toString() const { + return "PREDICATE " + Transition::toString() + " { ruleIndex: " + std::to_string(getRuleIndex()) + + ", predIndex: " + std::to_string(getPredIndex()) + ", isCtxDependent: " + std::to_string(isCtxDependent()) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h new file mode 100644 index 0000000000..e889b1c198 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredicateTransition.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" +#include "atn/SemanticContext.h" + +namespace antlr4 { +namespace atn { + + /// TODO: this is old comment: + /// A tree of semantic predicates from the grammar AST if label==SEMPRED. + /// In the ATN, labels will always be exactly one predicate, but the DFA + /// may have to combine a bunch of them as it collects predicates from + /// multiple ATN configurations into a single DFA state. + class ANTLR4CPP_PUBLIC PredicateTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::PREDICATE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + PredicateTransition(ATNState *target, size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + size_t getRuleIndex() const { + return _predicate->ruleIndex; + } + + size_t getPredIndex() const { + return _predicate->predIndex; + } + + bool isCtxDependent() const { + return _predicate->isCtxDependent; + } + + bool isEpsilon() const override; + bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + std::string toString() const override; + + const Ref<const SemanticContext::Predicate>& getPredicate() const { return _predicate; } + + private: + const std::shared_ptr<const SemanticContext::Predicate> _predicate; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp new file mode 100644 index 0000000000..704408f04d --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.cpp @@ -0,0 +1,579 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/SingletonPredictionContext.h" +#include "misc/MurmurHash.h" +#include "atn/ArrayPredictionContext.h" +#include "atn/PredictionContextCache.h" +#include "atn/PredictionContextMergeCache.h" +#include "RuleContext.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "support/Arrays.h" +#include "support/CPPUtils.h" +#include "support/Casts.h" + +#include "atn/PredictionContext.h" + +using namespace antlr4; +using namespace antlr4::misc; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + void combineCommonParents(std::vector<Ref<const PredictionContext>> &parents) { + std::unordered_set<Ref<const PredictionContext>> uniqueParents; + uniqueParents.reserve(parents.size()); + for (const auto &parent : parents) { + uniqueParents.insert(parent); + } + for (auto &parent : parents) { + parent = *uniqueParents.find(parent); + } + } + + Ref<const PredictionContext> getCachedContextImpl(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache, + std::unordered_map<Ref<const PredictionContext>, + Ref<const PredictionContext>> &visited) { + if (context->isEmpty()) { + return context; + } + + { + auto iterator = visited.find(context); + if (iterator != visited.end()) { + return iterator->second; // Not necessarly the same as context. + } + } + + auto cached = contextCache.get(context); + if (cached) { + visited[context] = cached; + return cached; + } + + bool changed = false; + + std::vector<Ref<const PredictionContext>> parents(context->size()); + for (size_t i = 0; i < parents.size(); i++) { + auto parent = getCachedContextImpl(context->getParent(i), contextCache, visited); + if (changed || parent != context->getParent(i)) { + if (!changed) { + parents.clear(); + for (size_t j = 0; j < context->size(); j++) { + parents.push_back(context->getParent(j)); + } + + changed = true; + } + + parents[i] = std::move(parent); + } + } + + if (!changed) { + visited[context] = context; + contextCache.put(context); + return context; + } + + Ref<const PredictionContext> updated; + if (parents.empty()) { + updated = PredictionContext::EMPTY; + } else if (parents.size() == 1) { + updated = SingletonPredictionContext::create(std::move(parents[0]), context->getReturnState(0)); + contextCache.put(updated); + } else { + updated = std::make_shared<ArrayPredictionContext>(std::move(parents), downCast<const ArrayPredictionContext*>(context.get())->returnStates); + contextCache.put(updated); + } + + visited[updated] = updated; + visited[context] = updated; + + return updated; + } + + void getAllContextNodesImpl(const Ref<const PredictionContext> &context, + std::vector<Ref<const PredictionContext>> &nodes, + std::unordered_set<const PredictionContext*> &visited) { + + if (visited.find(context.get()) != visited.end()) { + return; // Already done. + } + + visited.insert(context.get()); + nodes.push_back(context); + + for (size_t i = 0; i < context->size(); i++) { + getAllContextNodesImpl(context->getParent(i), nodes, visited); + } + } + + size_t insertOrAssignNodeId(std::unordered_map<const PredictionContext*, size_t> &nodeIds, size_t &nodeId, const PredictionContext *node) { + auto existing = nodeIds.find(node); + if (existing != nodeIds.end()) { + return existing->second; + } + return nodeIds.insert({node, nodeId++}).first->second; + } + +} + +const Ref<const PredictionContext> PredictionContext::EMPTY = std::make_shared<SingletonPredictionContext>(nullptr, PredictionContext::EMPTY_RETURN_STATE); + +//----------------- PredictionContext ---------------------------------------------------------------------------------- + +PredictionContext::PredictionContext(PredictionContextType contextType) : _contextType(contextType), _hashCode(0) {} + +PredictionContext::PredictionContext(PredictionContext&& other) : _contextType(other._contextType), _hashCode(other._hashCode.exchange(0, std::memory_order_relaxed)) {} + +Ref<const PredictionContext> PredictionContext::fromRuleContext(const ATN &atn, RuleContext *outerContext) { + if (outerContext == nullptr) { + return PredictionContext::EMPTY; + } + + // if we are in RuleContext of start rule, s, then PredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if (outerContext->parent == nullptr || outerContext == &ParserRuleContext::EMPTY) { + return PredictionContext::EMPTY; + } + + // If we have a parent, convert it to a PredictionContext graph + auto parent = PredictionContext::fromRuleContext(atn, RuleContext::is(outerContext->parent) ? downCast<RuleContext*>(outerContext->parent) : nullptr); + const auto *transition = downCast<const RuleTransition*>(atn.states[outerContext->invokingState]->transitions[0].get()); + return SingletonPredictionContext::create(std::move(parent), transition->followState->stateNumber); +} + +bool PredictionContext::hasEmptyPath() const { + // since EMPTY_RETURN_STATE can only appear in the last position, we check last one + return getReturnState(size() - 1) == EMPTY_RETURN_STATE; +} + +size_t PredictionContext::hashCode() const { + auto hash = cachedHashCode(); + if (hash == 0) { + hash = hashCodeImpl(); + if (hash == 0) { + hash = std::numeric_limits<size_t>::max(); + } + _hashCode.store(hash, std::memory_order_relaxed); + } + return hash; +} + +Ref<const PredictionContext> PredictionContext::merge(Ref<const PredictionContext> a, Ref<const PredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + assert(a && b); + + // share same graph if both same + if (a == b || *a == *b) { + return a; + } + + const auto aType = a->getContextType(); + const auto bType = b->getContextType(); + + if (aType == PredictionContextType::SINGLETON && bType == PredictionContextType::SINGLETON) { + return mergeSingletons(std::static_pointer_cast<const SingletonPredictionContext>(std::move(a)), + std::static_pointer_cast<const SingletonPredictionContext>(std::move(b)), rootIsWildcard, mergeCache); + } + + // At least one of a or b is array. + // If one is $ and rootIsWildcard, return $ as * wildcard. + if (rootIsWildcard) { + if (a == PredictionContext::EMPTY) { + return a; + } + if (b == PredictionContext::EMPTY) { + return b; + } + } + + // convert singleton so both are arrays to normalize + Ref<const ArrayPredictionContext> left; + if (aType == PredictionContextType::SINGLETON) { + left = std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*a)); + } else { + left = std::static_pointer_cast<const ArrayPredictionContext>(std::move(a)); + } + Ref<const ArrayPredictionContext> right; + if (bType == PredictionContextType::SINGLETON) { + right = std::make_shared<ArrayPredictionContext>(downCast<const SingletonPredictionContext&>(*b)); + } else { + right = std::static_pointer_cast<const ArrayPredictionContext>(std::move(b)); + } + return mergeArrays(std::move(left), std::move(right), rootIsWildcard, mergeCache); +} + +Ref<const PredictionContext> PredictionContext::mergeSingletons(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache) { + auto existing = mergeCache->get(a, b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + auto rootMerge = mergeRoot(a, b, rootIsWildcard); + if (rootMerge) { + if (mergeCache) { + return mergeCache->put(a, b, std::move(rootMerge)); + } + return rootMerge; + } + + const auto& parentA = a->parent; + const auto& parentB = b->parent; + if (a->returnState == b->returnState) { // a == b + auto parent = merge(parentA, parentB, rootIsWildcard, mergeCache); + + // If parent is same as existing a or b parent or reduced to a parent, return it. + if (parent == parentA) { // ax + bx = ax, if a=b + return a; + } + if (parent == parentB) { // ax + bx = bx, if a=b + return b; + } + + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. dup a, a' points at merged array + // new joined parent so create new singleton pointing to it, a' + auto c = SingletonPredictionContext::create(std::move(parent), a->returnState); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + Ref<const PredictionContext> singleParent; + if (a == b || (*parentA == *parentB)) { // ax + bx = [a,b]x + singleParent = parentA; + } + if (singleParent) { // parents are same, sort payloads and use same parent + std::vector<size_t> payloads = { a->returnState, b->returnState }; + if (a->returnState > b->returnState) { + payloads[0] = b->returnState; + payloads[1] = a->returnState; + } + std::vector<Ref<const PredictionContext>> parents = { singleParent, singleParent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + + // parents differ and can't merge them. Just pack together + // into array; can't merge. + // ax + by = [ax,by] + if (a->returnState > b->returnState) { // sort by payload + std::vector<size_t> payloads = { b->returnState, a->returnState }; + std::vector<Ref<const PredictionContext>> parents = { b->parent, a->parent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + std::vector<size_t> payloads = {a->returnState, b->returnState}; + std::vector<Ref<const PredictionContext>> parents = { a->parent, b->parent }; + auto c = std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; +} + +Ref<const PredictionContext> PredictionContext::mergeRoot(Ref<const SingletonPredictionContext> a, Ref<const SingletonPredictionContext> b, + bool rootIsWildcard) { + if (rootIsWildcard) { + if (a == EMPTY) { // * + b = * + return EMPTY; + } + if (b == EMPTY) { // a + * = * + return EMPTY; + } + } else { + if (a == EMPTY && b == EMPTY) { // $ + $ = $ + return EMPTY; + } + if (a == EMPTY) { // $ + x = [$,x] + std::vector<size_t> payloads = { b->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<const PredictionContext>> parents = { b->parent, nullptr }; + return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + } + if (b == EMPTY) { // x + $ = [$,x] ($ is always first if present) + std::vector<size_t> payloads = { a->returnState, EMPTY_RETURN_STATE }; + std::vector<Ref<const PredictionContext>> parents = { a->parent, nullptr }; + return std::make_shared<ArrayPredictionContext>(std::move(parents), std::move(payloads)); + } + } + return nullptr; +} + +Ref<const PredictionContext> PredictionContext::mergeArrays(Ref<const ArrayPredictionContext> a, Ref<const ArrayPredictionContext> b, + bool rootIsWildcard, PredictionContextMergeCache *mergeCache) { + + if (mergeCache) { + auto existing = mergeCache->get(a, b); + if (existing) { + return existing; + } + existing = mergeCache->get(b, a); + if (existing) { + return existing; + } + } + + // merge sorted payloads a + b => M + size_t i = 0; // walks a + size_t j = 0; // walks b + size_t k = 0; // walks target M array + + std::vector<size_t> mergedReturnStates(a->returnStates.size() + b->returnStates.size()); + std::vector<Ref<const PredictionContext>> mergedParents(a->returnStates.size() + b->returnStates.size()); + + // walk and merge to yield mergedParents, mergedReturnStates + while (i < a->returnStates.size() && j < b->returnStates.size()) { + const auto& parentA = a->parents[i]; + const auto& parentB = b->parents[j]; + if (a->returnStates[i] == b->returnStates[j]) { + // same payload (stack tops are equal), must yield merged singleton + size_t payload = a->returnStates[i]; + // $+$ = $ + bool both$ = payload == EMPTY_RETURN_STATE && !parentA && !parentB; + bool ax_ax = (parentA && parentB) && *parentA == *parentB; // ax+ax -> ax + if (both$ || ax_ax) { + mergedParents[k] = parentA; // choose left + mergedReturnStates[k] = payload; + } else { // ax+ay -> a'[x,y] + mergedParents[k] = merge(parentA, parentB, rootIsWildcard, mergeCache); + mergedReturnStates[k] = payload; + } + i++; // hop over left one as usual + j++; // but also skip one in right side since we merge + } else if (a->returnStates[i] < b->returnStates[j]) { // copy a[i] to M + mergedParents[k] = parentA; + mergedReturnStates[k] = a->returnStates[i]; + i++; + } else { // b > a, copy b[j] to M + mergedParents[k] = parentB; + mergedReturnStates[k] = b->returnStates[j]; + j++; + } + k++; + } + + // copy over any payloads remaining in either array + if (i < a->returnStates.size()) { + for (auto p = i; p < a->returnStates.size(); p++) { + mergedParents[k] = a->parents[p]; + mergedReturnStates[k] = a->returnStates[p]; + k++; + } + } else { + for (auto p = j; p < b->returnStates.size(); p++) { + mergedParents[k] = b->parents[p]; + mergedReturnStates[k] = b->returnStates[p]; + k++; + } + } + + // trim merged if we combined a few that had same stack tops + if (k < mergedParents.size()) { // write index < last position; trim + if (k == 1) { // for just one merged element, return singleton top + auto c = SingletonPredictionContext::create(std::move(mergedParents[0]), mergedReturnStates[0]); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; + } + mergedParents.resize(k); + mergedReturnStates.resize(k); + } + + ArrayPredictionContext m(std::move(mergedParents), std::move(mergedReturnStates)); + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if (m == *a) { + if (mergeCache) { + return mergeCache->put(a, b, a); + } + return a; + } + if (m == *b) { + if (mergeCache) { + return mergeCache->put(a, b, b); + } + return b; + } + + combineCommonParents(m.parents); + auto c = std::make_shared<ArrayPredictionContext>(std::move(m)); + if (mergeCache) { + return mergeCache->put(a, b, std::move(c)); + } + return c; +} + +std::string PredictionContext::toDOTString(const Ref<const PredictionContext> &context) { + if (context == nullptr) { + return ""; + } + + std::stringstream ss; + ss << "digraph G {\n" << "rankdir=LR;\n"; + + std::vector<Ref<const PredictionContext>> nodes = getAllContextNodes(context); + std::unordered_map<const PredictionContext*, size_t> nodeIds; + size_t nodeId = 0; + + for (const auto ¤t : nodes) { + if (current->getContextType() == PredictionContextType::SINGLETON) { + std::string s = std::to_string(insertOrAssignNodeId(nodeIds, nodeId, current.get())); + ss << " s" << s; + std::string returnState = std::to_string(current->getReturnState(0)); + if (current == PredictionContext::EMPTY) { + returnState = "$"; + } + ss << " [label=\"" << returnState << "\"];\n"; + continue; + } + Ref<const ArrayPredictionContext> arr = std::static_pointer_cast<const ArrayPredictionContext>(current); + ss << " s" << insertOrAssignNodeId(nodeIds, nodeId, arr.get()) << " [shape=box, label=\"" << "["; + bool first = true; + for (auto inv : arr->returnStates) { + if (!first) { + ss << ", "; + } + if (inv == EMPTY_RETURN_STATE) { + ss << "$"; + } else { + ss << inv; + } + first = false; + } + ss << "]"; + ss << "\"];\n"; + } + + for (const auto ¤t : nodes) { + if (current == EMPTY) { + continue; + } + for (size_t i = 0; i < current->size(); i++) { + if (!current->getParent(i)) { + continue; + } + ss << " s" << insertOrAssignNodeId(nodeIds, nodeId, current.get()) << "->" << "s" << insertOrAssignNodeId(nodeIds, nodeId, current->getParent(i).get()); + if (current->size() > 1) { + ss << " [label=\"parent[" << i << "]\"];\n"; + } else { + ss << ";\n"; + } + } + } + + ss << "}\n"; + return ss.str(); +} + +// The "visited" map is just a temporary structure to control the retrieval process (which is recursive). +Ref<const PredictionContext> PredictionContext::getCachedContext(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache) { + std::unordered_map<Ref<const PredictionContext>, Ref<const PredictionContext>> visited; + return getCachedContextImpl(context, contextCache, visited); +} + +std::vector<Ref<const PredictionContext>> PredictionContext::getAllContextNodes(const Ref<const PredictionContext> &context) { + std::vector<Ref<const PredictionContext>> nodes; + std::unordered_set<const PredictionContext*> visited; + getAllContextNodesImpl(context, nodes, visited); + return nodes; +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, int currentState) const { + return toStrings(recognizer, EMPTY, currentState); +} + +std::vector<std::string> PredictionContext::toStrings(Recognizer *recognizer, const Ref<const PredictionContext> &stop, int currentState) const { + + std::vector<std::string> result; + + for (size_t perm = 0; ; perm++) { + size_t offset = 0; + bool last = true; + const PredictionContext *p = this; + size_t stateNumber = currentState; + + std::stringstream ss; + ss << "["; + bool outerContinue = false; + while (!p->isEmpty() && p != stop.get()) { + size_t index = 0; + if (p->size() > 0) { + size_t bits = 1; + while ((1ULL << bits) < p->size()) { + bits++; + } + + size_t mask = (1 << bits) - 1; + index = (perm >> offset) & mask; + last &= index >= p->size() - 1; + if (index >= p->size()) { + outerContinue = true; + break; + } + offset += bits; + } + + if (recognizer != nullptr) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + const ATN &atn = recognizer->getATN(); + ATNState *s = atn.states[stateNumber]; + std::string ruleName = recognizer->getRuleNames()[s->ruleIndex]; + ss << ruleName; + } else if (p->getReturnState(index) != EMPTY_RETURN_STATE) { + if (!p->isEmpty()) { + if (ss.tellp() > 1) { + // first char is '[', if more than that this isn't the first rule + ss << ' '; + } + + ss << p->getReturnState(index); + } + } + stateNumber = p->getReturnState(index); + p = p->getParent(index).get(); + } + + if (outerContinue) + continue; + + ss << "]"; + result.push_back(ss.str()); + + if (last) { + break; + } + } + + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h new file mode 100644 index 0000000000..967355af17 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContext.h @@ -0,0 +1,225 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <atomic> + +#include "Recognizer.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "atn/PredictionContextType.h" + +namespace antlr4 { + + class RuleContext; + +namespace atn { + + class ATN; + class ArrayPredictionContext; + class SingletonPredictionContext; + class PredictionContextCache; + class PredictionContextMergeCache; + + class ANTLR4CPP_PUBLIC PredictionContext { + public: + /// Represents $ in local context prediction, which means wildcard. + /// *+x = *. + static const Ref<const PredictionContext> EMPTY; + + /// Represents $ in an array in full context mode, when $ + /// doesn't mean wildcard: $ + x = [$,x]. Here, + /// $ = EMPTY_RETURN_STATE. + // ml: originally Integer.MAX_VALUE, which would be -1 for us, but this is already used in places where + // -1 is converted to unsigned, so we use a different value here. Any value does the job provided it doesn't + // conflict with real return states. + static constexpr size_t EMPTY_RETURN_STATE = std::numeric_limits<size_t>::max() - 9; + + // dispatch + static Ref<const PredictionContext> merge(Ref<const PredictionContext> a, + Ref<const PredictionContext> b, + bool rootIsWildcard, + PredictionContextMergeCache *mergeCache); + + /// <summary> + /// Merge two <seealso cref="SingletonPredictionContext"/> instances. + /// + /// <p/> + /// + /// Stack tops equal, parents merge is same; return left graph.<br/> + /// <embed src="images/SingletonMerge_SameRootSamePar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Same stack top, parents differ; merge parents giving array node, then + /// remainders of those graphs. A new root node is created to point to the + /// merged parents.<br/> + /// <embed src="images/SingletonMerge_SameRootDiffPar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Different stack tops pointing to same parent. Make array node for the + /// root where both element in the root point to the same (original) + /// parent.<br/> + /// <embed src="images/SingletonMerge_DiffRootSamePar.svg" type="image/svg+xml"/> + /// + /// <p/> + /// + /// Different stack tops pointing to different parents. Make array node for + /// the root where each element points to the corresponding original + /// parent.<br/> + /// <embed src="images/SingletonMerge_DiffRootDiffPar.svg" type="image/svg+xml"/> + /// </summary> + /// <param name="a"> the first <seealso cref="SingletonPredictionContext"/> </param> + /// <param name="b"> the second <seealso cref="SingletonPredictionContext"/> </param> + /// <param name="rootIsWildcard"> {@code true} if this is a local-context merge, + /// otherwise false to indicate a full-context merge </param> + /// <param name="mergeCache"> </param> + static Ref<const PredictionContext> mergeSingletons(Ref<const SingletonPredictionContext> a, + Ref<const SingletonPredictionContext> b, + bool rootIsWildcard, + PredictionContextMergeCache *mergeCache); + + /** + * Handle case where at least one of {@code a} or {@code b} is + * {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used + * to represent {@link #EMPTY}. + * + * <h2>Local-Context Merges</h2> + * + * <p>These local-context merge operations are used when {@code rootIsWildcard} + * is true.</p> + * + * <p>{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.<br> + * <embed src="images/LocalMerge_EmptyRoot.svg" type="image/svg+xml"/></p> + * + * <p>{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is + * {@code #EMPTY}; return left graph.<br> + * <embed src="images/LocalMerge_EmptyParent.svg" type="image/svg+xml"/></p> + * + * <p>Special case of last merge if local context.<br> + * <embed src="images/LocalMerge_DiffRoots.svg" type="image/svg+xml"/></p> + * + * <h2>Full-Context Merges</h2> + * + * <p>These full-context merge operations are used when {@code rootIsWildcard} + * is false.</p> + * + * <p><embed src="images/FullMerge_EmptyRoots.svg" type="image/svg+xml"/></p> + * + * <p>Must keep all contexts; {@link #EMPTY} in array is a special value (and + * null parent).<br> + * <embed src="images/FullMerge_EmptyRoot.svg" type="image/svg+xml"/></p> + * + * <p><embed src="images/FullMerge_SameRoot.svg" type="image/svg+xml"/></p> + * + * @param a the first {@link SingletonPredictionContext} + * @param b the second {@link SingletonPredictionContext} + * @param rootIsWildcard {@code true} if this is a local-context merge, + * otherwise false to indicate a full-context merge + */ + static Ref<const PredictionContext> mergeRoot(Ref<const SingletonPredictionContext> a, + Ref<const SingletonPredictionContext> b, + bool rootIsWildcard); + + /** + * Merge two {@link ArrayPredictionContext} instances. + * + * <p>Different tops, different parents.<br> + * <embed src="images/ArrayMerge_DiffTopDiffPar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, same parents.<br> + * <embed src="images/ArrayMerge_ShareTopSamePar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, different parents.<br> + * <embed src="images/ArrayMerge_ShareTopDiffPar.svg" type="image/svg+xml"/></p> + * + * <p>Shared top, all shared parents.<br> + * <embed src="images/ArrayMerge_ShareTopSharePar.svg" type="image/svg+xml"/></p> + * + * <p>Equal tops, merge parents and reduce top to + * {@link SingletonPredictionContext}.<br> + * <embed src="images/ArrayMerge_EqualTop.svg" type="image/svg+xml"/></p> + */ + static Ref<const PredictionContext> mergeArrays(Ref<const ArrayPredictionContext> a, + Ref<const ArrayPredictionContext> b, + bool rootIsWildcard, + PredictionContextMergeCache *mergeCache); + + static std::string toDOTString(const Ref<const PredictionContext> &context); + + static Ref<const PredictionContext> getCachedContext(const Ref<const PredictionContext> &context, + PredictionContextCache &contextCache); + + static std::vector<Ref<const PredictionContext>> getAllContextNodes(const Ref<const PredictionContext> &context); + + /// Convert a RuleContext tree to a PredictionContext graph. + /// Return EMPTY if outerContext is empty. + static Ref<const PredictionContext> fromRuleContext(const ATN &atn, RuleContext *outerContext); + + PredictionContext(const PredictionContext&) = delete; + + virtual ~PredictionContext() = default; + + PredictionContext& operator=(const PredictionContext&) = delete; + PredictionContext& operator=(PredictionContext&&) = delete; + + PredictionContextType getContextType() const { return _contextType; } + + virtual size_t size() const = 0; + virtual const Ref<const PredictionContext>& getParent(size_t index) const = 0; + virtual size_t getReturnState(size_t index) const = 0; + + /// This means only the EMPTY (wildcard? not sure) context is in set. + virtual bool isEmpty() const = 0; + bool hasEmptyPath() const; + + size_t hashCode() const; + + virtual bool equals(const PredictionContext &other) const = 0; + + virtual std::string toString() const = 0; + + std::vector<std::string> toStrings(Recognizer *recognizer, int currentState) const; + std::vector<std::string> toStrings(Recognizer *recognizer, + const Ref<const PredictionContext> &stop, + int currentState) const; + + protected: + explicit PredictionContext(PredictionContextType contextType); + + PredictionContext(PredictionContext&& other); + + virtual size_t hashCodeImpl() const = 0; + + size_t cachedHashCode() const { return _hashCode.load(std::memory_order_relaxed); } + + private: + const PredictionContextType _contextType; + mutable std::atomic<size_t> _hashCode; + }; + + inline bool operator==(const PredictionContext &lhs, const PredictionContext &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const PredictionContext &lhs, const PredictionContext &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::PredictionContext> { + size_t operator()(const ::antlr4::atn::PredictionContext &predictionContext) const { + return predictionContext.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.cpp new file mode 100644 index 0000000000..031a35cbf7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.cpp @@ -0,0 +1,56 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "atn/PredictionContextCache.h" + +using namespace antlr4::atn; + +void PredictionContextCache::put(const Ref<const PredictionContext> &value) { + assert(value); + + _data.insert(value); +} + +Ref<const PredictionContext> PredictionContextCache::get( + const Ref<const PredictionContext> &value) const { + assert(value); + + auto iterator = _data.find(value); + if (iterator == _data.end()) { + return nullptr; + } + return *iterator; +} + +size_t PredictionContextCache::PredictionContextHasher::operator()( + const Ref<const PredictionContext> &predictionContext) const { + return predictionContext->hashCode(); +} + +bool PredictionContextCache::PredictionContextComparer::operator()( + const Ref<const PredictionContext> &lhs, + const Ref<const PredictionContext> &rhs) const { + return *lhs == *rhs; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h new file mode 100644 index 0000000000..78c8210d97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextCache.h @@ -0,0 +1,63 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include "atn/PredictionContext.h" +#include "FlatHashSet.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextCache final { + public: + PredictionContextCache() = default; + + PredictionContextCache(const PredictionContextCache&) = delete; + PredictionContextCache(PredictionContextCache&&) = delete; + + PredictionContextCache& operator=(const PredictionContextCache&) = delete; + PredictionContextCache& operator=(PredictionContextCache&&) = delete; + + void put(const Ref<const PredictionContext> &value); + + Ref<const PredictionContext> get(const Ref<const PredictionContext> &value) const; + + private: + struct ANTLR4CPP_PUBLIC PredictionContextHasher final { + size_t operator()(const Ref<const PredictionContext> &predictionContext) const; + }; + + struct ANTLR4CPP_PUBLIC PredictionContextComparer final { + bool operator()(const Ref<const PredictionContext> &lhs, + const Ref<const PredictionContext> &rhs) const; + }; + + FlatHashSet<Ref<const PredictionContext>, + PredictionContextHasher, PredictionContextComparer> _data; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp new file mode 100644 index 0000000000..7160b59998 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.cpp @@ -0,0 +1,167 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "atn/PredictionContextMergeCache.h" + +#include "misc/MurmurHash.h" + +using namespace antlr4::atn; +using namespace antlr4::misc; + +PredictionContextMergeCache::PredictionContextMergeCache( + const PredictionContextMergeCacheOptions &options) : _options(options) {} + +Ref<const PredictionContext> PredictionContextMergeCache::put( + const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2, + Ref<const PredictionContext> value) { + assert(key1); + assert(key2); + + if (getOptions().getMaxSize() == 0) { + // Cache is effectively disabled. + return value; + } + + auto [existing, inserted] = _entries.try_emplace(std::make_pair(key1.get(), key2.get())); + if (inserted) { + try { + existing->second.reset(new Entry()); + } catch (...) { + _entries.erase(existing); + throw; + } + existing->second->key = std::make_pair(key1, key2); + existing->second->value = std::move(value); + pushToFront(existing->second.get()); + } else { + if (existing->second->value != value) { + existing->second->value = std::move(value); + } + moveToFront(existing->second.get()); + } + compact(existing->second.get()); + return existing->second->value; +} + +Ref<const PredictionContext> PredictionContextMergeCache::get( + const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2) const { + assert(key1); + assert(key2); + + if (getOptions().getMaxSize() == 0) { + // Cache is effectively disabled. + return nullptr; + } + + auto iterator = _entries.find(std::make_pair(key1.get(), key2.get())); + if (iterator == _entries.end()) { + return nullptr; + } + moveToFront(iterator->second.get()); + return iterator->second->value; +} + +void PredictionContextMergeCache::clear() { + Container().swap(_entries); + _head = _tail = nullptr; + _size = 0; +} + +void PredictionContextMergeCache::moveToFront(Entry *entry) const { + if (entry->prev == nullptr) { + assert(entry == _head); + return; + } + entry->prev->next = entry->next; + if (entry->next != nullptr) { + entry->next->prev = entry->prev; + } else { + assert(entry == _tail); + _tail = entry->prev; + } + entry->prev = nullptr; + entry->next = _head; + _head->prev = entry; + _head = entry; + assert(entry->prev == nullptr); +} + +void PredictionContextMergeCache::pushToFront(Entry *entry) { + ++_size; + entry->prev = nullptr; + entry->next = _head; + if (_head != nullptr) { + _head->prev = entry; + _head = entry; + } else { + assert(entry->next == nullptr); + _head = entry; + _tail = entry; + } + assert(entry->prev == nullptr); +} + +void PredictionContextMergeCache::remove(Entry *entry) { + if (entry->prev != nullptr) { + entry->prev->next = entry->next; + } else { + assert(entry == _head); + _head = entry->next; + } + if (entry->next != nullptr) { + entry->next->prev = entry->prev; + } else { + assert(entry == _tail); + _tail = entry->prev; + } + --_size; + _entries.erase(std::make_pair(entry->key.first.get(), entry->key.second.get())); +} + +void PredictionContextMergeCache::compact(const Entry *preserve) { + Entry *entry = _tail; + while (entry != nullptr && _size > getOptions().getMaxSize()) { + Entry *next = entry->prev; + if (entry != preserve) { + remove(entry); + } + entry = next; + } +} + +size_t PredictionContextMergeCache::PredictionContextHasher::operator()( + const PredictionContextPair &value) const { + size_t hash = MurmurHash::initialize(); + hash = MurmurHash::update(hash, value.first->hashCode()); + hash = MurmurHash::update(hash, value.second->hashCode()); + return MurmurHash::finish(hash, 2); +} + +bool PredictionContextMergeCache::PredictionContextComparer::operator()( + const PredictionContextPair &lhs, const PredictionContextPair &rhs) const { + return *lhs.first == *rhs.first && *lhs.second == *rhs.second; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h new file mode 100644 index 0000000000..efaeaef578 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCache.h @@ -0,0 +1,101 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <utility> + +#include "atn/PredictionContext.h" +#include "atn/PredictionContextMergeCacheOptions.h" +#include "FlatHashMap.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextMergeCache final { + public: + PredictionContextMergeCache() + : PredictionContextMergeCache(PredictionContextMergeCacheOptions()) {} + + explicit PredictionContextMergeCache(const PredictionContextMergeCacheOptions &options); + + PredictionContextMergeCache(const PredictionContextMergeCache&) = delete; + PredictionContextMergeCache(PredictionContextMergeCache&&) = delete; + + PredictionContextMergeCache& operator=(const PredictionContextMergeCache&) = delete; + PredictionContextMergeCache& operator=(PredictionContextMergeCache&&) = delete; + + Ref<const PredictionContext> put(const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2, + Ref<const PredictionContext> value); + + Ref<const PredictionContext> get(const Ref<const PredictionContext> &key1, + const Ref<const PredictionContext> &key2) const; + + const PredictionContextMergeCacheOptions& getOptions() const { return _options; } + + void clear(); + + private: + using PredictionContextPair = std::pair<const PredictionContext*, const PredictionContext*>; + + struct ANTLR4CPP_PUBLIC PredictionContextHasher final { + size_t operator()(const PredictionContextPair &value) const; + }; + + struct ANTLR4CPP_PUBLIC PredictionContextComparer final { + bool operator()(const PredictionContextPair &lhs, const PredictionContextPair &rhs) const; + }; + + struct ANTLR4CPP_PUBLIC Entry final { + std::pair<Ref<const PredictionContext>, Ref<const PredictionContext>> key; + Ref<const PredictionContext> value; + Entry *prev = nullptr; + Entry *next = nullptr; + }; + + void moveToFront(Entry *entry) const; + + void pushToFront(Entry *entry); + + void remove(Entry *entry); + + void compact(const Entry *preserve); + + using Container = FlatHashMap<PredictionContextPair, std::unique_ptr<Entry>, + PredictionContextHasher, PredictionContextComparer>; + + const PredictionContextMergeCacheOptions _options; + + Container _entries; + + mutable Entry *_head = nullptr; + mutable Entry *_tail = nullptr; + + size_t _size = 0; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCacheOptions.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCacheOptions.h new file mode 100644 index 0000000000..7331cc17e0 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextMergeCacheOptions.h @@ -0,0 +1,71 @@ +// Copyright 2012-2022 The ANTLR Project +// +// Redistribution and use in source and binary forms, with or without modification, are permitted +// provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of conditions +// and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <limits> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC PredictionContextMergeCacheOptions final { + public: + PredictionContextMergeCacheOptions() = default; + + size_t getMaxSize() const { return _maxSize; } + + bool hasMaxSize() const { return getMaxSize() != std::numeric_limits<size_t>::max(); } + + PredictionContextMergeCacheOptions& setMaxSize(size_t maxSize) { + _maxSize = maxSize; + return *this; + } + + size_t getClearEveryN() const { + return _clearEveryN; + } + + bool hasClearEveryN() const { return getClearEveryN() != 0; } + + PredictionContextMergeCacheOptions& setClearEveryN(uint64_t clearEveryN) { + _clearEveryN = clearEveryN; + return *this; + } + + PredictionContextMergeCacheOptions& neverClear() { + return setClearEveryN(0); + } + + private: + size_t _maxSize = std::numeric_limits<size_t>::max(); + uint64_t _clearEveryN = 1; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h new file mode 100644 index 0000000000..c8c4473e13 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionContextType.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + enum class PredictionContextType : size_t { + SINGLETON = 1, + ARRAY = 2, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp new file mode 100644 index 0000000000..9db0b8bdb9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.cpp @@ -0,0 +1,202 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStopState.h" +#include "atn/ATNConfigSet.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" +#include "SemanticContext.h" + +#include "PredictionMode.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +struct AltAndContextConfigHasher +{ + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. + */ + size_t operator () (ATNConfig *o) const { + size_t hashCode = misc::MurmurHash::initialize(7); + hashCode = misc::MurmurHash::update(hashCode, o->state->stateNumber); + hashCode = misc::MurmurHash::update(hashCode, o->context); + return misc::MurmurHash::finish(hashCode, 2); + } +}; + +struct AltAndContextConfigComparer { + bool operator()(ATNConfig *a, ATNConfig *b) const + { + if (a == b) { + return true; + } + return a->state->stateNumber == b->state->stateNumber && *a->context == *b->context; + } +}; + +bool PredictionModeClass::hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs) { + /* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { + return true; + } + + bool heuristic; + + // Pure SLL mode parsing or SLL+LL if: + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL; costs more time + // since we'll often fail over anyway. + if (mode == PredictionMode::SLL || !configs->hasSemanticContext) { + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(configs); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + } else { + // dup configs, tossing out semantic predicates + ATNConfigSet dup(true); + for (auto &config : configs->configs) { + Ref<ATNConfig> c = std::make_shared<ATNConfig>(*config, SemanticContext::Empty::Instance); + dup.add(c); + } + std::vector<antlrcpp::BitSet> altsets = getConflictingAltSubsets(&dup); + heuristic = hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(&dup); + } + + return heuristic; +} + +bool PredictionModeClass::hasConfigInRuleStopState(ATNConfigSet *configs) { + for (const auto &config : configs->configs) { + if (RuleStopState::is(config->state)) { + return true; + } + } + + return false; +} + +bool PredictionModeClass::allConfigsInRuleStopStates(ATNConfigSet *configs) { + for (const auto &config : configs->configs) { + if (!RuleStopState::is(config->state)) { + return false; + } + } + + return true; +} + +size_t PredictionModeClass::resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + return getSingleViableAlt(altsets); +} + +bool PredictionModeClass::allSubsetsConflict(const std::vector<antlrcpp::BitSet>& altsets) { + return !hasNonConflictingAltSet(altsets); +} + +bool PredictionModeClass::hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() == 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::hasConflictingAltSet(const std::vector<antlrcpp::BitSet>& altsets) { + for (antlrcpp::BitSet alts : altsets) { + if (alts.count() > 1) { + return true; + } + } + return false; +} + +bool PredictionModeClass::allSubsetsEqual(const std::vector<antlrcpp::BitSet>& altsets) { + if (altsets.empty()) { + return true; + } + + const antlrcpp::BitSet& first = *altsets.begin(); + for (const antlrcpp::BitSet& alts : altsets) { + if (alts != first) { + return false; + } + } + return true; +} + +size_t PredictionModeClass::getUniqueAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all = getAlts(altsets); + if (all.count() == 1) { + return all.nextSetBit(0); + } + return ATN::INVALID_ALT_NUMBER; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet all; + for (const auto &alts : altsets) { + all |= alts; + } + + return all; +} + +antlrcpp::BitSet PredictionModeClass::getAlts(ATNConfigSet *configs) { + antlrcpp::BitSet alts; + for (const auto &config : configs->configs) { + alts.set(config->alt); + } + return alts; +} + +std::vector<antlrcpp::BitSet> PredictionModeClass::getConflictingAltSubsets(ATNConfigSet *configs) { + std::unordered_map<ATNConfig*, antlrcpp::BitSet, AltAndContextConfigHasher, AltAndContextConfigComparer> configToAlts; + for (auto &config : configs->configs) { + configToAlts[config.get()].set(config->alt); + } + std::vector<antlrcpp::BitSet> values; + values.reserve(configToAlts.size()); + for (const auto &pair : configToAlts) { + values.push_back(pair.second); + } + return values; +} + +std::unordered_map<ATNState*, antlrcpp::BitSet> PredictionModeClass::getStateToAltMap(ATNConfigSet *configs) { + std::unordered_map<ATNState*, antlrcpp::BitSet> m; + for (const auto &c : configs->configs) { + m[c->state].set(c->alt); + } + return m; +} + +bool PredictionModeClass::hasStateAssociatedWithOneAlt(ATNConfigSet *configs) { + auto x = getStateToAltMap(configs); + for (const auto &pair : x){ + if (pair.second.count() == 1) return true; + } + return false; +} + +size_t PredictionModeClass::getSingleViableAlt(const std::vector<antlrcpp::BitSet>& altsets) { + antlrcpp::BitSet viableAlts; + for (const auto &alts : altsets) { + size_t minAlt = alts.nextSetBit(0); + + viableAlts.set(minAlt); + if (viableAlts.count() > 1) // more than 1 viable alt + { + return ATN::INVALID_ALT_NUMBER; + } + } + + return viableAlts.nextSetBit(0); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h new file mode 100644 index 0000000000..4868ea2ff2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/PredictionMode.h @@ -0,0 +1,436 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "support/BitSet.h" + +namespace antlr4 { +namespace atn { + + /** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ + enum class PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + * <p> + * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link #LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link #SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link #LL} prediction abilities to complete successfully.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + SLL, + + /** + * The LL(*) prediction mode. This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + * <p> + * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for <em>exactly which</em> alternatives are + * ambiguous.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL, + + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + * <p> + * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.</p> + * + * <p> + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.</p> + */ + LL_EXACT_AMBIG_DETECTION + }; + + class ANTLR4CPP_PUBLIC PredictionModeClass { + public: + /** + * Computes the SLL prediction termination condition. + * + * <p> + * This method computes the SLL prediction termination condition for both of + * the following cases.</p> + * + * <ul> + * <li>The usual SLL+LL fallback upon SLL conflict</li> + * <li>Pure SLL without LL fallback</li> + * </ul> + * + * <p><strong>COMBINED SLL+LL PARSING</strong></p> + * + * <p>When LL-fallback is enabled upon SLL conflict, correct predictions are + * ensured regardless of how the termination condition is computed by this + * method. Due to the substantially higher cost of LL prediction, the + * prediction should only fall back to LL when the additional lookahead + * cannot lead to a unique SLL prediction.</p> + * + * <p>Assuming combined SLL+LL parsing, an SLL configuration set with only + * conflicting subsets should fall back to full LL, even if the + * configuration sets don't resolve to the same alternative (e.g. + * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting + * configuration, SLL could continue with the hopes that more lookahead will + * resolve via one of those non-conflicting configurations.</p> + * + * <p>Here's the prediction termination rule them: SLL (for SLL+LL parsing) + * stops when it sees only conflicting configuration subsets. In contrast, + * full LL keeps going when there is uncertainty.</p> + * + * <p><strong>HEURISTIC</strong></p> + * + * <p>As a heuristic, we stop prediction when we see any conflicting subset + * unless we see a state that only has one alternative associated with it. + * The single-alt-state thing lets prediction continue upon rules like + * (otherwise, it would admit defeat too soon):</p> + * + * <p>{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}</p> + * + * <p>When the ATN simulation reaches the state before {@code ';'}, it has a + * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally + * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + * processing this node because alternative to has another way to continue, + * via {@code [6|2|[]]}.</p> + * + * <p>It also let's us continue for this rule:</p> + * + * <p>{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}</p> + * + * <p>After matching input A, we reach the stop state for rule A, state 1. + * State 8 is the state right before B. Clearly alternatives 1 and 2 + * conflict and no amount of further lookahead will separate the two. + * However, alternative 3 will be able to continue and so we do not stop + * working on this state. In the previous example, we're concerned with + * states associated with the conflicting alternatives. Here alt 3 is not + * associated with the conflicting configs, but since we can continue + * looking for input reasonably, don't declare the state done.</p> + * + * <p><strong>PURE SLL PARSING</strong></p> + * + * <p>To handle pure SLL parsing, all we have to do is make sure that we + * combine stack contexts for configurations that differ only by semantic + * predicate. From there, we can do the usual SLL termination heuristic.</p> + * + * <p><strong>PREDICATES IN SLL+LL PARSING</strong></p> + * + * <p>SLL decisions don't evaluate predicates until after they reach DFA stop + * states because they need to create the DFA cache that works in all + * semantic situations. In contrast, full LL evaluates predicates collected + * during start state computation so it can ignore predicates thereafter. + * This means that SLL termination detection can totally ignore semantic + * predicates.</p> + * + * <p>Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + * semantic predicate contexts so we might see two configurations like the + * following.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p})}</p> + * + * <p>Before testing these configurations against others, we have to merge + * {@code x} and {@code x'} (without modifying the existing configurations). + * For example, we test {@code (x+x')==x''} when looking for conflicts in + * the following configurations.</p> + * + * <p>{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}</p> + * + * <p>If the configuration set has predicates (as indicated by + * {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of + * the configurations to strip out all of the predicates so that a standard + * {@link ATNConfigSet} will merge everything ignoring predicates.</p> + */ + static bool hasSLLConflictTerminatingPrediction(PredictionMode mode, ATNConfigSet *configs); + + /// <summary> + /// Checks if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if any configuration in {@code configs} is in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool hasConfigInRuleStopState(ATNConfigSet *configs); + + /// <summary> + /// Checks if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>. Configurations meeting this condition have + /// reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// </summary> + /// <param name="configs"> the configuration set to test </param> + /// <returns> {@code true} if all configurations in {@code configs} are in a + /// <seealso cref="RuleStopState"/>, otherwise {@code false} </returns> + static bool allConfigsInRuleStopStates(ATNConfigSet *configs); + + /** + * Full LL prediction termination. + * + * <p>Can we stop looking ahead during ATN simulation or is there some + * uncertainty as to which alternative we will ultimately pick, after + * consuming more input? Even if there are partial conflicts, we might know + * that everything is going to resolve to the same minimum alternative. That + * means we can stop since no more lookahead will change that fact. On the + * other hand, there might be multiple conflicts that resolve to different + * minimums. That means we need more look ahead to decide which of those + * alternatives we should predict.</p> + * + * <p>The basic idea is to split the set of configurations {@code C}, into + * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + * non-conflicting configurations. Two configurations conflict if they have + * identical {@link ATNConfig#state} and {@link ATNConfig#context} values + * but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} + * and {@code (s, j, ctx, _)} for {@code i!=j}.</p> + * + * <p>Reduce these configuration subsets to the set of possible alternatives. + * You can compute the alternative subsets in one pass as follows:</p> + * + * <p>{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + * {@code C} holding {@code s} and {@code ctx} fixed.</p> + * + * <p>Or in pseudo-code, for each configuration {@code c} in {@code C}:</p> + * + * <pre> + * map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not + * alt and not pred + * </pre> + * + * <p>The values in {@code map} are the set of {@code A_s,ctx} sets.</p> + * + * <p>If {@code |A_s,ctx|=1} then there is no conflict associated with + * {@code s} and {@code ctx}.</p> + * + * <p>Reduce the subsets to singletons by choosing a minimum of each subset. If + * the union of these alternative subsets is a singleton, then no amount of + * more lookahead will help us. We will always pick that alternative. If, + * however, there is more than one alternative, then we are uncertain which + * alternative to predict and must continue looking for resolution. We may + * or may not discover an ambiguity in the future, even if there are no + * conflicting subsets this round.</p> + * + * <p>The biggest sin is to terminate early because it means we've made a + * decision but were uncertain as to the eventual outcome. We haven't used + * enough lookahead. On the other hand, announcing a conflict too late is no + * big deal; you will still have the conflict. It's just inefficient. It + * might even look until the end of file.</p> + * + * <p>No special consideration for semantic predicates is required because + * predicates are evaluated on-the-fly for full LL prediction, ensuring that + * no configuration contains a semantic context during the termination + * check.</p> + * + * <p><strong>CONFLICTING CONFIGS</strong></p> + * + * <p>Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + * when {@code i!=j} but {@code x=x'}. Because we merge all + * {@code (s, i, _)} configurations together, that means that there are at + * most {@code n} configurations associated with state {@code s} for + * {@code n} possible alternatives in the decision. The merged stacks + * complicate the comparison of configuration contexts {@code x} and + * {@code x'}. Sam checks to see if one is a subset of the other by calling + * merge and checking to see if the merged result is either {@code x} or + * {@code x'}. If the {@code x} associated with lowest alternative {@code i} + * is the superset, then {@code i} is the only possible prediction since the + * others resolve to {@code min(i)} as well. However, if {@code x} is + * associated with {@code j>i} then at least one stack configuration for + * {@code j} is not in conflict with alternative {@code i}. The algorithm + * should keep going, looking for more lookahead due to the uncertainty.</p> + * + * <p>For simplicity, I'm doing a equality check between {@code x} and + * {@code x'} that lets the algorithm continue to consume lookahead longer + * than necessary. The reason I like the equality is of course the + * simplicity but also because that is the test you need to detect the + * alternatives that are actually in conflict.</p> + * + * <p><strong>CONTINUE/STOP RULE</strong></p> + * + * <p>Continue if union of resolved alternative sets from non-conflicting and + * conflicting alternative subsets has more than one alternative. We are + * uncertain about which alternative to predict.</p> + * + * <p>The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + * alternatives are still in the running for the amount of input we've + * consumed at this point. The conflicting sets let us to strip away + * configurations that won't lead to more states because we resolve + * conflicts to the configuration with a minimum alternate for the + * conflicting set.</p> + * + * <p><strong>CASES</strong></p> + * + * <ul> + * + * <li>no conflicts and more than 1 alternative in set => continue</li> + * + * <li> {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, + * {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set + * {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1,3}} => continue + * </li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set + * {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = + * {@code {1}} => stop and predict 1</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, + * {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {1}} = {@code {1}} => stop and predict 1, can announce + * ambiguity {@code {1,2}}</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, + * {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {2}} = {@code {1,2}} => continue</li> + * + * <li>{@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, + * {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U + * {@code {3}} = {@code {1,3}} => continue</li> + * + * </ul> + * + * <p><strong>EXACT AMBIGUITY DETECTION</strong></p> + * + * <p>If all states report the same conflicting set of alternatives, then we + * know we have the exact ambiguity set.</p> + * + * <p><code>|A_<em>i</em>|>1</code> and + * <code>A_<em>i</em> = A_<em>j</em></code> for all <em>i</em>, <em>j</em>.</p> + * + * <p>In other words, we continue examining lookahead until all {@code A_i} + * have more than one alternative and all {@code A_i} are the same. If + * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + * because the resolved set is {@code {1}}. To determine what the real + * ambiguity is, we have to know whether the ambiguity is between one and + * two or one and three so we keep going. We can only stop prediction when + * we need exact ambiguity detection when the sets look like + * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...</p> + */ + static size_t resolvesToJustOneViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} contains more + /// than one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every <seealso cref="BitSet"/> in {@code altsets} + /// has + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool allSubsetsConflict(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// exactly one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> 1, otherwise {@code false} + /// </returns> + static bool hasNonConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if any single alternative subset in {@code altsets} contains + /// more than one alternative. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if {@code altsets} contains a <seealso + /// cref="BitSet"/> with + /// <seealso cref="BitSet#cardinality cardinality"/> > 1, otherwise {@code + /// false} </returns> + static bool hasConflictingAltSet(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Determines if every alternative subset in {@code altsets} is equivalent. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> {@code true} if every member of {@code altsets} is equal to the + /// others, otherwise {@code false} </returns> + static bool allSubsetsEqual(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Returns the unique alternative predicted by all alternative subsets in + /// {@code altsets}. If no such alternative exists, this method returns + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + static size_t getUniqueAlt(const std::vector<antlrcpp::BitSet> &altsets); + + /// <summary> + /// Gets the complete set of represented alternatives for a collection of + /// alternative subsets. This method returns the union of each <seealso + /// cref="BitSet"/> + /// in {@code altsets}. + /// </summary> + /// <param name="altsets"> a collection of alternative subsets </param> + /// <returns> the set of represented alternatives in {@code altsets} </returns> + static antlrcpp::BitSet getAlts(const std::vector<antlrcpp::BitSet> &altsets); + + /** Get union of all alts from configs. @since 4.5.1 */ + static antlrcpp::BitSet getAlts(ATNConfigSet *configs); + + /// <summary> + /// This function gets the conflicting alt subsets from a configuration set. + /// For each configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c] U= c.<seealso cref="ATNConfig#alt alt"/> # map hash/equals uses s and + /// x, not + /// alt and not pred + /// </pre> + /// </summary> + static std::vector<antlrcpp::BitSet> getConflictingAltSubsets(ATNConfigSet *configs); + + /// <summary> + /// Get a map from state to alt subset from a configuration set. For each + /// configuration {@code c} in {@code configs}: + /// + /// <pre> + /// map[c.<seealso cref="ATNConfig#state state"/>] U= c.<seealso + /// cref="ATNConfig#alt alt"/> + /// </pre> + /// </summary> + static std::unordered_map<ATNState*, antlrcpp::BitSet> getStateToAltMap(ATNConfigSet *configs); + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet *configs); + + static size_t getSingleViableAlt(const std::vector<antlrcpp::BitSet> &altsets); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp new file mode 100644 index 0000000000..9fd86d67d4 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.cpp @@ -0,0 +1,179 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/PredicateEvalInfo.h" +#include "atn/LookaheadEventInfo.h" +#include "Parser.h" +#include "atn/ATNConfigSet.h" +#include "support/CPPUtils.h" + +#include "atn/ProfilingATNSimulator.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlr4::dfa; +using namespace antlrcpp; + +using namespace std::chrono; + +ProfilingATNSimulator::ProfilingATNSimulator(Parser *parser) + : ParserATNSimulator(parser, parser->getInterpreter<ParserATNSimulator>()->atn, + parser->getInterpreter<ParserATNSimulator>()->decisionToDFA, + parser->getInterpreter<ParserATNSimulator>()->getSharedContextCache()) { + for (size_t i = 0; i < atn.decisionToState.size(); i++) { + _decisions.push_back(DecisionInfo(i)); + } +} + +size_t ProfilingATNSimulator::adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) { + auto onExit = finally([this](){ + _currentDecision = 0; // Originally -1, but that makes no sense (index into a vector and init value is also 0). + }); + + _sllStopIndex = -1; + _llStopIndex = -1; + _currentDecision = decision; + high_resolution_clock::time_point start = high_resolution_clock::now(); + size_t alt = ParserATNSimulator::adaptivePredict(input, decision, outerContext); + high_resolution_clock::time_point stop = high_resolution_clock::now(); + _decisions[decision].timeInPrediction += duration_cast<nanoseconds>(stop - start).count(); + _decisions[decision].invocations++; + + long long SLL_k = _sllStopIndex - _startIndex + 1; + _decisions[decision].SLL_TotalLook += SLL_k; + _decisions[decision].SLL_MinLook = _decisions[decision].SLL_MinLook == 0 ? SLL_k : std::min(_decisions[decision].SLL_MinLook, SLL_k); + if (SLL_k > _decisions[decision].SLL_MaxLook) { + _decisions[decision].SLL_MaxLook = SLL_k; + _decisions[decision].SLL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _sllStopIndex, false); + } + + if (_llStopIndex >= 0) { + long long LL_k = _llStopIndex - _startIndex + 1; + _decisions[decision].LL_TotalLook += LL_k; + _decisions[decision].LL_MinLook = _decisions[decision].LL_MinLook == 0 ? LL_k : std::min(_decisions[decision].LL_MinLook, LL_k); + if (LL_k > _decisions[decision].LL_MaxLook) { + _decisions[decision].LL_MaxLook = LL_k; + _decisions[decision].LL_MaxLookEvent = std::make_shared<LookaheadEventInfo>(decision, nullptr, alt, input, _startIndex, _llStopIndex, true); + } + } + + return alt; +} + +DFAState* ProfilingATNSimulator::getExistingTargetState(DFAState *previousD, size_t t) { + // this method is called after each time the input position advances + // during SLL prediction + _sllStopIndex = (int)_input->index(); + + DFAState *existingTargetState = ParserATNSimulator::getExistingTargetState(previousD, t); + if (existingTargetState != nullptr) { + _decisions[_currentDecision].SLL_DFATransitions++; // count only if we transition over a DFA state + if (existingTargetState == ERROR.get()) { + _decisions[_currentDecision].errors.push_back( + ErrorInfo(_currentDecision, previousD->configs.get(), _input, _startIndex, _sllStopIndex, false) + ); + } + } + + _currentState = existingTargetState; + return existingTargetState; +} + +DFAState* ProfilingATNSimulator::computeTargetState(DFA &dfa, DFAState *previousD, size_t t) { + DFAState *state = ParserATNSimulator::computeTargetState(dfa, previousD, t); + _currentState = state; + return state; +} + +std::unique_ptr<ATNConfigSet> ProfilingATNSimulator::computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) { + if (fullCtx) { + // this method is called after each time the input position advances + // during full context prediction + _llStopIndex = (int)_input->index(); + } + + std::unique_ptr<ATNConfigSet> reachConfigs = ParserATNSimulator::computeReachSet(closure, t, fullCtx); + if (fullCtx) { + _decisions[_currentDecision].LL_ATNTransitions++; // count computation even if error + if (reachConfigs != nullptr) { + } else { // no reach on current lookahead symbol. ERROR. + // TODO: does not handle delayed errors per getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule() + _decisions[_currentDecision].errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _llStopIndex, true)); + } + } else { + ++_decisions[_currentDecision].SLL_ATNTransitions; + if (reachConfigs != nullptr) { + } else { // no reach on current lookahead symbol. ERROR. + _decisions[_currentDecision].errors.push_back(ErrorInfo(_currentDecision, closure, _input, _startIndex, _sllStopIndex, false)); + } + } + return reachConfigs; +} + +bool ProfilingATNSimulator::evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx) { + bool result = ParserATNSimulator::evalSemanticContext(pred, parserCallStack, alt, fullCtx); + if (!(std::dynamic_pointer_cast<const SemanticContext::PrecedencePredicate>(pred) != nullptr)) { + bool fullContext = _llStopIndex >= 0; + int stopIndex = fullContext ? _llStopIndex : _sllStopIndex; + _decisions[_currentDecision].predicateEvals.push_back( + PredicateEvalInfo(_currentDecision, _input, _startIndex, stopIndex, pred, result, alt, fullCtx)); + } + + return result; +} + +void ProfilingATNSimulator::reportAttemptingFullContext(DFA &dfa, const BitSet &conflictingAlts, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { + if (conflictingAlts.count() > 0) { + conflictingAltResolvedBySLL = conflictingAlts.nextSetBit(0); + } else { + conflictingAltResolvedBySLL = configs->getAlts().nextSetBit(0); + } + _decisions[_currentDecision].LL_Fallback++; + ParserATNSimulator::reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex); +} + +void ProfilingATNSimulator::reportContextSensitivity(DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) { + if (prediction != conflictingAltResolvedBySLL) { + _decisions[_currentDecision].contextSensitivities.push_back( + ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex) + ); + } + ParserATNSimulator::reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex); +} + +void ProfilingATNSimulator::reportAmbiguity(DFA &dfa, DFAState *D, size_t startIndex, size_t stopIndex, bool exact, + const BitSet &ambigAlts, ATNConfigSet *configs) { + size_t prediction; + if (ambigAlts.count() > 0) { + prediction = ambigAlts.nextSetBit(0); + } else { + prediction = configs->getAlts().nextSetBit(0); + } + if (configs->fullCtx && prediction != conflictingAltResolvedBySLL) { + // Even though this is an ambiguity we are reporting, we can + // still detect some context sensitivities. Both SLL and LL + // are showing a conflict, hence an ambiguity, but if they resolve + // to different minimum alternatives we have also identified a + // context sensitivity. + _decisions[_currentDecision].contextSensitivities.push_back( + ContextSensitivityInfo(_currentDecision, configs, _input, startIndex, stopIndex) + ); + } + _decisions[_currentDecision].ambiguities.push_back( + AmbiguityInfo(_currentDecision, configs, ambigAlts, _input, startIndex, stopIndex, configs->fullCtx) + ); + ParserATNSimulator::reportAmbiguity(dfa, D, startIndex, stopIndex, exact, ambigAlts, configs); +} + +std::vector<DecisionInfo> ProfilingATNSimulator::getDecisionInfo() const { + return _decisions; +} + +DFAState* ProfilingATNSimulator::getCurrentState() const { + return _currentState; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h new file mode 100644 index 0000000000..551efb8556 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/ProfilingATNSimulator.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ParserATNSimulator.h" +#include "atn/DecisionInfo.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC ProfilingATNSimulator : public ParserATNSimulator { + public: + explicit ProfilingATNSimulator(Parser *parser); + + virtual size_t adaptivePredict(TokenStream *input, size_t decision, ParserRuleContext *outerContext) override; + + virtual std::vector<DecisionInfo> getDecisionInfo() const; + virtual dfa::DFAState* getCurrentState() const; + + protected: + std::vector<DecisionInfo> _decisions; + + int _sllStopIndex = 0; + int _llStopIndex = 0; + + size_t _currentDecision = 0; + dfa::DFAState *_currentState; + + /// <summary> + /// At the point of LL failover, we record how SLL would resolve the conflict so that + /// we can determine whether or not a decision / input pair is context-sensitive. + /// If LL gives a different result than SLL's predicted alternative, we have a + /// context sensitivity for sure. The converse is not necessarily true, however. + /// It's possible that after conflict resolution chooses minimum alternatives, + /// SLL could get the same answer as LL. Regardless of whether or not the result indicates + /// an ambiguity, it is not treated as a context sensitivity because LL prediction + /// was not required in order to produce a correct prediction for this decision and input sequence. + /// It may in fact still be a context sensitivity but we don't know by looking at the + /// minimum alternatives for the current input. + /// </summary> + size_t conflictingAltResolvedBySLL = 0; + + virtual dfa::DFAState* getExistingTargetState(dfa::DFAState *previousD, size_t t) override; + virtual dfa::DFAState* computeTargetState(dfa::DFA &dfa, dfa::DFAState *previousD, size_t t) override; + virtual std::unique_ptr<ATNConfigSet> computeReachSet(ATNConfigSet *closure, size_t t, bool fullCtx) override; + virtual bool evalSemanticContext(Ref<const SemanticContext> const& pred, ParserRuleContext *parserCallStack, + size_t alt, bool fullCtx) override; + virtual void reportAttemptingFullContext(dfa::DFA &dfa, const antlrcpp::BitSet &conflictingAlts, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) override; + virtual void reportContextSensitivity(dfa::DFA &dfa, size_t prediction, ATNConfigSet *configs, + size_t startIndex, size_t stopIndex) override; + virtual void reportAmbiguity(dfa::DFA &dfa, dfa::DFAState *D, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, ATNConfigSet *configs) override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp new file mode 100644 index 0000000000..342e550de9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.cpp @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/IntervalSet.h" + +#include "atn/RangeTransition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +RangeTransition::RangeTransition(ATNState *target, size_t from, size_t to) : Transition(TransitionType::RANGE, target), from(from), to(to) { +} + +misc::IntervalSet RangeTransition::label() const { + return misc::IntervalSet::of((int)from, (int)to); +} + +bool RangeTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return symbol >= from && symbol <= to; +} + +std::string RangeTransition::toString() const { + return "RANGE " + Transition::toString() + " { from: " + std::to_string(from) + ", to: " + std::to_string(to) + " }"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h new file mode 100644 index 0000000000..b75c60e247 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RangeTransition.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RangeTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::RANGE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const size_t from; + const size_t to; + + RangeTransition(ATNState *target, size_t from, size_t to); + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h new file mode 100644 index 0000000000..549491514b --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStartState.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RuleStartState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::RULE_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + RuleStopState *stopState = nullptr; + bool isLeftRecursiveRule = false; + + RuleStartState() : ATNState(ATNStateType::RULE_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h new file mode 100644 index 0000000000..7792a1265c --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleStopState.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + /// The last node in the ATN for a rule, unless that rule is the start symbol. + /// In that case, there is one transition to EOF. Later, we might encode + /// references to all calls to this rule to compute FOLLOW sets for + /// error handling. + class ANTLR4CPP_PUBLIC RuleStopState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::RULE_STOP; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + RuleStopState() : ATNState(ATNStateType::RULE_STOP) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp new file mode 100644 index 0000000000..ba50dd03dd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.cpp @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/RuleStartState.h" +#include "atn/RuleTransition.h" + +using namespace antlr4::atn; + +RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState) + : RuleTransition(ruleStart, ruleIndex, 0, followState) { +} + +RuleTransition::RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState) + : Transition(TransitionType::RULE, ruleStart), ruleIndex(ruleIndex), precedence(precedence) { + this->followState = followState; +} + +bool RuleTransition::isEpsilon() const { + return true; +} + +bool RuleTransition::matches(size_t /*symbol*/, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return false; +} + +std::string RuleTransition::toString() const { + std::stringstream ss; + ss << "RULE " << Transition::toString() << " { ruleIndex: " << ruleIndex << ", precedence: " << precedence << + ", followState: " << std::hex << followState << " }"; + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h new file mode 100644 index 0000000000..396ef700f2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/RuleTransition.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC RuleTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::RULE; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + /// Ptr to the rule definition object for this rule ref. + const size_t ruleIndex; // no Rule object at runtime + + const int precedence; + + /// What node to begin computations following ref to rule. + ATNState *followState; + + /// @deprecated Use + /// <seealso cref="#RuleTransition(RuleStartState, size_t, int, ATNState)"/> instead. + RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, ATNState *followState); + + RuleTransition(RuleStartState *ruleStart, size_t ruleIndex, int precedence, ATNState *followState); + RuleTransition(RuleTransition const&) = delete; + RuleTransition& operator=(RuleTransition const&) = delete; + + virtual bool isEpsilon() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp new file mode 100644 index 0000000000..7d7fe068df --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.cpp @@ -0,0 +1,418 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <functional> +#include <unordered_set> + +#include "misc/MurmurHash.h" +#include "support/Casts.h" +#include "support/CPPUtils.h" +#include "support/Arrays.h" + +#include "SemanticContext.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + struct SemanticContextHasher final { + size_t operator()(const SemanticContext *semanticContext) const { + return semanticContext->hashCode(); + } + }; + + struct SemanticContextComparer final { + bool operator()(const SemanticContext *lhs, const SemanticContext *rhs) const { + return *lhs == *rhs; + } + }; + + template <typename Comparer> + void insertSemanticContext(const Ref<const SemanticContext> &semanticContext, + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> &operandSet, + std::vector<Ref<const SemanticContext>> &operandList, + Ref<const SemanticContext::PrecedencePredicate> &precedencePredicate, + Comparer comparer) { + if (semanticContext != nullptr) { + if (semanticContext->getContextType() == SemanticContextType::PRECEDENCE) { + if (precedencePredicate == nullptr || comparer(downCast<const SemanticContext::PrecedencePredicate*>(semanticContext.get())->precedence, precedencePredicate->precedence)) { + precedencePredicate = std::static_pointer_cast<const SemanticContext::PrecedencePredicate>(semanticContext); + } + } else { + auto [existing, inserted] = operandSet.insert(semanticContext.get()); + if (inserted) { + operandList.push_back(semanticContext); + } + } + } + } + + template <typename Comparer> + void insertSemanticContext(Ref<const SemanticContext> &&semanticContext, + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> &operandSet, + std::vector<Ref<const SemanticContext>> &operandList, + Ref<const SemanticContext::PrecedencePredicate> &precedencePredicate, + Comparer comparer) { + if (semanticContext != nullptr) { + if (semanticContext->getContextType() == SemanticContextType::PRECEDENCE) { + if (precedencePredicate == nullptr || comparer(downCast<const SemanticContext::PrecedencePredicate*>(semanticContext.get())->precedence, precedencePredicate->precedence)) { + precedencePredicate = std::static_pointer_cast<const SemanticContext::PrecedencePredicate>(std::move(semanticContext)); + } + } else { + auto [existing, inserted] = operandSet.insert(semanticContext.get()); + if (inserted) { + operandList.push_back(std::move(semanticContext)); + } + } + } + } + + size_t predictOperandCapacity(const Ref<const SemanticContext> &x) { + switch (x->getContextType()) { + case SemanticContextType::AND: + return downCast<const SemanticContext::AND&>(*x).getOperands().size(); + case SemanticContextType::OR: + return downCast<const SemanticContext::OR&>(*x).getOperands().size(); + default: + return 1; + } + } + + size_t predictOperandCapacity(const Ref<const SemanticContext> &a, const Ref<const SemanticContext> &b) { + return predictOperandCapacity(a) + predictOperandCapacity(b); + } + +} + +//------------------ Predicate ----------------------------------------------------------------------------------------- + +SemanticContext::Predicate::Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent) + : SemanticContext(SemanticContextType::PREDICATE), ruleIndex(ruleIndex), predIndex(predIndex), isCtxDependent(isCtxDependent) {} + +bool SemanticContext::Predicate::eval(Recognizer *parser, RuleContext *parserCallStack) const { + RuleContext *localctx = nullptr; + if (isCtxDependent) { + localctx = parserCallStack; + } + return parser->sempred(localctx, ruleIndex, predIndex); +} + +size_t SemanticContext::Predicate::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(); + hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(getContextType())); + hashCode = misc::MurmurHash::update(hashCode, ruleIndex); + hashCode = misc::MurmurHash::update(hashCode, predIndex); + hashCode = misc::MurmurHash::update(hashCode, isCtxDependent ? 1 : 0); + hashCode = misc::MurmurHash::finish(hashCode, 4); + return hashCode; +} + +bool SemanticContext::Predicate::equals(const SemanticContext &other) const { + if (this == &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const Predicate &p = downCast<const Predicate&>(other); + return ruleIndex == p.ruleIndex && predIndex == p.predIndex && isCtxDependent == p.isCtxDependent; +} + +std::string SemanticContext::Predicate::toString() const { + return std::string("{") + std::to_string(ruleIndex) + std::string(":") + std::to_string(predIndex) + std::string("}?"); +} + +//------------------ PrecedencePredicate ------------------------------------------------------------------------------- + +SemanticContext::PrecedencePredicate::PrecedencePredicate(int precedence) : SemanticContext(SemanticContextType::PRECEDENCE), precedence(precedence) {} + +bool SemanticContext::PrecedencePredicate::eval(Recognizer *parser, RuleContext *parserCallStack) const { + return parser->precpred(parserCallStack, precedence); +} + +Ref<const SemanticContext> SemanticContext::PrecedencePredicate::evalPrecedence(Recognizer *parser, + RuleContext *parserCallStack) const { + if (parser->precpred(parserCallStack, precedence)) { + return SemanticContext::Empty::Instance; + } + return nullptr; +} + +size_t SemanticContext::PrecedencePredicate::hashCode() const { + size_t hashCode = misc::MurmurHash::initialize(); + hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(getContextType())); + hashCode = misc::MurmurHash::update(hashCode, static_cast<size_t>(precedence)); + return misc::MurmurHash::finish(hashCode, 2); +} + +bool SemanticContext::PrecedencePredicate::equals(const SemanticContext &other) const { + if (this == &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const PrecedencePredicate &predicate = downCast<const PrecedencePredicate&>(other); + return precedence == predicate.precedence; +} + +std::string SemanticContext::PrecedencePredicate::toString() const { + return "{" + std::to_string(precedence) + ">=prec}?"; +} + +//------------------ AND ----------------------------------------------------------------------------------------------- + +SemanticContext::AND::AND(Ref<const SemanticContext> a, Ref<const SemanticContext> b) : Operator(SemanticContextType::AND) { + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> operands; + Ref<const SemanticContext::PrecedencePredicate> precedencePredicate; + + _opnds.reserve(predictOperandCapacity(a, b) + 1); + + if (a->getContextType() == SemanticContextType::AND) { + for (const auto &operand : downCast<const AND*>(a.get())->getOperands()) { + insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::less<int>{}); + } + } else { + insertSemanticContext(std::move(a), operands, _opnds, precedencePredicate, std::less<int>{}); + } + + if (b->getContextType() == SemanticContextType::AND) { + for (const auto &operand : downCast<const AND*>(b.get())->getOperands()) { + insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::less<int>{}); + } + } else { + insertSemanticContext(std::move(b), operands, _opnds, precedencePredicate, std::less<int>{}); + } + + if (precedencePredicate != nullptr) { + // interested in the transition with the lowest precedence + auto [existing, inserted] = operands.insert(precedencePredicate.get()); + if (inserted) { + _opnds.push_back(std::move(precedencePredicate)); + } + } +} + +const std::vector<Ref<const SemanticContext>>& SemanticContext::AND::getOperands() const { + return _opnds; +} + +bool SemanticContext::AND::equals(const SemanticContext &other) const { + if (this == &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const AND &context = downCast<const AND&>(other); + return Arrays::equals(getOperands(), context.getOperands()); +} + +size_t SemanticContext::AND::hashCode() const { + size_t hash = misc::MurmurHash::initialize(); + hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType())); + return misc::MurmurHash::hashCode(getOperands(), hash); +} + +bool SemanticContext::AND::eval(Recognizer *parser, RuleContext *parserCallStack) const { + for (const auto &opnd : getOperands()) { + if (!opnd->eval(parser, parserCallStack)) { + return false; + } + } + return true; +} + +Ref<const SemanticContext> SemanticContext::AND::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const { + bool differs = false; + std::vector<Ref<const SemanticContext>> operands; + for (const auto &context : getOperands()) { + auto evaluated = context->evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == nullptr) { + // The AND context is false if any element is false. + return nullptr; + } + if (evaluated != Empty::Instance) { + // Reduce the result by skipping true elements. + operands.push_back(std::move(evaluated)); + } + } + + if (!differs) { + return shared_from_this(); + } + + if (operands.empty()) { + // All elements were true, so the AND context is true. + return Empty::Instance; + } + + Ref<const SemanticContext> result = std::move(operands[0]); + for (size_t i = 1; i < operands.size(); ++i) { + result = SemanticContext::And(std::move(result), std::move(operands[i])); + } + + return result; +} + +std::string SemanticContext::AND::toString() const { + std::string tmp; + for (const auto &var : getOperands()) { + tmp += var->toString() + " && "; + } + return tmp; +} + +//------------------ OR ------------------------------------------------------------------------------------------------ + +SemanticContext::OR::OR(Ref<const SemanticContext> a, Ref<const SemanticContext> b) : Operator(SemanticContextType::OR) { + std::unordered_set<const SemanticContext*, SemanticContextHasher, SemanticContextComparer> operands; + Ref<const SemanticContext::PrecedencePredicate> precedencePredicate; + + _opnds.reserve(predictOperandCapacity(a, b) + 1); + + if (a->getContextType() == SemanticContextType::OR) { + for (const auto &operand : downCast<const OR*>(a.get())->getOperands()) { + insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::greater<int>{}); + } + } else { + insertSemanticContext(std::move(a), operands, _opnds, precedencePredicate, std::greater<int>{}); + } + + if (b->getContextType() == SemanticContextType::OR) { + for (const auto &operand : downCast<const OR*>(b.get())->getOperands()) { + insertSemanticContext(operand, operands, _opnds, precedencePredicate, std::greater<int>{}); + } + } else { + insertSemanticContext(std::move(b), operands, _opnds, precedencePredicate, std::greater<int>{}); + } + + if (precedencePredicate != nullptr) { + // interested in the transition with the highest precedence + auto [existing, inserted] = operands.insert(precedencePredicate.get()); + if (inserted) { + _opnds.push_back(std::move(precedencePredicate)); + } + } +} + +const std::vector<Ref<const SemanticContext>>& SemanticContext::OR::getOperands() const { + return _opnds; +} + +bool SemanticContext::OR::equals(const SemanticContext &other) const { + if (this == &other) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const OR &context = downCast<const OR&>(other); + return Arrays::equals(getOperands(), context.getOperands()); +} + +size_t SemanticContext::OR::hashCode() const { + size_t hash = misc::MurmurHash::initialize(); + hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType())); + return misc::MurmurHash::hashCode(getOperands(), hash); +} + +bool SemanticContext::OR::eval(Recognizer *parser, RuleContext *parserCallStack) const { + for (const auto &opnd : getOperands()) { + if (opnd->eval(parser, parserCallStack)) { + return true; + } + } + return false; +} + +Ref<const SemanticContext> SemanticContext::OR::evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const { + bool differs = false; + std::vector<Ref<const SemanticContext>> operands; + for (const auto &context : getOperands()) { + auto evaluated = context->evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == Empty::Instance) { + // The OR context is true if any element is true. + return Empty::Instance; + } + if (evaluated != nullptr) { + // Reduce the result by skipping false elements. + operands.push_back(std::move(evaluated)); + } + } + + if (!differs) { + return shared_from_this(); + } + + if (operands.empty()) { + // AllĀ elements were false, so the OR context is false. + return nullptr; + } + + Ref<const SemanticContext> result = std::move(operands[0]); + for (size_t i = 1; i < operands.size(); ++i) { + result = SemanticContext::Or(std::move(result), std::move(operands[i])); + } + + return result; +} + +std::string SemanticContext::OR::toString() const { + std::string tmp; + for(const auto &var : getOperands()) { + tmp += var->toString() + " || "; + } + return tmp; +} + +//------------------ SemanticContext ----------------------------------------------------------------------------------- + +const Ref<const SemanticContext> SemanticContext::Empty::Instance = std::make_shared<Predicate>(INVALID_INDEX, INVALID_INDEX, false); + +Ref<const SemanticContext> SemanticContext::evalPrecedence(Recognizer * /*parser*/, RuleContext * /*parserCallStack*/) const { + return shared_from_this(); +} + +Ref<const SemanticContext> SemanticContext::And(Ref<const SemanticContext> a, Ref<const SemanticContext> b) { + if (!a || a == Empty::Instance) { + return b; + } + + if (!b || b == Empty::Instance) { + return a; + } + + Ref<AND> result = std::make_shared<AND>(std::move(a), std::move(b)); + if (result->getOperands().size() == 1) { + return result->getOperands()[0]; + } + + return result; +} + +Ref<const SemanticContext> SemanticContext::Or(Ref<const SemanticContext> a, Ref<const SemanticContext> b) { + if (!a) { + return b; + } + if (!b) { + return a; + } + + if (a == Empty::Instance || b == Empty::Instance) { + return Empty::Instance; + } + + Ref<OR> result = std::make_shared<OR>(std::move(a), std::move(b)); + if (result->getOperands().size() == 1) { + return result->getOperands()[0]; + } + + return result; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h new file mode 100644 index 0000000000..8116fc0b56 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContext.h @@ -0,0 +1,237 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Recognizer.h" +#include "support/CPPUtils.h" +#include "atn/SemanticContextType.h" + +namespace antlr4 { +namespace atn { + + /// A tree structure used to record the semantic context in which + /// an ATN configuration is valid. It's either a single predicate, + /// a conjunction "p1 && p2", or a sum of products "p1||p2". + /// + /// I have scoped the AND, OR, and Predicate subclasses of + /// SemanticContext within the scope of this outer class. + class ANTLR4CPP_PUBLIC SemanticContext : public std::enable_shared_from_this<SemanticContext> { + public: + virtual ~SemanticContext() = default; + + SemanticContextType getContextType() const { return _contextType; } + + /// <summary> + /// For context independent predicates, we evaluate them without a local + /// context (i.e., null context). That way, we can evaluate them without + /// having to create proper rule-specific context during prediction (as + /// opposed to the parser, which creates them naturally). In a practical + /// sense, this avoids a cast exception from RuleContext to myruleContext. + /// <p/> + /// For context dependent predicates, we must pass in a local context so that + /// references such as $arg evaluate properly as _localctx.arg. We only + /// capture context dependent predicates in the context in which we begin + /// prediction, so we passed in the outer context here in case of context + /// dependent predicate evaluation. + /// </summary> + virtual bool eval(Recognizer *parser, RuleContext *parserCallStack) const = 0; + + /** + * Evaluate the precedence predicates for the context and reduce the result. + * + * @param parser The parser instance. + * @param parserCallStack + * @return The simplified semantic context after precedence predicates are + * evaluated, which will be one of the following values. + * <ul> + * <li>{@link #NONE}: if the predicate simplifies to {@code true} after + * precedence predicates are evaluated.</li> + * <li>{@code null}: if the predicate simplifies to {@code false} after + * precedence predicates are evaluated.</li> + * <li>{@code this}: if the semantic context is not changed as a result of + * precedence predicate evaluation.</li> + * <li>A non-{@code null} {@link SemanticContext}: the new simplified + * semantic context after precedence predicates are evaluated.</li> + * </ul> + */ + virtual Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const; + + virtual size_t hashCode() const = 0; + + virtual bool equals(const SemanticContext &other) const = 0; + + virtual std::string toString() const = 0; + + static Ref<const SemanticContext> And(Ref<const SemanticContext> a, Ref<const SemanticContext> b); + + /// See also: ParserATNSimulator::getPredsForAmbigAlts. + static Ref<const SemanticContext> Or(Ref<const SemanticContext> a, Ref<const SemanticContext> b); + + class Empty; + class Predicate; + class PrecedencePredicate; + class Operator; + class AND; + class OR; + + protected: + explicit SemanticContext(SemanticContextType contextType) : _contextType(contextType) {} + + private: + const SemanticContextType _contextType; + }; + + inline bool operator==(const SemanticContext &lhs, const SemanticContext &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const SemanticContext &lhs, const SemanticContext &rhs) { + return !operator==(lhs, rhs); + } + + class ANTLR4CPP_PUBLIC SemanticContext::Empty : public SemanticContext{ + public: + /** + * The default {@link SemanticContext}, which is semantically equivalent to + * a predicate of the form {@code {true}?}. + */ + static const Ref<const SemanticContext> Instance; + }; + + class ANTLR4CPP_PUBLIC SemanticContext::Predicate final : public SemanticContext { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::PREDICATE; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + const size_t ruleIndex; + const size_t predIndex; + const bool isCtxDependent; // e.g., $i ref in pred + + Predicate(size_t ruleIndex, size_t predIndex, bool isCtxDependent); + + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + }; + + class ANTLR4CPP_PUBLIC SemanticContext::PrecedencePredicate final : public SemanticContext { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::PRECEDENCE; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + const int precedence; + + explicit PrecedencePredicate(int precedence); + + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + }; + + /** + * This is the base class for semantic context "operators", which operate on + * a collection of semantic context "operands". + * + * @since 4.3 + */ + class ANTLR4CPP_PUBLIC SemanticContext::Operator : public SemanticContext { + public: + static bool is(const SemanticContext &semanticContext) { + const auto contextType = semanticContext.getContextType(); + return contextType == SemanticContextType::AND || contextType == SemanticContextType::OR; + } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + /** + * Gets the operands for the semantic context operator. + * + * @return a collection of {@link SemanticContext} operands for the + * operator. + * + * @since 4.3 + */ + + virtual const std::vector<Ref<const SemanticContext>>& getOperands() const = 0; + + protected: + using SemanticContext::SemanticContext; + }; + + /** + * A semantic context which is true whenever none of the contained contexts + * is false. + */ + class ANTLR4CPP_PUBLIC SemanticContext::AND final : public SemanticContext::Operator { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::AND; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + AND(Ref<const SemanticContext> a, Ref<const SemanticContext> b) ; + + const std::vector<Ref<const SemanticContext>>& getOperands() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered.</p> + */ + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + + private: + std::vector<Ref<const SemanticContext>> _opnds; + }; + + /** + * A semantic context which is true whenever at least one of the contained + * contexts is true. + */ + class ANTLR4CPP_PUBLIC SemanticContext::OR final : public SemanticContext::Operator { + public: + static bool is(const SemanticContext &semanticContext) { return semanticContext.getContextType() == SemanticContextType::OR; } + + static bool is(const SemanticContext *semanticContext) { return semanticContext != nullptr && is(*semanticContext); } + + OR(Ref<const SemanticContext> a, Ref<const SemanticContext> b); + + const std::vector<Ref<const SemanticContext>>& getOperands() const override; + + /** + * The evaluation of predicates by this context is short-circuiting, but + * unordered. + */ + bool eval(Recognizer *parser, RuleContext *parserCallStack) const override; + Ref<const SemanticContext> evalPrecedence(Recognizer *parser, RuleContext *parserCallStack) const override; + size_t hashCode() const override; + bool equals(const SemanticContext &other) const override; + std::string toString() const override; + + private: + std::vector<Ref<const SemanticContext>> _opnds; + }; + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::SemanticContext> { + size_t operator()(const ::antlr4::atn::SemanticContext &semanticContext) const { + return semanticContext.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h new file mode 100644 index 0000000000..bca6e421d2 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SemanticContextType.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + enum class SemanticContextType : size_t { + PREDICATE = 1, + PRECEDENCE = 2, + AND = 3, + OR = 4, + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h new file mode 100644 index 0000000000..a723589bc3 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SerializedATNView.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <vector> + +#include "antlr4-common.h" +#include "misc/MurmurHash.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SerializedATNView final { + public: + using value_type = int32_t; + using size_type = size_t; + using difference_type = ptrdiff_t; + using reference = int32_t&; + using const_reference = const int32_t&; + using pointer = int32_t*; + using const_pointer = const int32_t*; + using iterator = const_pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + SerializedATNView() = default; + + SerializedATNView(const_pointer data, size_type size) : _data(data), _size(size) {} + + SerializedATNView(const std::vector<int32_t> &serializedATN) : _data(serializedATN.data()), _size(serializedATN.size()) {} + + SerializedATNView(const SerializedATNView&) = default; + + SerializedATNView& operator=(const SerializedATNView&) = default; + + const_iterator begin() const { return data(); } + + const_iterator cbegin() const { return data(); } + + const_iterator end() const { return data() + size(); } + + const_iterator cend() const { return data() + size(); } + + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + bool empty() const { return size() == 0; } + + const_pointer data() const { return _data; } + + size_type size() const { return _size; } + + size_type size_bytes() const { return size() * sizeof(value_type); } + + const_reference operator[](size_type index) const { return _data[index]; } + + private: + const_pointer _data = nullptr; + size_type _size = 0; + }; + + inline bool operator==(const SerializedATNView &lhs, const SerializedATNView &rhs) { + return (lhs.data() == rhs.data() && lhs.size() == rhs.size()) || + (lhs.size() == rhs.size() && std::memcmp(lhs.data(), rhs.data(), lhs.size_bytes()) == 0); + } + + inline bool operator!=(const SerializedATNView &lhs, const SerializedATNView &rhs) { + return !operator==(lhs, rhs); + } + + inline bool operator<(const SerializedATNView &lhs, const SerializedATNView &rhs) { + int diff = std::memcmp(lhs.data(), rhs.data(), std::min(lhs.size_bytes(), rhs.size_bytes())); + return diff < 0 || (diff == 0 && lhs.size() < rhs.size()); + } + +} // namespace atn +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::atn::SerializedATNView> { + size_t operator()(const ::antlr4::atn::SerializedATNView &serializedATNView) const { + return ::antlr4::misc::MurmurHash::hashCode(serializedATNView.data(), serializedATNView.size()); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp new file mode 100644 index 0000000000..95ec514edb --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.cpp @@ -0,0 +1,28 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Token.h" +#include "misc/IntervalSet.h" + +#include "atn/SetTransition.h" + +using namespace antlr4; +using namespace antlr4::atn; + +SetTransition::SetTransition(TransitionType transitionType, ATNState *target, misc::IntervalSet aSet) + : Transition(transitionType, target), set(aSet.isEmpty() ? misc::IntervalSet::of(Token::INVALID_TYPE) : std::move(aSet)) { +} + +misc::IntervalSet SetTransition::label() const { + return set; +} + +bool SetTransition::matches(size_t symbol, size_t /*minVocabSymbol*/, size_t /*maxVocabSymbol*/) const { + return set.contains(symbol); +} + +std::string SetTransition::toString() const { + return "SET " + Transition::toString() + " { set: " + set.toString() + "}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h new file mode 100644 index 0000000000..3a3343ec25 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SetTransition.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// A transition containing a set of values. </summary> + class ANTLR4CPP_PUBLIC SetTransition : public Transition { + public: + static bool is(const Transition &transition) { + const auto transitionType = transition.getTransitionType(); + return transitionType == TransitionType::SET || transitionType == TransitionType::NOT_SET; + } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + const misc::IntervalSet set; + + SetTransition(ATNState *target, misc::IntervalSet set) : SetTransition(TransitionType::SET, target, std::move(set)) {} + + virtual misc::IntervalSet label() const override; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + + protected: + SetTransition(TransitionType transitionType, ATNState *target, misc::IntervalSet set); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp new file mode 100644 index 0000000000..66a91936e9 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.cpp @@ -0,0 +1,86 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/SingletonPredictionContext.h" + +#include "support/Casts.h" +#include "misc/MurmurHash.h" + +using namespace antlr4::atn; +using namespace antlrcpp; + +namespace { + + bool cachedHashCodeEqual(size_t lhs, size_t rhs) { + return lhs == rhs || lhs == 0 || rhs == 0; + } + +} + +SingletonPredictionContext::SingletonPredictionContext(Ref<const PredictionContext> parent, size_t returnState) + : PredictionContext(PredictionContextType::SINGLETON), parent(std::move(parent)), returnState(returnState) { + assert(returnState != ATNState::INVALID_STATE_NUMBER); +} + +Ref<const SingletonPredictionContext> SingletonPredictionContext::create(Ref<const PredictionContext> parent, size_t returnState) { + if (returnState == EMPTY_RETURN_STATE && parent == nullptr) { + // someone can pass in the bits of an array ctx that mean $ + return std::dynamic_pointer_cast<const SingletonPredictionContext>(EMPTY); + } + return std::make_shared<SingletonPredictionContext>(std::move(parent), returnState); +} + +bool SingletonPredictionContext::isEmpty() const { + return parent == nullptr && returnState == EMPTY_RETURN_STATE; +} + +size_t SingletonPredictionContext::size() const { + return 1; +} + +const Ref<const PredictionContext>& SingletonPredictionContext::getParent(size_t index) const { + assert(index == 0); + static_cast<void>(index); + return parent; +} + +size_t SingletonPredictionContext::getReturnState(size_t index) const { + assert(index == 0); + static_cast<void>(index); + return returnState; +} + +size_t SingletonPredictionContext::hashCodeImpl() const { + size_t hash = misc::MurmurHash::initialize(); + hash = misc::MurmurHash::update(hash, static_cast<size_t>(getContextType())); + hash = misc::MurmurHash::update(hash, parent); + hash = misc::MurmurHash::update(hash, returnState); + return misc::MurmurHash::finish(hash, 3); +} + +bool SingletonPredictionContext::equals(const PredictionContext &other) const { + if (this == std::addressof(other)) { + return true; + } + if (getContextType() != other.getContextType()) { + return false; + } + const auto &singleton = downCast<const SingletonPredictionContext&>(other); + return returnState == singleton.returnState && + cachedHashCodeEqual(cachedHashCode(), singleton.cachedHashCode()) && + (parent == singleton.parent || (parent != nullptr && singleton.parent != nullptr && *parent == *singleton.parent)); +} + +std::string SingletonPredictionContext::toString() const { + //std::string up = !parent.expired() ? parent.lock()->toString() : ""; + std::string up = parent != nullptr ? parent->toString() : ""; + if (up.length() == 0) { + if (returnState == EMPTY_RETURN_STATE) { + return "$"; + } + return std::to_string(returnState); + } + return std::to_string(returnState) + " " + up; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h new file mode 100644 index 0000000000..1784c4f045 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/SingletonPredictionContext.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/PredictionContext.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC SingletonPredictionContext final : public PredictionContext { + public: + static bool is(const PredictionContext &predictionContext) { return predictionContext.getContextType() == PredictionContextType::SINGLETON; } + + static bool is(const PredictionContext *predictionContext) { return predictionContext != nullptr && is(*predictionContext); } + + static Ref<const SingletonPredictionContext> create(Ref<const PredictionContext> parent, size_t returnState); + + // Usually a parent is linked via a weak ptr. Not so here as we have kinda reverse reference chain. + // There are no child contexts stored here and often the parent context is left dangling when it's + // owning ATNState is released. In order to avoid having this context released as well (leaving all other contexts + // which got this one as parent with a null reference) we use a shared_ptr here instead, to keep those left alone + // parent contexts alive. + const Ref<const PredictionContext> parent; + const size_t returnState; + + SingletonPredictionContext(Ref<const PredictionContext> parent, size_t returnState); + + bool isEmpty() const override; + size_t size() const override; + const Ref<const PredictionContext>& getParent(size_t index) const override; + size_t getReturnState(size_t index) const override; + bool equals(const PredictionContext &other) const override; + std::string toString() const override; + + protected: + size_t hashCodeImpl() const override; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h new file mode 100644 index 0000000000..17fd43fde8 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarBlockStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/BlockStartState.h" + +namespace antlr4 { +namespace atn { + + /// The block that begins a closure loop. + class ANTLR4CPP_PUBLIC StarBlockStartState final : public BlockStartState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_BLOCK_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + StarBlockStartState() : BlockStartState(ATNStateType::STAR_BLOCK_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h new file mode 100644 index 0000000000..a62eb812b1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopEntryState.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopEntryState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_LOOP_ENTRY; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + /** + * Indicates whether this state can benefit from a precedence DFA during SLL + * decision making. + * + * <p>This is a computed property that is calculated during ATN deserialization + * and stored for use in {@link ParserATNSimulator} and + * {@link ParserInterpreter}.</p> + * + * @see DFA#isPrecedenceDfa() + */ + bool isPrecedenceDecision = false; + + StarLoopbackState *loopBackState = nullptr; + + StarLoopEntryState() : DecisionState(ATNStateType::STAR_LOOP_ENTRY) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp new file mode 100644 index 0000000000..6dddbc0d4e --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.cpp @@ -0,0 +1,19 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/StarLoopEntryState.h" +#include "atn/Transition.h" +#include "support/Casts.h" + +#include "atn/StarLoopbackState.h" + +using namespace antlr4::atn; + +StarLoopEntryState *StarLoopbackState::getLoopEntryState() const { + if (transitions[0]->target != nullptr && transitions[0]->target->getStateType() == ATNStateType::STAR_LOOP_ENTRY) { + return antlrcpp::downCast<StarLoopEntryState*>(transitions[0]->target); + } + return nullptr; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h new file mode 100644 index 0000000000..04ef9db095 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/StarLoopbackState.h @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/ATNState.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC StarLoopbackState final : public ATNState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::STAR_LOOP_BACK; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + StarLoopbackState() : ATNState(ATNStateType::STAR_LOOP_BACK) {} + + StarLoopEntryState *getLoopEntryState() const; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h b/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h new file mode 100644 index 0000000000..8e41636283 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TokensStartState.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/DecisionState.h" + +namespace antlr4 { +namespace atn { + + /// The Tokens rule start state linking to each lexer rule start state. + class ANTLR4CPP_PUBLIC TokensStartState final : public DecisionState { + public: + static bool is(const ATNState &atnState) { return atnState.getStateType() == ATNStateType::TOKEN_START; } + + static bool is(const ATNState *atnState) { return atnState != nullptr && is(*atnState); } + + TokensStartState() : DecisionState(ATNStateType::TOKEN_START) {} + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp new file mode 100644 index 0000000000..b918cddfcf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.cpp @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "support/Arrays.h" + +#include "atn/Transition.h" + +using namespace antlr4; +using namespace antlr4::atn; +using namespace antlrcpp; + +Transition::Transition(TransitionType transitionType, ATNState *target) : _transitionType(transitionType) { + if (target == nullptr) { + throw NullPointerException("target cannot be null."); + } + + this->target = target; +} + +bool Transition::isEpsilon() const { + return false; +} + +misc::IntervalSet Transition::label() const { + return misc::IntervalSet::EMPTY_SET; +} + +std::string Transition::toString() const { + std::stringstream ss; + ss << "(Transition " << std::hex << this << ", target: " << std::hex << target << ')'; + + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h new file mode 100644 index 0000000000..4c88d698ae --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/Transition.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/IntervalSet.h" +#include "atn/TransitionType.h" + +namespace antlr4 { +namespace atn { + + /// <summary> + /// An ATN transition between any two ATN states. Subclasses define + /// atom, set, epsilon, action, predicate, rule transitions. + /// <p/> + /// This is a one way link. It emanates from a state (usually via a list of + /// transitions) and has a target state. + /// <p/> + /// Since we never have to change the ATN transitions once we construct it, + /// we can fix these transitions as specific classes. The DFA transitions + /// on the other hand need to update the labels as it adds transitions to + /// the states. We'll use the term Edge for the DFA to distinguish them from + /// ATN transitions. + /// </summary> + class ANTLR4CPP_PUBLIC Transition { + public: + /// The target of this transition. + // ml: this is a reference into the ATN. + ATNState *target; + + virtual ~Transition() = default; + + TransitionType getTransitionType() const { return _transitionType; } + + /** + * Determines if the transition is an "epsilon" transition. + * + * <p>The default implementation returns {@code false}.</p> + * + * @return {@code true} if traversing this transition in the ATN does not + * consume an input symbol; otherwise, {@code false} if traversing this + * transition consumes (matches) an input symbol. + */ + virtual bool isEpsilon() const; + virtual misc::IntervalSet label() const; + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const = 0; + + virtual std::string toString() const; + + Transition(Transition const&) = delete; + Transition& operator=(Transition const&) = delete; + + protected: + Transition(TransitionType transitionType, ATNState *target); + + private: + const TransitionType _transitionType; + }; + + using ConstTransitionPtr = std::unique_ptr<const Transition>; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp new file mode 100644 index 0000000000..78769b2ada --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.cpp @@ -0,0 +1,27 @@ +#include "atn/TransitionType.h" + +std::string antlr4::atn::transitionTypeName(TransitionType transitionType) { + switch (transitionType) { + case TransitionType::EPSILON: + return "EPSILON"; + case TransitionType::RANGE: + return "RANGE"; + case TransitionType::RULE: + return "RULE"; + case TransitionType::PREDICATE: + return "PREDICATE"; + case TransitionType::ATOM: + return "ATOM"; + case TransitionType::ACTION: + return "ACTION"; + case TransitionType::SET: + return "SET"; + case TransitionType::NOT_SET: + return "NOT_SET"; + case TransitionType::WILDCARD: + return "WILDCARD"; + case TransitionType::PRECEDENCE: + return "PRECEDENCE"; + } + return "UNKNOWN"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h new file mode 100644 index 0000000000..d5d5f3bd97 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/TransitionType.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include <cstddef> +#include <string> + +#include "antlr4-common.h" + +namespace antlr4 { +namespace atn { + + // Constants for transition serialization. + enum class TransitionType : size_t { + EPSILON = 1, + RANGE = 2, + RULE = 3, + PREDICATE = 4, // e.g., {isType(input.LT(1))}? + ATOM = 5, + ACTION = 6, + SET = 7, // ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET = 8, + WILDCARD = 9, + PRECEDENCE = 10, + }; + + ANTLR4CPP_PUBLIC std::string transitionTypeName(TransitionType transitionType); + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp new file mode 100644 index 0000000000..03ec00d399 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.cpp @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNState.h" + +#include "atn/WildcardTransition.h" + +using namespace antlr4::atn; + +WildcardTransition::WildcardTransition(ATNState *target) : Transition(TransitionType::WILDCARD, target) { +} + +bool WildcardTransition::matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol; +} + +std::string WildcardTransition::toString() const { + return "WILDCARD " + Transition::toString() + " {}"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h new file mode 100644 index 0000000000..d8d663f1fd --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/atn/WildcardTransition.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "atn/Transition.h" + +namespace antlr4 { +namespace atn { + + class ANTLR4CPP_PUBLIC WildcardTransition final : public Transition { + public: + static bool is(const Transition &transition) { return transition.getTransitionType() == TransitionType::WILDCARD; } + + static bool is(const Transition *transition) { return transition != nullptr && is(*transition); } + + explicit WildcardTransition(ATNState *target); + + virtual bool matches(size_t symbol, size_t minVocabSymbol, size_t maxVocabSymbol) const override; + + virtual std::string toString() const override; + }; + +} // namespace atn +} // namespace antlr4 |