diff options
author | asmyasnikov <asmyasnikov@ydb.tech> | 2024-06-26 17:09:51 +0300 |
---|---|---|
committer | asmyasnikov <asmyasnikov@ydb.tech> | 2024-06-26 17:27:07 +0300 |
commit | e25934f4bbe7b98daa362f04861972e8f83066ad (patch) | |
tree | b350932f398fafa6740fe43a529edf700c747270 /contrib/libs/antlr4_cpp_runtime/src/dfa | |
parent | e6190f5d36aef50e2fec0076c384ba0874f5564c (diff) | |
download | ydb-e25934f4bbe7b98daa362f04861972e8f83066ad.tar.gz |
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/dfa')
8 files changed, 555 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp new file mode 100644 index 0000000000..4cc0ab7cc1 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp @@ -0,0 +1,115 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFASerializer.h" +#include "dfa/LexerDFASerializer.h" +#include "support/CPPUtils.h" +#include "atn/StarLoopEntryState.h" +#include "atn/ATNConfigSet.h" +#include "support/Casts.h" + +#include "dfa/DFA.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlrcpp; + +DFA::DFA(atn::DecisionState *atnStartState) : DFA(atnStartState, 0) { +} + +DFA::DFA(atn::DecisionState *atnStartState, size_t decision) + : atnStartState(atnStartState), s0(nullptr), decision(decision) { + + _precedenceDfa = false; + if (atn::StarLoopEntryState::is(atnStartState)) { + if (downCast<atn::StarLoopEntryState*>(atnStartState)->isPrecedenceDecision) { + _precedenceDfa = true; + s0 = new DFAState(std::unique_ptr<atn::ATNConfigSet>(new atn::ATNConfigSet())); + s0->isAcceptState = false; + s0->requiresFullContext = false; + } + } +} + +DFA::DFA(DFA &&other) : atnStartState(other.atnStartState), s0(other.s0), decision(other.decision) { + // Source states are implicitly cleared by the move. + states = std::move(other.states); + + other.atnStartState = nullptr; + other.decision = 0; + other.s0 = nullptr; + _precedenceDfa = other._precedenceDfa; + other._precedenceDfa = false; +} + +DFA::~DFA() { + bool s0InList = (s0 == nullptr); + for (auto *state : states) { + if (state == s0) + s0InList = true; + delete state; + } + + if (!s0InList) { + delete s0; + } +} + +bool DFA::isPrecedenceDfa() const { + return _precedenceDfa; +} + +DFAState* DFA::getPrecedenceStartState(int precedence) const { + assert(_precedenceDfa); // Only precedence DFAs may contain a precedence start state. + + auto iterator = s0->edges.find(precedence); + if (iterator == s0->edges.end()) + return nullptr; + + return iterator->second; +} + +void DFA::setPrecedenceStartState(int precedence, DFAState *startState) { + if (!isPrecedenceDfa()) { + throw IllegalStateException("Only precedence DFAs may contain a precedence start state."); + } + + if (precedence < 0) { + return; + } + + s0->edges[precedence] = startState; +} + +std::vector<DFAState *> DFA::getStates() const { + std::vector<DFAState *> result; + for (auto *state : states) + result.push_back(state); + + std::sort(result.begin(), result.end(), [](DFAState *o1, DFAState *o2) -> bool { + return o1->stateNumber < o2->stateNumber; + }); + + return result; +} + +std::string DFA::toString(const Vocabulary &vocabulary) const { + if (s0 == nullptr) { + return ""; + } + + DFASerializer serializer(this, vocabulary); + return serializer.toString(); +} + +std::string DFA::toLexerString() const { + if (s0 == nullptr) { + return ""; + } + LexerDFASerializer serializer(this); + + return serializer.toString(); +} + diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h new file mode 100644 index 0000000000..360eda8ba7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "dfa/DFAState.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC DFA final { + private: + struct DFAStateHasher final { + size_t operator()(const DFAState *dfaState) const { + return dfaState->hashCode(); + } + }; + + struct DFAStateComparer final { + bool operator()(const DFAState *lhs, const DFAState *rhs) const { + return lhs == rhs || *lhs == *rhs; + } + }; + + public: + /// A set of all DFA states. Use a map so we can get old state back. + /// Set only allows you to see if it's there. + + /// From which ATN state did we create this DFA? + atn::DecisionState *atnStartState; + std::unordered_set<DFAState*, DFAStateHasher, DFAStateComparer> states; // States are owned by this class. + DFAState *s0; + size_t decision; + + explicit DFA(atn::DecisionState *atnStartState); + DFA(atn::DecisionState *atnStartState, size_t decision); + DFA(const DFA &other) = delete; + DFA(DFA &&other); + ~DFA(); + + /** + * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special + * start state {@link #s0} which is not stored in {@link #states}. The + * {@link DFAState#edges} array for this start state contains outgoing edges + * supplying individual start states corresponding to specific precedence + * values. + * + * @return {@code true} if this is a precedence DFA; otherwise, + * {@code false}. + * @see Parser#getPrecedence() + */ + bool isPrecedenceDfa() const; + + /** + * Get the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @return The start state corresponding to the specified precedence, or + * {@code null} if no start state exists for the specified precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + DFAState* getPrecedenceStartState(int precedence) const; + + /** + * Set the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @param startState The start state corresponding to the specified + * precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + void setPrecedenceStartState(int precedence, DFAState *startState); + + /// Return a list of all states in this DFA, ordered by state number. + std::vector<DFAState *> getStates() const; + + std::string toString(const Vocabulary &vocabulary) const; + + std::string toLexerString() const; + + private: + /** + * {@code true} if this DFA is for a precedence decision; otherwise, + * {@code false}. This is the backing field for {@link #isPrecedenceDfa}. + */ + bool _precedenceDfa; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp new file mode 100644 index 0000000000..64d01769de --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "Vocabulary.h" + +#include "dfa/DFASerializer.h" + +using namespace antlr4::dfa; + +DFASerializer::DFASerializer(const DFA *dfa, const Vocabulary &vocabulary) : _dfa(dfa), _vocabulary(vocabulary) { +} + +std::string DFASerializer::toString() const { + if (_dfa->s0 == nullptr) { + return ""; + } + + std::stringstream ss; + std::vector<DFAState *> states = _dfa->getStates(); + for (auto *s : states) { + for (size_t i = 0; i < s->edges.size(); i++) { + DFAState *t = s->edges[i]; + if (t != nullptr && t->stateNumber != INT32_MAX) { + ss << getStateString(s); + std::string label = getEdgeLabel(i); + ss << "-" << label << "->" << getStateString(t) << "\n"; + } + } + } + + return ss.str(); +} + +std::string DFASerializer::getEdgeLabel(size_t i) const { + return _vocabulary.getDisplayName(i); // ml: no longer needed -1 as we use a map for edges, without offset. +} + +std::string DFASerializer::getStateString(DFAState *s) const { + size_t n = s->stateNumber; + + const std::string baseStateStr = std::string(s->isAcceptState ? ":" : "") + "s" + std::to_string(n) + + (s->requiresFullContext ? "^" : ""); + + if (s->isAcceptState) { + if (!s->predicates.empty()) { + std::string buf; + for (size_t i = 0; i < s->predicates.size(); i++) { + buf.append(s->predicates[i].toString()); + } + return baseStateStr + "=>" + buf; + } else { + return baseStateStr + "=>" + std::to_string(s->prediction); + } + } else { + return baseStateStr; + } +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h new file mode 100644 index 0000000000..b541714078 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Vocabulary.h" + +namespace antlr4 { +namespace dfa { + + /// A DFA walker that knows how to dump them to serialized strings. + class ANTLR4CPP_PUBLIC DFASerializer { + public: + DFASerializer(const DFA *dfa, const Vocabulary &vocabulary); + + virtual ~DFASerializer() = default; + + std::string toString() const; + + protected: + virtual std::string getEdgeLabel(size_t i) const; + std::string getStateString(DFAState *s) const; + + private: + const DFA *_dfa; + const Vocabulary &_vocabulary; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp new file mode 100644 index 0000000000..e591b204c7 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNConfigSet.h" +#include "atn/SemanticContext.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" + +#include "dfa/DFAState.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; + +std::string DFAState::PredPrediction::toString() const { + return std::string("(") + pred->toString() + ", " + std::to_string(alt) + ")"; +} + +std::set<size_t> DFAState::getAltSet() const { + std::set<size_t> alts; + if (configs != nullptr) { + for (size_t i = 0; i < configs->size(); i++) { + alts.insert(configs->get(i)->alt); + } + } + return alts; +} + +size_t DFAState::hashCode() const { + return configs != nullptr ? configs->hashCode() : 0; +} + +bool DFAState::equals(const DFAState &other) const { + if (this == std::addressof(other)) { + return true; + } + return configs == other.configs || + (configs != nullptr && other.configs != nullptr && *configs == *other.configs); +} + +std::string DFAState::toString() const { + std::stringstream ss; + ss << stateNumber; + if (configs) { + ss << ":" << configs->toString(); + } + if (isAcceptState) { + ss << " => "; + if (!predicates.empty()) { + for (size_t i = 0; i < predicates.size(); i++) { + ss << predicates[i].toString(); + } + } else { + ss << prediction; + } + } + return ss.str(); +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h new file mode 100644 index 0000000000..f555cc45cf --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h @@ -0,0 +1,154 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +#include "atn/ATNConfigSet.h" +#include "FlatHashMap.h" + +namespace antlr4 { +namespace dfa { + + /// <summary> + /// A DFA state represents a set of possible ATN configurations. + /// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state + /// to keep track of all possible states the ATN can be in after + /// reading each input symbol. That is to say, after reading + /// input a1a2..an, the DFA is in a state that represents the + /// subset T of the states of the ATN that are reachable from the + /// ATN's start state along some path labeled a1a2..an." + /// In conventional NFA->DFA conversion, therefore, the subset T + /// would be a bitset representing the set of states the + /// ATN could be in. We need to track the alt predicted by each + /// state as well, however. More importantly, we need to maintain + /// a stack of states, tracking the closure operations as they + /// jump from rule to rule, emulating rule invocations (method calls). + /// I have to add a stack to simulate the proper lookahead sequences for + /// the underlying LL grammar from which the ATN was derived. + /// <p/> + /// I use a set of ATNConfig objects not simple states. An ATNConfig + /// is both a state (ala normal conversion) and a RuleContext describing + /// the chain of rules (if any) followed to arrive at that state. + /// <p/> + /// A DFA state may have multiple references to a particular state, + /// but with different ATN contexts (with same or different alts) + /// meaning that state was reached via a different set of rule invocations. + /// </summary> + class ANTLR4CPP_PUBLIC DFAState final { + public: + struct ANTLR4CPP_PUBLIC PredPrediction final { + public: + Ref<const atn::SemanticContext> pred; // never null; at least SemanticContext.NONE + int alt; + + PredPrediction() = delete; + + PredPrediction(const PredPrediction&) = default; + PredPrediction(PredPrediction&&) = default; + + PredPrediction(Ref<const atn::SemanticContext> pred, int alt) : pred(std::move(pred)), alt(alt) {} + + PredPrediction& operator=(const PredPrediction&) = default; + PredPrediction& operator=(PredPrediction&&) = default; + + std::string toString() const; + }; + + std::unique_ptr<atn::ATNConfigSet> configs; + + /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + /// <seealso cref="Token#EOF"/> maps to {@code edges[0]}. + // ml: this is a sparse list, so we use a map instead of a vector. + // Watch out: we no longer have the -1 offset, as it isn't needed anymore. + FlatHashMap<size_t, DFAState*> edges; + + /// if accept state, what ttype do we match or alt do we predict? + /// This is set to <seealso cref="ATN#INVALID_ALT_NUMBER"/> when <seealso cref="#predicates"/>{@code !=null} or + /// <seealso cref="#requiresFullContext"/>. + size_t prediction = 0; + + Ref<const atn::LexerActionExecutor> lexerActionExecutor; + + /// <summary> + /// During SLL parsing, this is a list of predicates associated with the + /// ATN configurations of the DFA state. When we have predicates, + /// <seealso cref="#requiresFullContext"/> is {@code false} since full context prediction evaluates predicates + /// on-the-fly. If this is not null, then <seealso cref="#prediction"/> is + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// <p/> + /// We only use these for non-<seealso cref="#requiresFullContext"/> but conflicting states. That + /// means we know from the context (it's $ or we don't dip into outer + /// context) that it's an ambiguity not a conflict. + /// <p/> + /// This list is computed by <seealso cref="ParserATNSimulator#predicateDFAState"/>. + /// </summary> + std::vector<PredPrediction> predicates; + + int stateNumber = -1; + + bool isAcceptState = false; + + /// <summary> + /// Indicates that this state was created during SLL prediction that + /// discovered a conflict between the configurations in the state. Future + /// <seealso cref="ParserATNSimulator#execATN"/> invocations immediately jumped doing + /// full context prediction if this field is true. + /// </summary> + bool requiresFullContext = false; + + /// Map a predicate to a predicted alternative. + DFAState() = default; + + explicit DFAState(int stateNumber) : stateNumber(stateNumber) {} + + explicit DFAState(std::unique_ptr<atn::ATNConfigSet> configs) : configs(std::move(configs)) {} + + /// <summary> + /// Get the set of all alts mentioned by all ATN configurations in this + /// DFA state. + /// </summary> + std::set<size_t> getAltSet() const; + + size_t hashCode() const; + + /// Two DFAState instances are equal if their ATN configuration sets + /// are the same. This method is used to see if a state already exists. + /// + /// Because the number of alternatives and number of ATN configurations are + /// finite, there is a finite number of DFA states that can be processed. + /// This is necessary to show that the algorithm terminates. + /// + /// Cannot test the DFA state numbers here because in + /// ParserATNSimulator#addDFAState we need to know if any other state + /// exists that has this exact set of ATN configurations. The + /// stateNumber is irrelevant. + bool equals(const DFAState &other) const; + + std::string toString() const; + }; + + inline bool operator==(const DFAState &lhs, const DFAState &rhs) { + return lhs.equals(rhs); + } + + inline bool operator!=(const DFAState &lhs, const DFAState &rhs) { + return !operator==(lhs, rhs); + } + +} // namespace dfa +} // namespace antlr4 + +namespace std { + + template <> + struct hash<::antlr4::dfa::DFAState> { + size_t operator()(const ::antlr4::dfa::DFAState &dfaState) const { + return dfaState.hashCode(); + } + }; + +} // namespace std diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp new file mode 100644 index 0000000000..20ed734743 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp @@ -0,0 +1,17 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Vocabulary.h" + +#include "dfa/LexerDFASerializer.h" + +using namespace antlr4::dfa; + +LexerDFASerializer::LexerDFASerializer(const DFA *dfa) : DFASerializer(dfa, Vocabulary()) { +} + +std::string LexerDFASerializer::getEdgeLabel(size_t i) const { + return std::string("'") + static_cast<char>(i) + "'"; +} diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h new file mode 100644 index 0000000000..eed7f4f0c5 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "dfa/DFASerializer.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC LexerDFASerializer final : public DFASerializer { + public: + explicit LexerDFASerializer(const DFA *dfa); + + protected: + std::string getEdgeLabel(size_t i) const override; + }; + +} // namespace atn +} // namespace antlr4 |