aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/dfa
diff options
context:
space:
mode:
authorasmyasnikov <asmyasnikov@ydb.tech>2024-06-26 17:09:51 +0300
committerasmyasnikov <asmyasnikov@ydb.tech>2024-06-26 17:27:07 +0300
commite25934f4bbe7b98daa362f04861972e8f83066ad (patch)
treeb350932f398fafa6740fe43a529edf700c747270 /contrib/libs/antlr4_cpp_runtime/src/dfa
parente6190f5d36aef50e2fec0076c384ba0874f5564c (diff)
downloadydb-e25934f4bbe7b98daa362f04861972e8f83066ad.tar.gz
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/dfa')
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp115
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h96
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp60
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h32
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp59
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h154
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp17
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h22
8 files changed, 555 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp
new file mode 100644
index 0000000000..4cc0ab7cc1
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.cpp
@@ -0,0 +1,115 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "dfa/DFASerializer.h"
+#include "dfa/LexerDFASerializer.h"
+#include "support/CPPUtils.h"
+#include "atn/StarLoopEntryState.h"
+#include "atn/ATNConfigSet.h"
+#include "support/Casts.h"
+
+#include "dfa/DFA.h"
+
+using namespace antlr4;
+using namespace antlr4::dfa;
+using namespace antlrcpp;
+
+DFA::DFA(atn::DecisionState *atnStartState) : DFA(atnStartState, 0) {
+}
+
+// Builds the DFA for `decision`; a precedence decision also gets its dedicated start state s0.
+DFA::DFA(atn::DecisionState *atnStartState, size_t decision)
+    : atnStartState(atnStartState), s0(nullptr), decision(decision), _precedenceDfa(false) {
+  const bool precedenceDecision = atn::StarLoopEntryState::is(atnStartState) &&
+      downCast<atn::StarLoopEntryState*>(atnStartState)->isPrecedenceDecision;
+  if (!precedenceDecision) {
+    return;
+  }
+  _precedenceDfa = true;
+  s0 = new DFAState(std::unique_ptr<atn::ATNConfigSet>(new atn::ATNConfigSet()));
+  s0->isAcceptState = false;
+  s0->requiresFullContext = false;
+}
+
+// Move constructor: steals `other`'s states and start state so `other`'s destructor frees nothing.
+DFA::DFA(DFA &&other)
+    : atnStartState(other.atnStartState), states(std::move(other.states)), s0(other.s0),
+      decision(other.decision), _precedenceDfa(other._precedenceDfa) {
+  other.states.clear(); // A moved-from set is only "valid but unspecified"; avoid double deletes.
+  other.atnStartState = nullptr;
+  other.s0 = nullptr;
+  other.decision = 0;
+  other._precedenceDfa = false;
+}
+
+// Frees every state in `states`, plus s0 when it is kept outside that set
+// (the constructor allocates such an s0 for precedence DFAs).
+DFA::~DFA() {
+  bool mustDeleteS0 = (s0 != nullptr);
+  for (auto *owned : states) {
+    mustDeleteS0 = mustDeleteS0 && (owned != s0);
+    delete owned;
+  }
+  if (mustDeleteS0) {
+    delete s0;
+  }
+}
+
+bool DFA::isPrecedenceDfa() const {
+ return _precedenceDfa;
+}
+
+// Returns the start state registered for `precedence`, or nullptr when there is none.
+// Throws IllegalStateException for non-precedence DFAs, matching setPrecedenceStartState().
+DFAState* DFA::getPrecedenceStartState(int precedence) const {
+  if (!isPrecedenceDfa()) {
+    throw IllegalStateException("Only precedence DFAs may contain a precedence start state.");
+  }
+  const auto found = s0->edges.find(precedence);
+  return found == s0->edges.end() ? nullptr : found->second;
+}
+
+// Registers `startState` as the entry point for `precedence`. Negative precedences are ignored;
+// calling this on a non-precedence DFA throws IllegalStateException.
+void DFA::setPrecedenceStartState(int precedence, DFAState *startState) {
+  if (!_precedenceDfa) {
+    throw IllegalStateException("Only precedence DFAs may contain a precedence start state.");
+  }
+  const bool validPrecedence = precedence >= 0;
+  if (validPrecedence) {
+    s0->edges[precedence] = startState;
+  }
+}
+
+// Returns a snapshot of the owned states ordered by ascending stateNumber.
+// The pointers stay owned by this DFA; callers must not delete them.
+std::vector<DFAState *> DFA::getStates() const {
+  std::vector<DFAState *> ordered(states.begin(), states.end());
+
+  const auto byStateNumber = [](const DFAState *lhs, const DFAState *rhs) {
+    return lhs->stateNumber < rhs->stateNumber;
+  };
+  std::sort(ordered.begin(), ordered.end(), byStateNumber);
+  return ordered;
+}
+
+// Renders this DFA with edge labels taken from `vocabulary`; empty when there is no start state.
+std::string DFA::toString(const Vocabulary &vocabulary) const {
+  if (s0 != nullptr) {
+    return DFASerializer(this, vocabulary).toString();
+  }
+
+  return std::string();
+}
+
+// Renders this DFA through LexerDFASerializer; empty when there is no start state.
+std::string DFA::toLexerString() const {
+  if (s0 != nullptr) {
+    return LexerDFASerializer(this).toString();
+  }
+
+  return std::string();
+}
+
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h
new file mode 100644
index 0000000000..360eda8ba7
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFA.h
@@ -0,0 +1,96 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "dfa/DFAState.h"
+
+namespace antlr4 {
+namespace dfa {
+
+ class ANTLR4CPP_PUBLIC DFA final {
+ private:
+ struct DFAStateHasher final {
+ size_t operator()(const DFAState *dfaState) const {
+ return dfaState->hashCode();
+ }
+ };
+
+ struct DFAStateComparer final {
+ bool operator()(const DFAState *lhs, const DFAState *rhs) const {
+ return lhs == rhs || *lhs == *rhs;
+ }
+ };
+
+ public:
+ /// A set of all DFA states. Use a map so we can get old state back.
+ /// Set only allows you to see if it's there.
+
+ /// From which ATN state did we create this DFA?
+ atn::DecisionState *atnStartState;
+ std::unordered_set<DFAState*, DFAStateHasher, DFAStateComparer> states; // States are owned by this class.
+ DFAState *s0;
+ size_t decision;
+
+ explicit DFA(atn::DecisionState *atnStartState);
+ DFA(atn::DecisionState *atnStartState, size_t decision);
+ DFA(const DFA &other) = delete;
+ DFA(DFA &&other);
+ ~DFA();
+
+ /**
+ * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special
+ * start state {@link #s0} which is not stored in {@link #states}. The
+ * {@link DFAState#edges} array for this start state contains outgoing edges
+ * supplying individual start states corresponding to specific precedence
+ * values.
+ *
+ * @return {@code true} if this is a precedence DFA; otherwise,
+ * {@code false}.
+ * @see Parser#getPrecedence()
+ */
+ bool isPrecedenceDfa() const;
+
+ /**
+ * Get the start state for a specific precedence value.
+ *
+ * @param precedence The current precedence.
+ * @return The start state corresponding to the specified precedence, or
+ * {@code null} if no start state exists for the specified precedence.
+ *
+ * @throws IllegalStateException if this is not a precedence DFA.
+ * @see #isPrecedenceDfa()
+ */
+ DFAState* getPrecedenceStartState(int precedence) const;
+
+ /**
+ * Set the start state for a specific precedence value.
+ *
+ * @param precedence The current precedence.
+ * @param startState The start state corresponding to the specified
+ * precedence.
+ *
+ * @throws IllegalStateException if this is not a precedence DFA.
+ * @see #isPrecedenceDfa()
+ */
+ void setPrecedenceStartState(int precedence, DFAState *startState);
+
+ /// Return a list of all states in this DFA, ordered by state number.
+ std::vector<DFAState *> getStates() const;
+
+ std::string toString(const Vocabulary &vocabulary) const;
+
+ std::string toLexerString() const;
+
+ private:
+ /**
+ * {@code true} if this DFA is for a precedence decision; otherwise,
+ * {@code false}. This is the backing field for {@link #isPrecedenceDfa}.
+ */
+ bool _precedenceDfa;
+ };
+
+} // namespace dfa
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp
new file mode 100644
index 0000000000..64d01769de
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "dfa/DFA.h"
+#include "Vocabulary.h"
+
+#include "dfa/DFASerializer.h"
+
+using namespace antlr4::dfa;
+
+DFASerializer::DFASerializer(const DFA *dfa, const Vocabulary &vocabulary) : _dfa(dfa), _vocabulary(vocabulary) {
+}
+
+// Dumps every edge as "<from>-<label>-><to>\n", states ordered by number; "" without a start state.
+std::string DFASerializer::toString() const {
+  if (_dfa->s0 == nullptr) {
+    return "";
+  }
+  std::stringstream ss;
+  for (auto *s : _dfa->getStates()) {
+    // `edges` is a sparse map: indexing it with operator[] would insert a null entry for every
+    // missing symbol. Copy the real entries and sort them by symbol for a deterministic dump.
+    std::vector<std::pair<size_t, DFAState *>> sortedEdges(s->edges.begin(), s->edges.end());
+    std::sort(sortedEdges.begin(), sortedEdges.end(), [](const auto &a, const auto &b) { return a.first < b.first; });
+    for (const auto &edge : sortedEdges) {
+      DFAState *t = edge.second;
+      if (t != nullptr && t->stateNumber != INT32_MAX) {
+        ss << getStateString(s) << "-" << getEdgeLabel(edge.first) << "->" << getStateString(t) << "\n";
+      }
+    }
+  }
+  return ss.str();
+}
+
+std::string DFASerializer::getEdgeLabel(size_t i) const {
+ return _vocabulary.getDisplayName(i); // ml: no longer needed -1 as we use a map for edges, without offset.
+}
+
+// Formats a state as [":"]"s"<number>["^"], where ":" marks an accept state and "^" a state that
+// requires full context. Accept states get "=>" plus their predicates or predicted alternative.
+std::string DFASerializer::getStateString(DFAState *s) const {
+  // Format stateNumber as the int it is; converting to size_t would print -1 as a huge number.
+  std::string text = s->isAcceptState ? ":s" : "s";
+  text += std::to_string(s->stateNumber);
+  text += s->requiresFullContext ? "^" : "";
+  if (!s->isAcceptState) {
+    return text;
+  }
+
+  text += "=>";
+  if (s->predicates.empty()) {
+    return text + std::to_string(s->prediction);
+  }
+  for (const auto &predicate : s->predicates) {
+    text += predicate.toString();
+  }
+  return text;
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h
new file mode 100644
index 0000000000..b541714078
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFASerializer.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Vocabulary.h"
+
+namespace antlr4 {
+namespace dfa {
+
+ /// A DFA walker that knows how to dump them to serialized strings.
+ class ANTLR4CPP_PUBLIC DFASerializer {
+ public:
+ DFASerializer(const DFA *dfa, const Vocabulary &vocabulary);
+
+ virtual ~DFASerializer() = default;
+
+ std::string toString() const;
+
+ protected:
+ virtual std::string getEdgeLabel(size_t i) const;
+ std::string getStateString(DFAState *s) const;
+
+ private:
+ const DFA *_dfa;
+ const Vocabulary &_vocabulary;
+ };
+
+} // namespace dfa
+} // namespace antlr4
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp
new file mode 100644
index 0000000000..e591b204c7
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.cpp
@@ -0,0 +1,59 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/ATNConfigSet.h"
+#include "atn/SemanticContext.h"
+#include "atn/ATNConfig.h"
+#include "misc/MurmurHash.h"
+
+#include "dfa/DFAState.h"
+
+using namespace antlr4::dfa;
+using namespace antlr4::atn;
+
+std::string DFAState::PredPrediction::toString() const {
+ return std::string("(") + pred->toString() + ", " + std::to_string(alt) + ")";
+}
+
+// Collects the distinct alt numbers of all configurations; empty when there is no config set.
+std::set<size_t> DFAState::getAltSet() const {
+  std::set<size_t> alts;
+  const size_t count = (configs != nullptr) ? configs->size() : 0;
+  for (size_t index = 0; index < count; ++index) {
+    alts.insert(configs->get(index)->alt);
+  }
+  return alts;
+}
+
+size_t DFAState::hashCode() const {
+ return configs != nullptr ? configs->hashCode() : 0;
+}
+
+// Equal when both are the same object, or their config sets are equal (or both absent).
+bool DFAState::equals(const DFAState &other) const {
+  if (this == std::addressof(other) || configs == other.configs) {
+    return true;
+  }
+  return configs != nullptr && other.configs != nullptr && *configs == *other.configs;
+}
+
+// Formats "<stateNumber>[:<configs>]", followed for accept states by " => " and either the
+// predicate predictions or the predicted alternative.
+std::string DFAState::toString() const {
+  std::stringstream out;
+  out << stateNumber << (configs != nullptr ? ":" + configs->toString() : std::string());
+  if (!isAcceptState) {
+    return out.str();
+  }
+
+  out << " => ";
+  if (predicates.empty()) {
+    out << prediction;
+  }
+  for (const auto &entry : predicates) {
+    out << entry.toString();
+  }
+  return out.str();
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h
new file mode 100644
index 0000000000..f555cc45cf
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/DFAState.h
@@ -0,0 +1,154 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+#include "atn/ATNConfigSet.h"
+#include "FlatHashMap.h"
+
+namespace antlr4 {
+namespace dfa {
+
+ /// <summary>
+ /// A DFA state represents a set of possible ATN configurations.
+ /// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
+ /// to keep track of all possible states the ATN can be in after
+ /// reading each input symbol. That is to say, after reading
+ /// input a1a2..an, the DFA is in a state that represents the
+ /// subset T of the states of the ATN that are reachable from the
+ /// ATN's start state along some path labeled a1a2..an."
+ /// In conventional NFA->DFA conversion, therefore, the subset T
+ /// would be a bitset representing the set of states the
+ /// ATN could be in. We need to track the alt predicted by each
+ /// state as well, however. More importantly, we need to maintain
+ /// a stack of states, tracking the closure operations as they
+ /// jump from rule to rule, emulating rule invocations (method calls).
+ /// I have to add a stack to simulate the proper lookahead sequences for
+ /// the underlying LL grammar from which the ATN was derived.
+ /// <p/>
+ /// I use a set of ATNConfig objects not simple states. An ATNConfig
+ /// is both a state (ala normal conversion) and a RuleContext describing
+ /// the chain of rules (if any) followed to arrive at that state.
+ /// <p/>
+ /// A DFA state may have multiple references to a particular state,
+ /// but with different ATN contexts (with same or different alts)
+ /// meaning that state was reached via a different set of rule invocations.
+ /// </summary>
+ class ANTLR4CPP_PUBLIC DFAState final {
+ public:
+ struct ANTLR4CPP_PUBLIC PredPrediction final {
+ public:
+ Ref<const atn::SemanticContext> pred; // never null; at least SemanticContext.NONE
+ int alt;
+
+ PredPrediction() = delete;
+
+ PredPrediction(const PredPrediction&) = default;
+ PredPrediction(PredPrediction&&) = default;
+
+ PredPrediction(Ref<const atn::SemanticContext> pred, int alt) : pred(std::move(pred)), alt(alt) {}
+
+ PredPrediction& operator=(const PredPrediction&) = default;
+ PredPrediction& operator=(PredPrediction&&) = default;
+
+ std::string toString() const;
+ };
+
+ std::unique_ptr<atn::ATNConfigSet> configs;
+
+ /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1)
+ /// <seealso cref="Token#EOF"/> maps to {@code edges[0]}.
+ // ml: this is a sparse list, so we use a map instead of a vector.
+ // Watch out: we no longer have the -1 offset, as it isn't needed anymore.
+ FlatHashMap<size_t, DFAState*> edges;
+
+ /// if accept state, what ttype do we match or alt do we predict?
+ /// This is set to <seealso cref="ATN#INVALID_ALT_NUMBER"/> when <seealso cref="#predicates"/>{@code !=null} or
+ /// <seealso cref="#requiresFullContext"/>.
+ size_t prediction = 0;
+
+ Ref<const atn::LexerActionExecutor> lexerActionExecutor;
+
+ /// <summary>
+ /// During SLL parsing, this is a list of predicates associated with the
+ /// ATN configurations of the DFA state. When we have predicates,
+ /// <seealso cref="#requiresFullContext"/> is {@code false} since full context prediction evaluates predicates
+ /// on-the-fly. If this is not null, then <seealso cref="#prediction"/> is
+ /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>.
+ /// <p/>
+ /// We only use these for non-<seealso cref="#requiresFullContext"/> but conflicting states. That
+ /// means we know from the context (it's $ or we don't dip into outer
+ /// context) that it's an ambiguity not a conflict.
+ /// <p/>
+ /// This list is computed by <seealso cref="ParserATNSimulator#predicateDFAState"/>.
+ /// </summary>
+ std::vector<PredPrediction> predicates;
+
+ int stateNumber = -1;
+
+ bool isAcceptState = false;
+
+ /// <summary>
+ /// Indicates that this state was created during SLL prediction that
+ /// discovered a conflict between the configurations in the state. Future
+ /// <seealso cref="ParserATNSimulator#execATN"/> invocations immediately jumped doing
+ /// full context prediction if this field is true.
+ /// </summary>
+ bool requiresFullContext = false;
+
+ /// Map a predicate to a predicted alternative.
+ DFAState() = default;
+
+ explicit DFAState(int stateNumber) : stateNumber(stateNumber) {}
+
+ explicit DFAState(std::unique_ptr<atn::ATNConfigSet> configs) : configs(std::move(configs)) {}
+
+ /// <summary>
+ /// Get the set of all alts mentioned by all ATN configurations in this
+ /// DFA state.
+ /// </summary>
+ std::set<size_t> getAltSet() const;
+
+ size_t hashCode() const;
+
+ /// Two DFAState instances are equal if their ATN configuration sets
+ /// are the same. This method is used to see if a state already exists.
+ ///
+ /// Because the number of alternatives and number of ATN configurations are
+ /// finite, there is a finite number of DFA states that can be processed.
+ /// This is necessary to show that the algorithm terminates.
+ ///
+ /// Cannot test the DFA state numbers here because in
+ /// ParserATNSimulator#addDFAState we need to know if any other state
+ /// exists that has this exact set of ATN configurations. The
+ /// stateNumber is irrelevant.
+ bool equals(const DFAState &other) const;
+
+ std::string toString() const;
+ };
+
+ inline bool operator==(const DFAState &lhs, const DFAState &rhs) {
+ return lhs.equals(rhs);
+ }
+
+ inline bool operator!=(const DFAState &lhs, const DFAState &rhs) {
+ return !operator==(lhs, rhs);
+ }
+
+} // namespace dfa
+} // namespace antlr4
+
+namespace std {
+
+ template <>
+ struct hash<::antlr4::dfa::DFAState> {
+ size_t operator()(const ::antlr4::dfa::DFAState &dfaState) const {
+ return dfaState.hashCode();
+ }
+ };
+
+} // namespace std
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp
new file mode 100644
index 0000000000..20ed734743
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.cpp
@@ -0,0 +1,17 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "Vocabulary.h"
+
+#include "dfa/LexerDFASerializer.h"
+
+using namespace antlr4::dfa;
+
+// A temporary Vocabulary() would dangle: DFASerializer keeps only a reference to its vocabulary.
+static const antlr4::Vocabulary &emptyVocabulary() { static const antlr4::Vocabulary v{}; return v; }
+LexerDFASerializer::LexerDFASerializer(const DFA *dfa) : DFASerializer(dfa, emptyVocabulary()) {}
+std::string LexerDFASerializer::getEdgeLabel(size_t i) const {
+ return std::string("'") + static_cast<char>(i) + "'";
+}
diff --git a/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h
new file mode 100644
index 0000000000..eed7f4f0c5
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/dfa/LexerDFASerializer.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "dfa/DFASerializer.h"
+
+namespace antlr4 {
+namespace dfa {
+
+ class ANTLR4CPP_PUBLIC LexerDFASerializer final : public DFASerializer {
+ public:
+ explicit LexerDFASerializer(const DFA *dfa);
+
+ protected:
+ std::string getEdgeLabel(size_t i) const override;
+ };
+
+} // namespace dfa
+} // namespace antlr4