aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/atn/LexerATNSimulator.h
blob: 304430b04d084138b68fc17f045563fa6fefc398 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include <atomic>

#include "atn/ATNSimulator.h"
#include "atn/LexerATNConfig.h"
#include "atn/ATNConfigSet.h"

namespace antlr4 {
namespace atn {

  /// "dup" of ParserInterpreter
  ///
  /// ATN simulator for lexers. Derives from ATNSimulator, keeps a reference to a
  /// table of DFAs (_decisionToDFA) together with position bookkeeping for the
  /// token being matched (_startIndex, _line, _charPositionInLine, _mode), and
  /// exposes match() as its public matching entry point.
  /// NOTE(review): only declarations are visible in this header; every remark
  /// below that is worded "presumably" must be confirmed against the .cpp.
  class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
  protected:
    /// <summary>
    /// When we hit an accept state in either the DFA or the ATN, we
    ///  have to notify the character stream to start buffering characters
    ///  via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
    ///  includes the current index into the input, the current line,
    ///  and current character position in that line. Note that the Lexer is
    ///  tracking the starting line and character position of the token. These
    ///  variables track the "state" of the simulator when it hits an accept state.
    /// <p/>
    ///  We track these variables separately for the DFA and ATN simulation
    ///  because the DFA simulation often has to fail over to the ATN
    ///  simulation. If the ATN simulation fails, we need the DFA to fall
    ///  back to its previously accepted state, if any. If the ATN succeeds,
    ///  then the ATN does the accept and the DFA simulator that invoked it
    ///  can simply return the predicted token type.
    /// </summary>
    struct ANTLR4CPP_PUBLIC SimState final {
      /// Index into the input; INVALID_INDEX by default.
      size_t index = INVALID_INDEX;
      /// Line number; defaults to 0, which lies outside the 1..n range documented for _line.
      size_t line = 0;
      /// Character position within the line; INVALID_INDEX by default.
      size_t charPos = INVALID_INDEX;
      /// Associated DFA state (non-owning raw pointer); nullptr by default.
      dfa::DFAState *dfaState = nullptr;

      /// NOTE(review): presumably restores the member defaults listed above - confirm in the .cpp.
      void reset();
    };

  public:
    /// Bounds on the symbol values used for DFA edges.
    /// NOTE(review): presumably symbols outside [MIN_DFA_EDGE, MAX_DFA_EDGE] never get a
    /// cached DFA edge and are always resolved through the ATN - confirm in the .cpp.
    static constexpr size_t MIN_DFA_EDGE = 0;
    static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN

  protected:
    /// The lexer associated with this simulator. The pointer itself is const, so it
    /// cannot be reseated after construction.
    /// NOTE(review): the constructor overload without a `recog` argument presumably
    /// leaves this null - confirm before dereferencing unconditionally.
    Lexer *const _recog;

    /// The current token's starting index into the character stream.
    ///  Shared across DFA to ATN simulation in case the ATN fails and the
    ///  DFA did not have a previous accept state. In this case, we use the
    ///  ATN-generated exception object.
    size_t _startIndex;

    /// line number 1..n within the input.
    size_t _line;

    /// The index of the character relative to the beginning of the line 0..n-1.
    size_t _charPositionInLine;

  public:
    /// Reference to a vector of DFAs that this object does not own (it is a reference
    /// member, so it must be bound at construction).
    /// NOTE(review): presumably bound to the `decisionToDFA` constructor argument and
    /// indexed by lexer mode (see getDFA) - confirm in the .cpp.
    std::vector<dfa::DFA> &_decisionToDFA;

  protected:
    /// NOTE(review): presumably the lexer mode currently being matched (see match()) - confirm.
    size_t _mode;

    /// Used during DFA/ATN exec to record the most recent accept configuration info.
    SimState _prevAccept;

  public:
    /// Constructs a simulator without an explicit Lexer argument.
    /// <param name="atn"> The ATN to simulate. </param>
    /// <param name="decisionToDFA"> Caller-owned DFA table, taken by non-const reference. </param>
    /// <param name="sharedContextCache"> Prediction context cache, taken by non-const reference. </param>
    LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
    /// Same as the overload above, with the Lexer to associate with this simulator
    /// passed as <c>recog</c> (raw, non-owning pointer).
    LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
    virtual ~LexerATNSimulator() = default;

    /// NOTE(review): presumably copies the position/mode bookkeeping from
    /// <c>simulator</c> into this object - confirm in the .cpp.
    virtual void copyState(LexerATNSimulator *simulator);
    /// NOTE(review): presumably matches a token from <c>input</c> in lexer mode
    /// <c>mode</c> and returns the predicted token type - confirm in the .cpp.
    virtual size_t match(CharStream *input, size_t mode);
    /// Overrides the base-class reset().
    virtual void reset() override;

    /// Overrides the base-class clearDFA().
    virtual void clearDFA() override;

  protected:
    /// NOTE(review): presumably the ATN-based matching path used when no usable DFA
    /// start state exists - confirm in the .cpp.
    virtual size_t matchATN(CharStream *input);
    /// NOTE(review): presumably runs the simulation starting from DFA state
    /// <c>ds0</c> and returns the predicted token type - confirm in the .cpp.
    virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);

    /// <summary>
    /// Get an existing target state for an edge in the DFA. If the target state
    /// for the edge has not yet been computed or is otherwise not available,
    /// this method returns {@code nullptr}.
    /// </summary>
    /// <param name="s"> The current DFA state </param>
    /// <param name="t"> The next input symbol </param>
    /// <returns> The existing target DFA state for the given input symbol
    /// {@code t}, or {@code nullptr} if the target state for this edge is not
    /// already cached </returns>
    virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);

    /// <summary>
    /// Compute a target state for an edge in the DFA, and attempt to add the
    /// computed state and corresponding edge to the DFA.
    /// </summary>
    /// <param name="input"> The input stream </param>
    /// <param name="s"> The current DFA state </param>
    /// <param name="t"> The next input symbol
    /// </param>
    /// <returns> The computed target DFA state for the given input symbol
    /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
    /// returns <seealso cref="#ERROR"/>. </returns>
    virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);

    /// NOTE(review): presumably accepts using the recorded _prevAccept state when
    /// there is one and otherwise reports a failure for symbol <c>t</c> - confirm in the .cpp.
    virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);

    /// <summary>
    /// Given a starting configuration set, figure out all ATN configurations
    ///  we can reach upon input {@code t}. Parameter {@code reach} is a return
    ///  parameter.
    /// </summary>
    /// Unlike most members of this class, this function is not virtual.
    void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
                               ATNConfigSet *reach, size_t t);

    /// NOTE(review): presumably performs the accept for the matched token: runs
    /// <c>lexerActionExecutor</c> (if any) and moves <c>input</c> and the line/column
    /// bookkeeping to <c>index</c>/<c>line</c>/<c>charPos</c> - confirm in the .cpp.
    virtual void accept(CharStream *input, const Ref<const LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
                        size_t line, size_t charPos);

    /// NOTE(review): presumably returns the target state of <c>trans</c> when the
    /// transition matches symbol <c>t</c>, and a null pointer otherwise - confirm in the .cpp.
    virtual ATNState *getReachableTarget(const Transition *trans, size_t t);

    /// Builds a configuration set for ATN state <c>p</c>; the caller owns the
    /// returned set (it is handed back as a std::unique_ptr).
    virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p);

    /// <summary>
    /// Since the alternatives within any lexer decision are ordered by
    /// preference, this method stops pursuing the closure as soon as an accept
    /// state is reached. After the first accept state is reached by depth-first
    /// search from {@code config}, all other (potentially reachable) states for
    /// this rule would have a lower priority.
    /// </summary>
    /// <returns> {@code true} if an accept state is reached, otherwise
    /// {@code false}. </returns>
    virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
                         bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);

    // side-effect: can alter configs.hasSemanticContext
    // NOTE(review): presumably returns an empty Ref when transition t cannot be
    // followed without consuming input - confirm in the .cpp.
    virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, const Transition *t,
      ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);

    /// <summary>
    /// Evaluate a predicate specified in the lexer.
    /// <p/>
    /// If {@code speculative} is {@code true}, this method was called before
    /// <seealso cref="#consume"/> for the matched character. This method should call
    /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
    /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
    /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
    /// lexer state. This method should restore {@code input} and the simulator
    /// to the original state before returning (i.e. undo the actions made by the
    /// call to <seealso cref="#consume"/>).
    /// </summary>
    /// <param name="input"> The input stream. </param>
    /// <param name="ruleIndex"> The rule containing the predicate. </param>
    /// <param name="predIndex"> The index of the predicate within the rule. </param>
    /// <param name="speculative"> {@code true} if the current index in {@code input} is
    /// one character before the predicate's location.
    /// </param>
    /// <returns> {@code true} if the specified predicate evaluates to
    /// {@code true}. </returns>
    virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);

    /// NOTE(review): presumably records the current input index, line, column and
    /// <c>dfaState</c> into _prevAccept - confirm in the .cpp.
    virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
    /// Edge-adding overload that takes the target as a configuration set <c>q</c>
    /// and returns a DFA state pointer.
    /// NOTE(review): presumably the returned state is the one created or found for
    /// <c>q</c> - confirm in the .cpp.
    virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q);
    /// Edge-adding overload that takes an already existing target state <c>q</c>
    /// for the edge from <c>p</c> on symbol <c>t</c>.
    virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q);

    /// <summary>
    /// Add a new DFA state if there isn't one with this set of
    /// configurations already. This method also detects the first
    /// configuration containing an ATN rule stop state. Later, when
    /// traversing the DFA, we will know which rule to accept.
    /// </summary>
    virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);

    /// Overload of addDFAState with an extra <c>suppressEdge</c> flag.
    /// NOTE(review): the meaning of <c>suppressEdge</c> is not visible in this
    /// header - see the .cpp.
    virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge);

  public:
    /// Returns a mutable reference to a DFA; not virtual.
    /// NOTE(review): presumably the element of _decisionToDFA selected by <c>mode</c> - confirm.
    dfa::DFA& getDFA(size_t mode);

    /// Get the text matched so far for the current token.
    virtual std::string getText(CharStream *input);
    /// NOTE(review): presumably returns _line - confirm.
    virtual size_t getLine() const;
    /// NOTE(review): presumably assigns _line - confirm.
    virtual void setLine(size_t line);
    /// NOTE(review): presumably returns _charPositionInLine. Unlike getLine(), this
    /// accessor is not declared const, so it cannot be called through a const simulator.
    virtual size_t getCharPositionInLine();
    /// NOTE(review): presumably assigns _charPositionInLine - confirm.
    virtual void setCharPositionInLine(size_t charPositionInLine);
    /// NOTE(review): presumably consumes one symbol from <c>input</c> and updates the
    /// line/column bookkeeping - confirm in the .cpp.
    virtual void consume(CharStream *input);
    /// NOTE(review): presumably returns a printable name for symbol/token value <c>t</c> - confirm.
    virtual std::string getTokenName(size_t t);

  private:
    /// Private, non-virtual helper.
    /// NOTE(review): presumably called from the constructors to give the data
    /// members their initial values - confirm in the .cpp.
    void InitializeInstanceFields();
  };

} // namespace atn
} // namespace antlr4