// Origin: contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.h
// (blob 216147124100e266a7c0183e03f4c88b98631b4e)
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "TokenStream.h"

namespace antlr4 {

  /**
   * This implementation of {@link TokenStream} loads tokens from a
   * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
   * access to any previous token by index.
   *
   * <p>
   * This token stream ignores the value of {@link Token#getChannel}. If your
   * parser requires the token stream to filter tokens to only those on a
   * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
   * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
   * {@link CommonTokenStream}.</p>
   *
   * <p>
   * The buffer holds its tokens as {@code std::unique_ptr<Token>} while the
   * accessors hand out plain {@code Token*} values; copy construction and
   * copy assignment are deleted.</p>
   */
  class ANTLR4CPP_PUBLIC BufferedTokenStream : public TokenStream {
  public:
    /// Create a stream that reads its tokens from {@code tokenSource}.
    /// NOTE(review): the source is kept as a raw pointer member, so it
    /// presumably has to outlive this stream - confirm against the implementation.
    BufferedTokenStream(TokenSource *tokenSource);
    /// Copy construction is disabled.
    BufferedTokenStream(const BufferedTokenStream& other) = delete;

    /// Copy assignment is disabled.
    BufferedTokenStream& operator = (const BufferedTokenStream& other) = delete;

    // The members marked `override` below implement the inherited stream
    // interface; their contracts are defined by the base class declarations.

    /// The token source this stream reads from.
    virtual TokenSource* getTokenSource() const override;
    /// Current position in the stream (presumably the value of _p - confirm).
    virtual size_t index() override;
    /// Obtain a marker value; the value is later handed back to release().
    virtual ssize_t mark() override;

    /// Give back a marker previously obtained from mark().
    virtual void release(ssize_t marker) override;
    /// Declared virtual here without `override`.
    /// NOTE(review): presumably rewinds the stream to its start - confirm.
    virtual void reset();
    /// Move the stream position to {@code index}. See adjustSeekIndex() for
    /// the hook that lets derived classes alter the target index.
    virtual void seek(size_t index) override;

    /// Number of tokens (presumably those buffered so far - confirm).
    virtual size_t size() override;
    /// Advance past the current token. As described on _fetchedEOF, this
    /// guards against consuming the EOF symbol.
    virtual void consume() override;

    /// Token at buffer index {@code i}.
    virtual Token* get(size_t i) const override;

    /// Get all tokens from start..stop inclusively.
    virtual std::vector<Token *> get(size_t start, size_t stop);

    /// Lookahead by signed offset {@code i}; returns a size_t
    /// (presumably the token type of LT(i) - confirm).
    virtual size_t LA(ssize_t i) override;
    /// Lookahead token at signed offset {@code k}; LT(1) is the current token
    /// (see the description of _p).
    virtual Token* LT(ssize_t k) override;

    /// Reset this token stream by setting its token source.
    virtual void setTokenSource(TokenSource *tokenSource);
    /// Return the tokens of this stream (no range restriction).
    virtual std::vector<Token *> getTokens();
    /// Return the tokens in the index range given by {@code start} and
    /// {@code stop}. NOTE(review): presumably inclusive on both ends, like
    /// get(start, stop) - confirm.
    virtual std::vector<Token *> getTokens(size_t start, size_t stop);

    /// <summary>
    /// Given a start and stop index, return all tokens whose type is contained
    ///  in {@code types} (the single-type overload takes {@code ttype} instead).
    ///  This method looks at both on and off channel tokens.
    ///  NOTE(review): the inherited wording said "return null if no tokens were
    ///  found"; the return type here is a vector by value, so presumably an
    ///  empty vector is returned instead - confirm against the implementation.
    /// </summary>
    virtual std::vector<Token *> getTokens(size_t start, size_t stop, const std::vector<size_t> &types);
    virtual std::vector<Token *> getTokens(size_t start, size_t stop, size_t ttype);

    /// Collect all tokens on specified channel to the right of
    ///  the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
    ///  EOF. If channel is -1, find any non default channel token.
    virtual std::vector<Token *> getHiddenTokensToRight(size_t tokenIndex, ssize_t channel);

    /// <summary>
    /// Collect all hidden tokens (any off-default channel) to the right of
    ///  the current token up until we see a token on DEFAULT_TOKEN_CHANNEL
    ///  or EOF.
    /// </summary>
    virtual std::vector<Token *> getHiddenTokensToRight(size_t tokenIndex);

    /// <summary>
    /// Collect all tokens on specified channel to the left of
    ///  the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
    ///  If channel is -1, find any non default channel token.
    /// </summary>
    virtual std::vector<Token *> getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel);

    /// <summary>
    /// Collect all hidden tokens (any off-default channel) to the left of
    ///  the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
    /// </summary>
    virtual std::vector<Token *> getHiddenTokensToLeft(size_t tokenIndex);

    /// Name of the underlying source.
    virtual std::string getSourceName() const override;
    /// Text accessors, selected by argument: no argument, an interval, a rule
    /// context, or a start/stop token pair.
    virtual std::string getText() override;
    virtual std::string getText(const misc::Interval &interval) override;
    virtual std::string getText(RuleContext *ctx) override;
    virtual std::string getText(Token *start, Token *stop) override;

    /// Get all tokens from lexer until EOF.
    virtual void fill();

  protected:
    /**
     * The {@link TokenSource} from which tokens for this stream are fetched.
     * Held as a raw pointer.
     */
    TokenSource *_tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once {@link #fetchedEOF} is set
     * to {@code true}. Elements are {@code std::unique_ptr<Token>}.
     */
    std::vector<std::unique_ptr<Token>> _tokens;

    /**
     * The index into {@link #tokens} of the current token (next token to
     * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
     * {@link #LT LT(1)}.
     *
     * <p>The description this comment was derived from states that the field
     * is -1 when the stream is first constructed or when
     * {@link #setTokenSource} is called, indicating that the first token has
     * not yet been fetched from the token source. In this class the member is
     * an unsigned {@code size_t}, so that sentinel is not used; see the note
     * below. For additional information, see the documentation of
     * {@link IntStream} for a description of Initializing Methods.</p>
     */
    // ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead.
    //     Use bool isInitialized() to find out if this stream has started reading.
    size_t _p;

    /**
     * Indicates whether the {@link Token#EOF} token has been fetched from
     * {@link #tokenSource} and added to {@link #tokens}. This field improves
     * performance for the following cases:
     *
     * <ul>
     * <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
     * consuming the EOF symbol is optimized by checking the values of
     * {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
     * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
     * {@link #tokens} is trivial with this field.</li>
     * </ul>
     */
    bool _fetchedEOF;

    /// <summary>
    /// Make sure index {@code i} in tokens has a token.
    /// </summary>
    /// <returns> {@code true} if a token is located at index {@code i}, otherwise
    ///    {@code false}. </returns>
    /// <seealso cref="#get(size_t)"/>
    virtual bool sync(size_t i);

    /// <summary>
    /// Add {@code n} elements to buffer.
    /// </summary>
    /// <returns> The actual number of elements added to the buffer. </returns>
    virtual size_t fetch(size_t n);

    /// NOTE(review): presumably the backwards counterpart of LT(), returning
    /// the token {@code k} positions before the current one - confirm.
    virtual Token* LB(size_t k);

    /// Allows derived classes to modify the behavior of operations which change
    /// the current stream position by adjusting the target token index of a seek
    /// operation. The default implementation simply returns {@code i}. If an
    /// exception is thrown in this method, the current stream index should not be
    /// changed.
    /// <p/>
    /// For example, <seealso cref="CommonTokenStream"/> overrides this method to ensure that
    /// the seek target is always an on-channel token.
    ///
    /// <param name="i"> The target token index. </param>
    /// <returns> The adjusted target token index. </returns>
    virtual ssize_t adjustSeekIndex(size_t i);
    /// Non-virtual initialization entry point.
    /// NOTE(review): presumably runs setup() once while _needSetup is set - confirm.
    void lazyInit();
    /// Virtual initialization hook that derived classes may override.
    virtual void setup();

    /**
     * Given a starting index, return the index of the next token on channel.
     * Return {@code i} if {@code tokens[i]} is on channel. Return the index of
     * the EOF token if there are no tokens on channel between {@code i} and
     * EOF.
     */
    virtual ssize_t nextTokenOnChannel(size_t i, size_t channel);

    /**
     * Given a starting index, return the index of the previous token on
     * channel. Return {@code i} if {@code tokens[i]} is on channel. Return -1
     * if there are no tokens on channel between {@code i} and 0.
     *
     * <p>
     * If {@code i} specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.</p>
     */
    virtual ssize_t previousTokenOnChannel(size_t i, size_t channel);

    /// NOTE(review): presumably returns the tokens with indexes between
    /// {@code from} and {@code to} that are on {@code channel}, with -1 meaning
    /// any non default channel as in getHiddenTokensToRight/Left - confirm.
    virtual std::vector<Token *> filterForChannel(size_t from, size_t to, ssize_t channel);

    /// Tells whether this stream has started reading (see the note on _p).
    bool isInitialized() const;

  private:
    /// Stands in for the "-1" sentinel value of _p (see the note on _p).
    bool _needSetup;
    /// NOTE(review): presumably assigns the initial values of the data members
    /// during construction - confirm against the implementation.
    void InitializeInstanceFields();
  };

} // namespace antlr4