aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h
blob: 929056a3f9cd5c8f8359940b0231585bdd55cc86 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "antlr4-common.h"

namespace antlr4 {

  /**
   * Useful for rewriting out a buffered input token stream after doing some
   * augmentation or other manipulations on it.
   *
   * <p>
   * You can insert stuff, replace, and delete chunks. Note that the operations
   * are done lazily--only if you convert the buffer to a {@link String} with
   * {@link TokenStream#getText()}. This is very efficient because you are not
   * moving data around all the time. As the buffer of tokens is converted to
   * strings, the {@link #getText()} method(s) scan the input token stream and
   * check to see if there is an operation at the current index. If so, the
   * operation is done and then normal {@link String} rendering continues on the
   * buffer. This is like having multiple Turing machine instruction streams
   * (programs) operating on a single input tape. :)</p>
   *
   * <p>
   * This rewriter makes no modifications to the token stream. It does not ask the
   * stream to fill itself up nor does it advance the input cursor. The token
   * stream {@link TokenStream#index()} will return the same value before and
   * after any {@link #getText()} call.</p>
   *
   * <p>
   * The rewriter only works on tokens that you have in the buffer and ignores the
   * current input cursor. If you are buffering tokens on-demand, calling
   * {@link #getText()} halfway through the input will only do rewrites for those
   * tokens in the first half of the file.</p>
   *
   * <p>
   * Since the operations are done lazily at {@link #getText}-time, operations do
   * not screw up the token index values. That is, an insert operation at token
   * index {@code i} does not change the index values for tokens
   * {@code i}+1..n-1.</p>
   *
   * <p>
   * Because operations never actually alter the buffer, you may always get the
   * original token stream back without undoing anything. Since the instructions
   * are queued up, you can easily simulate transactions and roll back any changes
   * if there is an error just by removing instructions. For example,</p>
   *
   * <pre>
   * CharStream input = new ANTLRFileStream("input");
   * TLexer lex = new TLexer(input);
   * CommonTokenStream tokens = new CommonTokenStream(lex);
   * T parser = new T(tokens);
   * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
   * parser.startRule();
   * </pre>
   *
   * <p>
   * Then in the rules, you can execute (assuming rewriter is visible):</p>
   *
   * <pre>
   * Token t,u;
   * ...
   * rewriter.insertAfter(t, "text to put after t");
   * rewriter.insertAfter(u, "text after u");
   * System.out.println(rewriter.getText());
   * </pre>
   *
   * <p>
   * You can also have multiple "instruction streams" and get multiple rewrites
   * from a single pass over the input. Just name the instruction streams and use
   * that name again when printing the buffer. This could be useful for generating
   * a C file and also its header file--all from the same buffer:</p>
   *
   * <pre>
   * rewriter.insertAfter("pass1", t, "text to put after t");
   * rewriter.insertAfter("pass2", u, "text after u");
   * System.out.println(rewriter.getText("pass1"));
   * System.out.println(rewriter.getText("pass2"));
   * </pre>
   *
   * <p>
   * If you don't use named rewrite streams, a "default" stream is used as the
   * first example shows.</p>
   */
  class ANTLR4CPP_PUBLIC TokenStreamRewriter {
  public:
    /// Name of the default program (instruction stream). The value is defined
    /// out of line. NOTE(review): presumably this is the program the overloads
    /// without a programName argument operate on -- confirm in the .cpp file.
    static const std::string DEFAULT_PROGRAM_NAME;
    /// NOTE(review): presumably the initial capacity reserved for a program's
    /// operation list -- its use is not visible in this header; confirm.
    static constexpr size_t PROGRAM_INIT_SIZE = 100;
    /// NOTE(review): presumably the smallest valid token index -- confirm.
    static constexpr size_t MIN_TOKEN_INDEX = 0;

    /// Create a rewriter for the given token stream.
    ///
    /// @param tokens the stream whose tokens will be rendered with rewrites
    ///               applied. NOTE(review): a raw pointer is taken, so the
    ///               stream presumably has to outlive this rewriter and stays
    ///               owned by the caller -- confirm against the implementation.
    TokenStreamRewriter(TokenStream *tokens);
    /// Virtual destructor; the class is designed to be subclassed (most of its
    /// member functions are virtual).
    virtual ~TokenStreamRewriter();

    /// @return the token stream this rewriter works on (presumably the pointer
    ///         handed to the constructor -- TODO confirm).
    TokenStream *getTokenStream();

    /// Overload of rollback() without a program name.
    /// NOTE(review): presumably applies to the default program
    /// (DEFAULT_PROGRAM_NAME) -- confirm in the implementation file.
    ///
    /// @param instructionIndex position of the instruction within the program.
    virtual void rollback(size_t instructionIndex);

    /// Rollback the instruction stream for a program so that
    /// the indicated instruction (via instructionIndex) is no
    /// longer in the stream.  UNTESTED!
    ///
    /// @param programName      name of the program (instruction stream) to alter.
    /// @param instructionIndex position of the instruction within that program.
    virtual void rollback(const std::string &programName, size_t instructionIndex);

    /// Overload of deleteProgram() without a program name.
    /// NOTE(review): presumably resets the default program -- confirm.
    virtual void deleteProgram();

    /// Reset the program so that no instructions exist.
    ///
    /// @param programName name of the program (instruction stream) to reset.
    virtual void deleteProgram(const std::string &programName);
    // Insertion overloads. An insertion is recorded as an instruction of a
    // program and only takes effect when getText() renders the stream; the
    // token stream itself is never modified.
    //
    // Overload families (same for insertAfter and insertBefore):
    //   * (Token *t, text) / (size_t index, text): no program name is given.
    //     NOTE(review): presumably these use the default program -- confirm.
    //   * (programName, ...): record the instruction in the named program.
    //   * Token* variants: NOTE(review): presumably resolved to the token's
    //     index in the stream -- the conversion is not visible in this header.

    /// Insert `text` after token `t`.
    virtual void insertAfter(Token *t, const std::string& text);
    /// Insert `text` after the token at position `index` of the token stream.
    virtual void insertAfter(size_t index, const std::string& text);
    /// Insert `text` after token `t`, recording the instruction in `programName`.
    virtual void insertAfter(const std::string &programName, Token *t, const std::string& text);
    /// Insert `text` after the token at `index`, recording the instruction in
    /// `programName`.
    virtual void insertAfter(const std::string &programName, size_t index, const std::string& text);

    /// Insert `text` before token `t`.
    virtual void insertBefore(Token *t, const std::string& text);
    /// Insert `text` before the token at position `index` of the token stream.
    virtual void insertBefore(size_t index, const std::string& text);
    /// Insert `text` before token `t`, recording the instruction in `programName`.
    virtual void insertBefore(const std::string &programName, Token *t, const std::string& text);
    /// Insert `text` before the token at `index`, recording the instruction in
    /// `programName`.
    virtual void insertBefore(const std::string &programName, size_t index, const std::string& text);

    // Replacement overloads. A replacement substitutes `text` for a range of
    // tokens when getText() renders the stream; the token stream itself is
    // left untouched.
    // NOTE(review): the from..to range is presumably inclusive on both ends and
    // overloads without programName presumably use the default program --
    // confirm both against the implementation file.

    /// Replace the single token at `index` with `text`.
    virtual void replace(size_t index, const std::string& text);
    /// Replace the tokens at positions `from`..`to` with `text`.
    virtual void replace(size_t from, size_t to, const std::string& text);
    /// Replace the single token `indexT` with `text`.
    virtual void replace(Token *indexT, const std::string& text);
    /// Replace the tokens `from`..`to` with `text`.
    virtual void replace(Token *from, Token *to, const std::string& text);
    /// Replace the tokens at positions `from`..`to` with `text`, recording the
    /// instruction in `programName`.
    virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text);
    /// Replace the tokens `from`..`to` with `text`, recording the instruction in
    /// `programName`.
    virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text);

    // Deletion overloads. The operation-combining notes further down in this
    // class describe a delete as the special case of a replace that carries no
    // text. The name is capitalised because `delete` is a C++ keyword.
    // NOTE(review): range bounds are presumably inclusive and overloads without
    // programName presumably use the default program -- confirm.

    /// Delete the single token at `index`.
    virtual void Delete(size_t index);
    /// Delete the tokens at positions `from`..`to`.
    virtual void Delete(size_t from, size_t to);
    /// Delete the single token `indexT`.
    virtual void Delete(Token *indexT);
    /// Delete the tokens `from`..`to`.
    virtual void Delete(Token *from, Token *to);
    /// Delete the tokens at positions `from`..`to`, recording the instruction in
    /// `programName`.
    virtual void Delete(const std::string &programName, size_t from, size_t to);
    /// Delete the tokens `from`..`to`, recording the instruction in `programName`.
    virtual void Delete(const std::string &programName, Token *from, Token *to);

    /// Return the last rewrite token index without naming a program.
    /// NOTE(review): presumably reports the value for the default program; the
    /// result when nothing has been recorded is decided in the implementation
    /// file -- confirm before relying on it.
    virtual size_t getLastRewriteTokenIndex();

    /// Return the text from the original tokens altered per the
    /// instructions given to this rewriter.
    /// NOTE(review): no program is named, so this presumably renders the
    /// default program over the whole buffered stream -- confirm.
    virtual std::string getText();

    /** Return the text from the original tokens altered per the
     *  instructions given to this rewriter in programName.
     *
     *  NOTE(review): unlike the sibling overloads this one is non-virtual and
     *  takes programName by value. Changing either requires a matching change
     *  to the out-of-line definition, so it is left as is here.
     */
    std::string getText(std::string programName);

    /// Return the text associated with the tokens in the interval from the
    /// original token stream but with the alterations given to this rewriter.
    /// The interval refers to the indexes in the original token stream.
    /// We do not alter the token stream in any way, so the indexes
    /// and intervals are still consistent. Includes any operations done
    /// to the first and last token in the interval. So, if you did an
    /// insertBefore on the first token, you would get that insertion.
    /// The same is true if you do an insertAfter the stop token.
    ///
    /// @param interval token index range, expressed in original stream indexes.
    virtual std::string getText(const misc::Interval &interval);

    /// Same as getText(const misc::Interval &) but applies the instructions of
    /// the program called `programName`.
    ///
    /// @param programName name of the program (instruction stream) to apply.
    /// @param interval    token index range, expressed in original stream indexes.
    virtual std::string getText(const std::string &programName, const misc::Interval &interval);

  protected:
    /// Base class of all rewrite instructions stored in a program.
    ///
    /// An operation carries the token index it applies to, the text it
    /// contributes, and its own position inside the program's operation list.
    class RewriteOperation {
    public:
      /// Token buffer index this operation applies to (the `index` argument of
      /// the constructors).
      /// NOTE(review): the comments on `index` and `instructionIndex` were
      /// previously swapped; this reading follows how the names are used by
      /// rollback(programName, instructionIndex) and insertBefore(index, text)
      /// -- confirm against the implementation file.
      size_t index;
      /// Text contributed by the operation (inserted or used as replacement).
      std::string text;

      /// What index into the rewrites list are we? (Position of this operation
      /// within its program.)
      size_t instructionIndex;

      /// Create an operation for token `index` without text.
      RewriteOperation(TokenStreamRewriter *outerInstance, size_t index);
      /// Create an operation for token `index` carrying `text`.
      RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
      virtual ~RewriteOperation();

      /// Execute the rewrite operation by possibly adding to the buffer.
      /// Return the index of the next token to operate on.
      ///
      /// @param buf output buffer the rendered text is appended to.
      virtual size_t execute(std::string *buf);
      /// @return a textual description of this operation; the exact format is
      ///         defined in the implementation file.
      virtual std::string toString();

    private:
      /// Rewriter this operation belongs to. Private here, so derived classes
      /// cannot reach it and declare their own copy.
      TokenStreamRewriter *const outerInstance;
      /// NOTE(review): presumably sets the data members to their initial values
      /// from the constructors -- the definition is not visible here.
      void InitializeInstanceFields();
    };

    /// Instruction that inserts text before the token at a given index (the
    /// "I.i.u" operation in the notes on reduceToSingleOperationPerIndex).
    class InsertBeforeOp : public RewriteOperation {
    private:
      /// Rewriter this operation belongs to. Declared again because the base
      /// class member of the same name is private.
      TokenStreamRewriter *const outerInstance;

    public:
      /// Create an insertion of `text` before the token at `index`.
      InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);

      /// Override of RewriteOperation::execute with the same contract: may
      /// append to `buf` and returns the index of the next token to operate on.
      /// What exactly is appended is defined in the implementation file.
      virtual size_t execute(std::string *buf) override;
    };

    /// Instruction that replaces a range of tokens with text (the "R.x-y.u"
    /// operation in the notes on reduceToSingleOperationPerIndex). Those notes
    /// describe a delete as the special case of a replace without text.
    class ReplaceOp : public RewriteOperation {
    private:
      /// Rewriter this operation belongs to. Declared again because the base
      /// class member of the same name is private.
      TokenStreamRewriter *const outerInstance;

    public:
      /// Index of the last token of the replaced range.
      /// NOTE(review): presumably initialised from the constructor's `to`
      /// argument, with `from` stored in the inherited `index` -- confirm.
      size_t lastIndex;

      /// Create a replacement of the tokens `from`..`to` with `text`.
      ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text);
      /// Override of RewriteOperation::execute with the same contract: may
      /// append to `buf` and returns the index of the next token to operate on.
      virtual size_t execute(std::string *buf) override;
      /// @return a textual description of this replacement; the exact format is
      ///         defined in the implementation file.
      virtual std::string toString() override;

    private:
      /// NOTE(review): presumably sets `lastIndex` to its initial value -- the
      /// definition is not visible here.
      void InitializeInstanceFields();
    };

    /// Our source stream. The pointer itself is const, so it cannot be
    /// re-seated after construction.
    TokenStream *const tokens;

    /// You may have multiple, named streams of rewrite operations.
    /// I'm calling these things "programs."
    /// Maps program name -> list of rewrite operations.
    /// NOTE(review): the lists hold raw pointers; presumably the rewriter owns
    /// the operations and releases them itself -- confirm in the .cpp file.
    std::map<std::string, std::vector<RewriteOperation*>> _programs;

    /// Maps program name -> last rewrite token index recorded for it.
    std::map<std::string, size_t> _lastRewriteTokenIndexes;
    /// @return the last rewrite token index recorded for `programName`.
    ///         NOTE(review): the result for an unknown program is decided in
    ///         the implementation file -- confirm.
    virtual size_t getLastRewriteTokenIndex(const std::string &programName);
    /// Record `i` as the last rewrite token index of `programName`.
    virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i);
    /// @return a mutable reference to the operation list of program `name`.
    ///         NOTE(review): presumably creates the program on first use (see
    ///         initializeProgram) -- confirm.
    virtual std::vector<RewriteOperation*>& getProgram(const std::string &name);

    /// We need to combine operations and report invalid operations (like
    /// overlapping replaces that are not completely nested).  Inserts to
    /// same index need to be combined etc...   Here are the cases:
    ///
    ///  I.i.u I.j.v                                leave alone, nonoverlapping
    ///  I.i.u I.i.v                                combine: Iivu
    ///
    ///  R.i-j.u R.x-y.v    | i-j in x-y            delete first R
    ///  R.i-j.u R.i-j.v                            delete first R
    ///  R.i-j.u R.x-y.v    | x-y in i-j            ERROR
    ///  R.i-j.u R.x-y.v    | boundaries overlap    ERROR
    ///
    ///  Delete special case of replace (text==null):
    ///  D.i-j.u D.x-y.v    | boundaries overlap    combine to max(min)..max(right)
    ///
    ///  I.i.u R.x-y.v | i in (x+1)-y           delete I (since insert before
    ///                                         we're not deleting i)
    ///  I.i.u R.x-y.v | i not in (x+1)-y       leave alone, nonoverlapping
    ///  R.x-y.v I.i.u | i in x-y               ERROR
    ///  R.x-y.v I.x.u                          R.x-y.uv (combine, delete I)
    ///  R.x-y.v I.i.u | i not in x-y           leave alone, nonoverlapping
    ///
    ///  I.i.u = insert u before op @ index i
    ///  R.x-y.u = replace x-y indexed tokens with u
    ///
    ///  First we need to examine replaces.  For any replace op:
    ///
    ///     1. wipe out any insertions before op within that range.
    ///     2. Drop any replace op before that is contained completely within
    ///        that range.
    ///     3. Throw exception upon boundary overlap with any previous replace.
    ///
    ///  Then we can deal with inserts:
    ///
    ///     1. for any inserts to same index, combine even if not adjacent.
    ///     2. for any prior replace with same left boundary, combine this
    ///        insert with replace and delete this insert (this matches the
    ///        "R.x-y.v I.x.u" row in the table above).
    ///     3. throw exception if index in same range as previous replace
    ///
    ///  Don't actually delete; make op null in list. Easier to walk list.
    ///  Later we can throw as we add to index -> op map.
    ///
    ///  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
    ///  inserted stuff would be before the replace range.  But, if you
    ///  add tokens in front of a method body '{' and then delete the method
    ///  body, I think the stuff before the '{' you added should disappear too.
    ///
    /// @param rewrites operation list of one program. It is taken by non-const
    ///                 reference because entries are nulled out ("deleted") in
    ///                 place as described above.
    /// @return a map from token index to the single operation left for it.
    /// NOTE(review): the type of the exception thrown for invalid combinations
    /// is not visible in this header -- check the implementation file.
    virtual std::unordered_map<size_t, RewriteOperation*> reduceToSingleOperationPerIndex(std::vector<RewriteOperation*> &rewrites);

    /// Combine the texts of two operations into one string.
    /// NOTE(review): presumably returns the concatenation of *a followed by *b
    /// and treats a null pointer as empty text -- the definition is not visible
    /// in this header, so confirm against the implementation file.
    virtual std::string catOpText(std::string *a, std::string *b);

    /// Get all operations before an index of a particular kind.
    template <typename T>
    std::vector<T *> getKindOfOps(std::vector<RewriteOperation *> rewrites, size_t before) {
      std::vector<T *> ops;
      for (size_t i = 0; i < before && i < rewrites.size(); i++) {
        T *op = dynamic_cast<T *>(rewrites[i]);
        if (op == nullptr) { // ignore deleted or non matching entries
          continue;
        }
        ops.push_back(op);
      }
      return ops;
    }

  private:
    /// @return a mutable reference to the operation list of program `name`.
    /// NOTE(review): presumably creates an empty operation list for `name` in
    /// _programs before returning it -- the definition is not visible in this
    /// header, so confirm against the implementation file.
    std::vector<RewriteOperation *>& initializeProgram(const std::string &name);

  };

} // namespace antlr4