Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2023-12-02 01:45:21 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2023-12-02 02:42:50 +0300
commit: 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree: 9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h
parent: 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
download: ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz
1 files changed, 295 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h
new file mode 100644
index 0000000000..929056a3f9
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h
@@ -0,0 +1,295 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "antlr4-common.h"
+
+namespace antlr4 {
+
+  /**
+   * Useful for rewriting out a buffered input token stream after doing some
+   * augmentation or other manipulations on it.
+   *
+   * <p>
+   * You can insert stuff, replace, and delete chunks. Note that the operations
+   * are done lazily--only if you convert the buffer to a {@link String} with
+   * {@link TokenStream#getText()}. This is very efficient because you are not
+   * moving data around all the time. As the buffer of tokens is converted to
+   * strings, the {@link #getText()} method(s) scan the input token stream and
+   * check to see if there is an operation at the current index. If so, the
+   * operation is done and then normal {@link String} rendering continues on the
+   * buffer. This is like having multiple Turing machine instruction streams
+   * (programs) operating on a single input tape. :)</p>
+   *
+   * <p>
+   * This rewriter makes no modifications to the token stream. It does not ask the
+   * stream to fill itself up nor does it advance the input cursor. The token
+   * stream {@link TokenStream#index()} will return the same value before and
+   * after any {@link #getText()} call.</p>
+   *
+   * <p>
+   * The rewriter only works on tokens that you have in the buffer and ignores the
+   * current input cursor. If you are buffering tokens on-demand, calling
+   * {@link #getText()} halfway through the input will only do rewrites for those
+   * tokens in the first half of the file.</p>
+   *
+   * <p>
+   * Since the operations are done lazily at {@link #getText}-time, operations do
+   * not screw up the token index values. That is, an insert operation at token
+   * index {@code i} does not change the index values for tokens
+   * {@code i}+1..n-1.</p>
+   *
+   * <p>
+   * Because operations never actually alter the buffer, you may always get the
+   * original token stream back without undoing anything. Since the instructions
+   * are queued up, you can easily simulate transactions and roll back any changes
+   * if there is an error just by removing instructions. For example,</p>
+   *
+   * <pre>
+   * CharStream input = new ANTLRFileStream("input");
+   * TLexer lex = new TLexer(input);
+   * CommonTokenStream tokens = new CommonTokenStream(lex);
+   * T parser = new T(tokens);
+   * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
+   * parser.startRule();
+   * </pre>
+   *
+   * <p>
+   * Then in the rules, you can execute (assuming rewriter is visible):</p>
+   *
+   * <pre>
+   * Token t,u;
+   * ...
+   * rewriter.insertAfter(t, "text to put after t");
+   * rewriter.insertAfter(u, "text after u");
+   * System.out.println(rewriter.getText());
+   * </pre>
+   *
+   * <p>
+   * You can also have multiple "instruction streams" and get multiple rewrites
+   * from a single pass over the input. Just name the instruction streams and use
+   * that name again when printing the buffer. This could be useful for generating
+   * a C file and also its header file--all from the same buffer:</p>
+   *
+   * <pre>
+   * rewriter.insertAfter("pass1", t, "text to put after t");
+   * rewriter.insertAfter("pass2", u, "text after u");
+   * System.out.println(rewriter.getText("pass1"));
+   * System.out.println(rewriter.getText("pass2"));
+   * </pre>
+   *
+   * <p>
+   * If you don't use named rewrite streams, a "default" stream is used as the
+   * first example shows.</p>
+   */
+  class ANTLR4CPP_PUBLIC TokenStreamRewriter {
+  public:
+    /// Name of the "default" program mentioned in the class overview.
+    /// NOTE(review): the overloads below that take no programName presumably
+    /// operate on this program -- confirm in the .cpp.
+    static const std::string DEFAULT_PROGRAM_NAME;
+    /// NOTE(review): presumably the initial capacity of a new program's
+    /// instruction list -- confirm in the .cpp.
+    static constexpr size_t PROGRAM_INIT_SIZE = 100;
+    static constexpr size_t MIN_TOKEN_INDEX = 0;
+
+    /// Create a rewriter over @p tokens. The pointer is stored as-is (see the
+    /// `tokens` member); this header does not show any ownership transfer.
+    TokenStreamRewriter(TokenStream *tokens);
+    virtual ~TokenStreamRewriter();
+
+    /// Return the token stream this rewriter was constructed with.
+    TokenStream *getTokenStream();
+
+    virtual void rollback(size_t instructionIndex);
+
+    /// Rollback the instruction stream for a program so that
+    /// the indicated instruction (via instructionIndex) is no
+    /// longer in the stream.  UNTESTED!
+    virtual void rollback(const std::string &programName, size_t instructionIndex);
+
+    virtual void deleteProgram();
+
+    /// Reset the program so that no instructions exist.
+    virtual void deleteProgram(const std::string &programName);
+
+    // Queue an insertion of `text` after a token, identified either by the
+    // token itself or by its index in the stream.
+    virtual void insertAfter(Token *t, const std::string& text);
+    virtual void insertAfter(size_t index, const std::string& text);
+    virtual void insertAfter(const std::string &programName, Token *t, const std::string& text);
+    virtual void insertAfter(const std::string &programName, size_t index, const std::string& text);
+
+    // Queue an insertion of `text` before a token, identified either by the
+    // token itself or by its index in the stream.
+    virtual void insertBefore(Token *t, const std::string& text);
+    virtual void insertBefore(size_t index, const std::string& text);
+    virtual void insertBefore(const std::string &programName, Token *t, const std::string& text);
+    virtual void insertBefore(const std::string &programName, size_t index, const std::string& text);
+
+    // Queue a replacement of a single token or of the token range from..to
+    // with `text`.
+    virtual void replace(size_t index, const std::string& text);
+    virtual void replace(size_t from, size_t to, const std::string& text);
+    virtual void replace(Token *indexT, const std::string& text);
+    virtual void replace(Token *from, Token *to, const std::string& text);
+    virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text);
+    virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text);
+
+    // Queue a deletion of a single token or of the token range from..to.
+    // Spelled with a capital D because `delete` is a C++ keyword.
+    virtual void Delete(size_t index);
+    virtual void Delete(size_t from, size_t to);
+    virtual void Delete(Token *indexT);
+    virtual void Delete(Token *from, Token *to);
+    virtual void Delete(const std::string &programName, size_t from, size_t to);
+    virtual void Delete(const std::string &programName, Token *from, Token *to);
+
+    virtual size_t getLastRewriteTokenIndex();
+
+    /// Return the text from the original tokens altered per the
+    /// instructions given to this rewriter.
+    virtual std::string getText();
+
+    /** Return the text from the original tokens altered per the
+     *  instructions given to this rewriter in programName.
+     *
+     *  Unlike the other overloads this one is non-virtual and takes
+     *  programName by value; the signature is kept as-is because its
+     *  definition lives outside this header.
+     */
+    std::string getText(std::string programName);
+
+    /// Return the text associated with the tokens in the interval from the
+    /// original token stream but with the alterations given to this rewriter.
+    /// The interval refers to the indexes in the original token stream.
+    /// We do not alter the token stream in any way, so the indexes
+    /// and intervals are still consistent. Includes any operations done
+    /// to the first and last token in the interval. So, if you did an
+    /// insertBefore on the first token, you would get that insertion.
+    /// The same is true if you do an insertAfter the stop token.
+    virtual std::string getText(const misc::Interval &interval);
+
+    virtual std::string getText(const std::string &programName, const misc::Interval &interval);
+
+  protected:
+    /// Base class of all queued rewrite instructions.
+    class RewriteOperation {
+    public:
+      // NOTE(review): the comments on `index` and `instructionIndex` were
+      // previously the other way round. They are swapped here to match how
+      // the names are used elsewhere in this header (rollback() addresses an
+      // instruction via instructionIndex; insert/replace address tokens via
+      // index) -- confirm against the .cpp.
+
+      /// Token buffer index.
+      size_t index;
+      std::string text;
+
+      /// What index into rewrites List are we?
+      size_t instructionIndex;
+
+      RewriteOperation(TokenStreamRewriter *outerInstance, size_t index);
+      RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
+      virtual ~RewriteOperation();
+
+      /// Execute the rewrite operation by possibly adding to the buffer.
+      /// Return the index of the next token to operate on.
+      virtual size_t execute(std::string *buf);
+      virtual std::string toString();
+
+    private:
+      TokenStreamRewriter *const outerInstance;
+      void InitializeInstanceFields();
+    };
+
+    /// Instruction that inserts text before the token at `index`
+    /// ("I.i.u" in the notation used further below).
+    class InsertBeforeOp : public RewriteOperation {
+    private:
+      // Separate member from the base class's private `outerInstance`,
+      // which is not accessible from here.
+      TokenStreamRewriter *const outerInstance;
+
+    public:
+      InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text);
+
+      virtual size_t execute(std::string *buf) override;
+    };
+
+    /// Instruction that replaces a range of tokens with text
+    /// ("R.x-y.u" in the notation used further below).
+    class ReplaceOp : public RewriteOperation {
+    private:
+      // Separate member from the base class's private `outerInstance`,
+      // which is not accessible from here.
+      TokenStreamRewriter *const outerInstance;
+
+    public:
+      /// NOTE(review): presumably the `to` end of the replaced range, with
+      /// the inherited `index` holding `from` -- confirm in the .cpp.
+      size_t lastIndex;
+
+      ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text);
+      virtual size_t execute(std::string *buf) override;
+      virtual std::string toString() override;
+
+    private:
+      void InitializeInstanceFields();
+    };
+
+    /// Our source stream
+    TokenStream *const tokens;
+
+    /// You may have multiple, named streams of rewrite operations.
+    /// I'm calling these things "programs."
+    /// Maps program name -> list of rewrite operations.
+    /// NOTE(review): the operations are held as raw pointers; who frees them
+    /// is not visible in this header -- confirm in the .cpp.
+    std::map<std::string, std::vector<RewriteOperation*>> _programs;
+
+    /// Maps program name -> last rewrite token index.
+    std::map<std::string, size_t> _lastRewriteTokenIndexes;
+    virtual size_t getLastRewriteTokenIndex(const std::string &programName);
+    virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i);
+    virtual std::vector<RewriteOperation*>& getProgram(const std::string &name);
+
+    /// <summary>
+    /// We need to combine operations and report invalid operations (like
+    ///  overlapping replaces that are not completely nested).  Inserts to
+    ///  same index need to be combined etc...   Here are the cases:
+    ///
+    ///  I.i.u I.j.v                                leave alone, nonoverlapping
+    ///  I.i.u I.i.v                                combine: Iivu
+    ///
+    ///  R.i-j.u R.x-y.v    | i-j in x-y            delete first R
+    ///  R.i-j.u R.i-j.v                            delete first R
+    ///  R.i-j.u R.x-y.v    | x-y in i-j            ERROR
+    ///  R.i-j.u R.x-y.v    | boundaries overlap    ERROR
+    ///
+    ///  Delete special case of replace (text==null):
+    ///  D.i-j.u D.x-y.v    | boundaries overlap    combine to min(left)..max(right)
+    ///
+    ///  I.i.u R.x-y.v | i in (x+1)-y           delete I (since insert before
+    ///                                         we're not deleting i)
+    ///  I.i.u R.x-y.v | i not in (x+1)-y       leave alone, nonoverlapping
+    ///  R.x-y.v I.i.u | i in x-y               ERROR
+    ///  R.x-y.v I.x.u                          R.x-y.uv (combine, delete I)
+    ///  R.x-y.v I.i.u | i not in x-y           leave alone, nonoverlapping
+    ///
+    ///  I.i.u = insert u before op @ index i
+    ///  R.x-y.u = replace x-y indexed tokens with u
+    ///
+    ///  First we need to examine replaces.  For any replace op:
+    ///
+    ///         1. wipe out any insertions before op within that range.
+    ///         2. Drop any replace op before that is contained completely within
+    ///         that range.
+    ///         3. Throw exception upon boundary overlap with any previous replace.
+    ///
+    ///  Then we can deal with inserts:
+    ///
+    ///         1. for any inserts to same index, combine even if not adjacent.
+    ///         2. for any prior replace with same left boundary, combine this
+    ///         insert with replace and delete this insert.
+    ///         3. throw exception if index in same range as previous replace
+    ///
+    ///  Don't actually delete; make op null in list. Easier to walk list.
+    ///  Later we can throw as we add to index -> op map.
+    ///
+    ///  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
+    ///  inserted stuff would be before the replace range.  But, if you
+    ///  add tokens in front of a method body '{' and then delete the method
+    ///  body, I think the stuff before the '{' you added should disappear too.
+    ///
+    ///  Return a map from token index to operation.
+    /// </summary>
+    virtual std::unordered_map<size_t, RewriteOperation*> reduceToSingleOperationPerIndex(std::vector<RewriteOperation*> &rewrites);
+
+    /// NOTE(review): presumably concatenates the texts of two operations;
+    /// confirm in the .cpp.
+    virtual std::string catOpText(std::string *a, std::string *b);
+
+    /// Get all operations before an index of a particular kind.
+    ///
+    /// Scans rewrites[0 .. min(before, rewrites.size())) and returns, in
+    /// order, every entry for which dynamic_cast<T *> succeeds. Null entries
+    /// (deleted operations) and entries of another kind are skipped.
+    ///
+    /// The list is taken by const reference: it is only read here, so
+    /// copying the whole vector on every call would be wasted work.
+    template <typename T>
+    std::vector<T *> getKindOfOps(const std::vector<RewriteOperation *> &rewrites, size_t before) {
+      std::vector<T *> ops;
+      // Never read past the end of the list, whatever `before` is.
+      const size_t limit = before < rewrites.size() ? before : rewrites.size();
+      for (size_t i = 0; i < limit; i++) {
+        // dynamic_cast yields nullptr for both null entries and other kinds.
+        if (T *op = dynamic_cast<T *>(rewrites[i])) {
+          ops.push_back(op);
+        }
+      }
+      return ops;
+    }
+
+  private:
+    /// NOTE(review): presumably creates the (empty) instruction list for
+    /// `name` and returns it; confirm in the .cpp.
+    std::vector<RewriteOperation *>& initializeProgram(const std::string &name);
+
+  };
+
+} // namespace antlr4
author	robot-piglet <robot-piglet@yandex-team.com>	2023-12-02 01:45:21 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2023-12-02 02:42:50 +0300
commit	9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree	9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/TokenStreamRewriter.h
parent	32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
download	ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz