Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2023-12-02 01:45:21 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2023-12-02 02:42:50 +0300
commit: 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree: 9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h
parent: 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
download: ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz
1 files changed, 117 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h
new file mode 100644
index 0000000000..5b05834f85
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h
@@ -0,0 +1,117 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "CharStream.h"
+
+namespace antlr4 {
+
+  /// Do not buffer up the entire char stream. It does keep a small buffer
+  /// for efficiency and also buffers while a mark exists (set by the
+  /// lookahead prediction in parser). "Unbuffered" here refers to the fact
+  /// that it doesn't buffer all data, not that it loads characters on demand.
+  class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream {
+  public:
+    /// The name or source of this char stream.
+    std::string name;
+
+    explicit UnbufferedCharStream(std::wistream &input); // NOTE(review): input is presumably bound to the _input reference member, so it must outlive this object - confirm in the .cpp
+
+    void consume() override;
+    size_t LA(ssize_t i) override; // Per the member docs below: LA(1) is data[p], LA(-1) is _lastChar.
+
+    /// <summary>
+    /// Return a marker that we can release later.
+    /// <p/>
+    /// The specific marker value used for this class allows for some level of
+    /// protection against misuse where {@code seek()} is called on a mark or
+    /// {@code release()} is called in the wrong order.
+    /// </summary>
+    ssize_t mark() override;
+
+    /// <summary>
+    /// Decrement number of markers, resetting buffer if we hit 0. </summary>
+    /// <param name="marker"> A marker value previously returned by {@code mark()}. </param>
+    void release(ssize_t marker) override;
+    size_t index() override;
+
+    /// <summary>
+    /// Seek to absolute character index, which might not be in the current
+    ///  sliding window.  Move {@code p} to {@code index-bufferStartIndex}.
+    /// </summary>
+    void seek(size_t index) override;
+    size_t size() override;
+    std::string getSourceName() const override;
+    std::string getText(const misc::Interval &interval) override;
+
+    std::string toString() const override;
+
+  protected:
+    /// A moving window buffer of the data being scanned. While there's a marker,
+    /// we keep adding to buffer. Otherwise, <seealso cref="#consume consume()"/> resets so
+    /// we start filling at index 0 again.
+    /// The buffer is UTF-32 encoded.
+    std::u32string _data;
+    typedef char32_t storage_type; // char32_t, the same as the element type of _data.
+
+    /// <summary>
+    /// 0..n-1 index into <seealso cref="#data data"/> of next character.
+    /// <p/>
+    /// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are
+    /// out of buffered characters ({@code n} presumably being the buffered length).
+    /// </summary>
+    size_t _p;
+
+    /// <summary>
+    /// Count up with <seealso cref="#mark mark()"/> and down with
+    /// <seealso cref="#release release()"/>. When we {@code release()} the last mark,
+    /// {@code numMarkers} reaches 0 and we reset the buffer. Copy
+    /// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}.
+    /// </summary>
+    size_t _numMarkers;
+
+    /// This is the {@code LA(-1)} character for the current position.
+    size_t _lastChar; // UTF-32
+
+    /// <summary>
+    /// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the
+    /// first character in <seealso cref="#data data"/>. Otherwise, this is unspecified.
+    /// </summary>
+    size_t _lastCharBufferStart; // UTF-32
+
+    /// <summary>
+    /// Absolute character index. It's the index of the character about to be
+    /// read via {@code LA(1)}. Goes from 0 to the number of characters in the
+    /// entire stream, although the stream size is unknown before the end is
+    /// reached.
+    /// </summary>
+    size_t _currentCharIndex;
+
+    std::wistream &_input; // Non-owning reference to the wide-character input stream.
+
+    /// <summary>
+    /// Make sure we have {@code want} elements from current position <seealso cref="#p p"/>.
+    /// Last valid {@code p} index is {@code data.length-1}. {@code p+want-1} is
+    /// the char index {@code want} elements ahead. If we want 1 element,
+    /// {@code (p+1-1)==p} must be less than {@code data.length}.
+    /// </summary>
+    virtual void sync(size_t want);
+
+    /// <summary>
+    /// Add {@code n} characters to the buffer. Returns the number of characters
+    /// actually added to the buffer. If the return value is less than {@code n},
+    /// then EOF was reached before {@code n} characters could be added.
+    /// </summary>
+    virtual size_t fill(size_t n);
+
+    /// Override to provide different source of characters than
+    /// <seealso cref="#input input"/>.
+    virtual char32_t nextChar();
+    virtual void add(char32_t c); // NOTE(review): presumably appends c to _data - confirm in the .cpp
+    size_t getBufferStartIndex() const; // NOTE(review): presumably the absolute index of data[0], as used by seek() - confirm
+  };
+
+} // namespace antlr4
author	robot-piglet <robot-piglet@yandex-team.com>	2023-12-02 01:45:21 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2023-12-02 02:42:50 +0300
commit	9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree	9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h
parent	32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
download	ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz