diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 01:45:21 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2023-12-02 02:42:50 +0300 |
commit | 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch) | |
tree | 9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h | |
parent | 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff) | |
download | ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h')
-rw-r--r-- | contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h new file mode 100644 index 0000000000..5b05834f85 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/UnbufferedCharStream.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "CharStream.h" + +namespace antlr4 { + + /// Do not buffer up the entire char stream. It does keep a small buffer + /// for efficiency and also buffers while a mark exists (set by the + /// lookahead prediction in parser). "Unbuffered" here refers to fact + /// that it doesn't buffer all data, not that's it's on demand loading of char. + class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream { + public: + /// The name or source of this char stream. + std::string name; + + explicit UnbufferedCharStream(std::wistream &input); + + void consume() override; + size_t LA(ssize_t i) override; + + /// <summary> + /// Return a marker that we can release later. + /// <p/> + /// The specific marker value used for this class allows for some level of + /// protection against misuse where {@code seek()} is called on a mark or + /// {@code release()} is called in the wrong order. + /// </summary> + ssize_t mark() override; + + /// <summary> + /// Decrement number of markers, resetting buffer if we hit 0. </summary> + /// <param name="marker"> </param> + void release(ssize_t marker) override; + size_t index() override; + + /// <summary> + /// Seek to absolute character index, which might not be in the current + /// sliding window. Move {@code p} to {@code index-bufferStartIndex}. + /// </summary> + void seek(size_t index) override; + size_t size() override; + std::string getSourceName() const override; + std::string getText(const misc::Interval &interval) override; + + std::string toString() const override; + + protected: + /// A moving window buffer of the data being scanned. While there's a marker, + /// we keep adding to buffer. Otherwise, <seealso cref="#consume consume()"/> resets so + /// we start filling at index 0 again. + // UTF-32 encoded. + std::u32string _data; + typedef char32_t storage_type; + + /// <summary> + /// 0..n-1 index into <seealso cref="#data data"/> of next character. + /// <p/> + /// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are + /// out of buffered characters. + /// </summary> + size_t _p; + + /// <summary> + /// Count up with <seealso cref="#mark mark()"/> and down with + /// <seealso cref="#release release()"/>. When we {@code release()} the last mark, + /// {@code numMarkers} reaches 0 and we reset the buffer. Copy + /// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}. + /// </summary> + size_t _numMarkers; + + /// This is the {@code LA(-1)} character for the current position. + size_t _lastChar; // UTF-32 + + /// <summary> + /// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the + /// first character in <seealso cref="#data data"/>. Otherwise, this is unspecified. + /// </summary> + size_t _lastCharBufferStart; // UTF-32 + + /// <summary> + /// Absolute character index. It's the index of the character about to be + /// read via {@code LA(1)}. Goes from 0 to the number of characters in the + /// entire stream, although the stream size is unknown before the end is + /// reached. + /// </summary> + size_t _currentCharIndex; + + std::wistream &_input; + + /// <summary> + /// Make sure we have 'want' elements from current position <seealso cref="#p p"/>. + /// Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is + /// the char index 'need' elements ahead. If we need 1 element, + /// {@code (p+1-1)==p} must be less than {@code data.length}. + /// </summary> + virtual void sync(size_t want); + + /// <summary> + /// Add {@code n} characters to the buffer. Returns the number of characters + /// actually added to the buffer. If the return value is less than {@code n}, + /// then EOF was reached before {@code n} characters could be added. + /// </summary> + virtual size_t fill(size_t n); + + /// Override to provide different source of characters than + /// <seealso cref="#input input"/>. + virtual char32_t nextChar(); + virtual void add(char32_t c); + size_t getBufferStartIndex() const; + }; + +} // namespace antlr4 |