diff options
| author | asmyasnikov <[email protected]> | 2024-06-26 17:09:51 +0300 |
|---|---|---|
| committer | asmyasnikov <[email protected]> | 2024-06-26 17:27:07 +0300 |
| commit | e25934f4bbe7b98daa362f04861972e8f83066ad (patch) | |
| tree | b350932f398fafa6740fe43a529edf700c747270 /contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp | |
| parent | e6190f5d36aef50e2fec0076c384ba0874f5564c (diff) | |
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp')
| -rw-r--r-- | contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp new file mode 100644 index 00000000000..b6470af9b71 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp @@ -0,0 +1,180 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include <string.h> + +#include "Exceptions.h" +#include "misc/Interval.h" +#include "IntStream.h" + +#include "support/Utf8.h" +#include "support/CPPUtils.h" + +#include "ANTLRInputStream.h" + +using namespace antlr4; +using namespace antlrcpp; + +using misc::Interval; + +ANTLRInputStream::ANTLRInputStream() { + InitializeInstanceFields(); +} + +ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() { + load(input.data(), input.length()); +} + +ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) { + load(data, length); +} + +ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() { + load(stream); +} + +void ANTLRInputStream::load(const std::string &input, bool lenient) { + load(input.data(), input.size(), lenient); +} + +void ANTLRInputStream::load(const char *data, size_t length, bool lenient) { + // Remove the UTF-8 BOM if present. + const char *bom = "\xef\xbb\xbf"; + if (length >= 3 && strncmp(data, bom, 3) == 0) { + data += 3; + length -= 3; + } + if (lenient) { + _data = Utf8::lenientDecode(std::string_view(data, length)); + } else { + auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length)); + if (!maybe_utf32.has_value()) { + throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence"); + } + _data = std::move(maybe_utf32).value(); + } + p = 0; +} + +void ANTLRInputStream::load(std::istream &stream, bool lenient) { + if (!stream.good() || stream.eof()) // No fail, bad or EOF. + return; + + _data.clear(); + + std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>()); + load(s.data(), s.length(), lenient); +} + +void ANTLRInputStream::reset() { + p = 0; +} + +void ANTLRInputStream::consume() { + if (p >= _data.size()) { + assert(LA(1) == IntStream::EOF); + throw IllegalStateException("cannot consume EOF"); + } + + if (p < _data.size()) { + p++; + } +} + +size_t ANTLRInputStream::LA(ssize_t i) { + if (i == 0) { + return 0; // undefined + } + + ssize_t position = static_cast<ssize_t>(p); + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset i=0; then _data[p+0-1] + if ((position + i - 1) < 0) { + return IntStream::EOF; // invalid; no char before first char + } + } + + if ((position + i - 1) >= static_cast<ssize_t>(_data.size())) { + return IntStream::EOF; + } + + return _data[static_cast<size_t>((position + i - 1))]; +} + +size_t ANTLRInputStream::LT(ssize_t i) { + return LA(i); +} + +size_t ANTLRInputStream::index() { + return p; +} + +size_t ANTLRInputStream::size() { + return _data.size(); +} + +// Mark/release do nothing. We have entire buffer. +ssize_t ANTLRInputStream::mark() { + return -1; +} + +void ANTLRInputStream::release(ssize_t /* marker */) { +} + +void ANTLRInputStream::seek(size_t index) { + if (index <= p) { + p = index; // just jump; don't update stream state (line, ...) + return; + } + // seek forward, consume until p hits index or n (whichever comes first) + index = std::min(index, _data.size()); + while (p < index) { + consume(); + } +} + +std::string ANTLRInputStream::getText(const Interval &interval) { + if (interval.a < 0 || interval.b < 0) { + return ""; + } + + size_t start = static_cast<size_t>(interval.a); + size_t stop = static_cast<size_t>(interval.b); + + + if (stop >= _data.size()) { + stop = _data.size() - 1; + } + + size_t count = stop - start + 1; + if (start >= _data.size()) { + return ""; + } + + auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count)); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Input stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +std::string ANTLRInputStream::getSourceName() const { + if (name.empty()) { + return IntStream::UNKNOWN_SOURCE_NAME; + } + return name; +} + +std::string ANTLRInputStream::toString() const { + auto maybeUtf8 = Utf8::strictEncode(_data); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Input stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +void ANTLRInputStream::InitializeInstanceFields() { + p = 0; +} |
