summaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp
diff options
context:
space:
mode:
authorasmyasnikov <[email protected]>2024-06-26 17:09:51 +0300
committerasmyasnikov <[email protected]>2024-06-26 17:27:07 +0300
commite25934f4bbe7b98daa362f04861972e8f83066ad (patch)
treeb350932f398fafa6740fe43a529edf700c747270 /contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp
parente6190f5d36aef50e2fec0076c384ba0874f5564c (diff)
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp')
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp180
1 files changed, 180 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp
new file mode 100644
index 00000000000..b6470af9b71
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/ANTLRInputStream.cpp
@@ -0,0 +1,180 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include <string.h>
+
+#include "Exceptions.h"
+#include "misc/Interval.h"
+#include "IntStream.h"
+
+#include "support/Utf8.h"
+#include "support/CPPUtils.h"
+
+#include "ANTLRInputStream.h"
+
+using namespace antlr4;
+using namespace antlrcpp;
+
+using misc::Interval;
+
+// Default constructor: performs no loading, it only puts the instance fields
+// into their initial state via InitializeInstanceFields().
+ANTLRInputStream::ANTLRInputStream() {
+  this->InitializeInstanceFields();
+}
+
+// Builds a stream from the bytes viewed by `input`. The default constructor
+// runs first (field initialization), then the bytes are handed to load(),
+// which decodes them as UTF-8.
+ANTLRInputStream::ANTLRInputStream(std::string_view input)
+    : ANTLRInputStream() {
+  load(input.data(), input.size());
+}
+
+// Builds a stream from `length` bytes starting at `data`; the bytes are
+// decoded as UTF-8 by load().
+//
+// Delegates to the default constructor like every other constructor of this
+// class, so the instance fields are always set up through
+// InitializeInstanceFields() before any data is loaded.
+ANTLRInputStream::ANTLRInputStream(const char *data, size_t length)
+    : ANTLRInputStream() {
+  load(data, length);
+}
+
+// Builds a stream from whatever can still be read from `stream`. The default
+// constructor runs first (field initialization), then load() drains the
+// stream and decodes its content.
+ANTLRInputStream::ANTLRInputStream(std::istream &stream)
+    : ANTLRInputStream() {
+  load(stream);
+}
+
+// Convenience overload: forwards the bytes of `input` and the `lenient` flag
+// to the (pointer, length) overload, which does the actual decoding.
+void ANTLRInputStream::load(const std::string &input, bool lenient) {
+  const char *bytes = input.data();
+  const size_t byteCount = input.size();
+  load(bytes, byteCount, lenient);
+}
+
+// Replaces the buffered content with the code points decoded from `length`
+// bytes of UTF-8 at `data`, then rewinds the read position to 0.
+//
+// A leading UTF-8 byte order mark is skipped. With `lenient` set the input is
+// decoded with Utf8::lenientDecode(); otherwise Utf8::strictDecode() is used
+// and an IllegalArgumentException is thrown when it yields no value (in that
+// case neither the buffer nor the position is touched).
+void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
+  // The three bytes of the UTF-8 encoded byte order mark.
+  static constexpr char kUtf8Bom[] = "\xef\xbb\xbf";
+
+  const bool startsWithBom = length >= 3 && strncmp(data, kUtf8Bom, 3) == 0;
+  const std::string_view bytes = startsWithBom
+      ? std::string_view(data + 3, length - 3)
+      : std::string_view(data, length);
+
+  if (!lenient) {
+    auto decoded = Utf8::strictDecode(bytes);
+    if (!decoded.has_value()) {
+      throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
+    }
+    _data = std::move(decoded).value();
+  } else {
+    _data = Utf8::lenientDecode(bytes);
+  }
+
+  p = 0;
+}
+
+// Reads everything that is left in `stream` and loads it through the
+// (pointer, length) overload with the given `lenient` flag.
+//
+// If the stream is not in a good state the call returns immediately and the
+// current content is left untouched. Otherwise the buffer is cleared before
+// the stream is drained.
+void ANTLRInputStream::load(std::istream &stream, bool lenient) {
+  // good() is already false when failbit, badbit or eofbit is set, so no
+  // separate eof() test is required.
+  if (!stream.good()) {
+    return;
+  }
+
+  _data.clear();
+
+  const std::istreambuf_iterator<char> first(stream);
+  const std::istreambuf_iterator<char> last;
+  const std::string bytes(first, last);
+  load(bytes.data(), bytes.size(), lenient);
+}
+
+// Rewinds the read position to the first symbol; the buffered data is kept.
+void ANTLRInputStream::reset() {
+  this->p = 0;
+}
+
+// Advances the read position by one symbol.
+//
+// Throws IllegalStateException when the position is already at (or past) the
+// end of the buffered data, i.e. when the next symbol would be EOF.
+void ANTLRInputStream::consume() {
+  const bool atEnd = p >= _data.size();
+  if (atEnd) {
+    assert(LA(1) == IntStream::EOF);
+    throw IllegalStateException("cannot consume EOF");
+  }
+
+  // Not at the end (checked above), so stepping forward is always valid.
+  ++p;
+}
+
+// Returns the symbol at a position relative to the current one.
+//
+//   i > 0  : look ahead; LA(1) is the symbol at the current position.
+//   i < 0  : look behind; LA(-1) is the symbol just before the current one.
+//   i == 0 : undefined, 0 is returned.
+//
+// IntStream::EOF is returned whenever the requested position lies before the
+// first or after the last buffered symbol.
+size_t ANTLRInputStream::LA(ssize_t i) {
+  if (i == 0) {
+    return 0; // undefined
+  }
+
+  // Look-ahead is 1-based (LA(1) -> offset 0) while look-behind maps directly
+  // onto a negative offset (LA(-1) -> offset -1).
+  const ssize_t offset = (i < 0) ? i : i - 1;
+  const ssize_t target = static_cast<ssize_t>(p) + offset;
+
+  if (target < 0) {
+    return IntStream::EOF; // nothing before the first symbol
+  }
+  if (target >= static_cast<ssize_t>(_data.size())) {
+    return IntStream::EOF; // nothing after the last symbol
+  }
+
+  return _data[static_cast<size_t>(target)];
+}
+
+// Same as LA(): simply forwards `i` and returns LA()'s result.
+size_t ANTLRInputStream::LT(ssize_t i) {
+  const size_t symbol = LA(i);
+  return symbol;
+}
+
+// Returns the current read position (the value of `p`).
+size_t ANTLRInputStream::index() {
+  return this->p;
+}
+
+// Returns the number of elements held in the decoded buffer `_data`.
+size_t ANTLRInputStream::size() {
+  const size_t symbolCount = _data.size();
+  return symbolCount;
+}
+
+// Mark/release do nothing. We have entire buffer.
+// No marker bookkeeping is done here; the call always returns -1.
+ssize_t ANTLRInputStream::mark() {
+  constexpr ssize_t kNoMarker = -1;
+  return kNoMarker;
+}
+
+// Counterpart of mark(): the marker argument is ignored and nothing happens.
+void ANTLRInputStream::release(ssize_t /* marker */) {
+  // Intentionally empty.
+}
+
+// Moves the read position to `index`.
+//
+// Seeking backwards (or to the current position) just assigns the position.
+// Seeking forwards advances symbol by symbol through consume() and stops at
+// `index` or at the end of the buffered data, whichever comes first.
+void ANTLRInputStream::seek(size_t index) {
+  if (index > p) {
+    // Forward seek: never step past the end of the data.
+    const size_t target = std::min(index, _data.size());
+    while (p < target) {
+      consume();
+    }
+    return;
+  }
+
+  // Backward seek: just jump; don't update stream state (line, ...).
+  p = index;
+}
+
+// Returns the UTF-8 encoding of the buffered symbols in the inclusive range
+// [interval.a, interval.b].
+//
+// An empty string is returned when either bound is negative, when the range
+// starts at or beyond the end of the data, or when the range is inverted
+// (a > b). A stop bound beyond the end of the data is clamped to the last
+// symbol.
+//
+// Throws IllegalArgumentException when Utf8::strictEncode() cannot encode
+// the selected symbols.
+std::string ANTLRInputStream::getText(const Interval &interval) {
+  if (interval.a < 0 || interval.b < 0) {
+    return "";
+  }
+
+  const size_t start = static_cast<size_t>(interval.a);
+  size_t stop = static_cast<size_t>(interval.b);
+
+  // Reject empty selections before doing any unsigned arithmetic: with an
+  // empty buffer `_data.size() - 1` would wrap around, and with start > stop
+  // the count below would wrap to a huge value that substr() silently clamps,
+  // returning the whole tail of the buffer instead of nothing.
+  if (start >= _data.size() || start > stop) {
+    return "";
+  }
+
+  // Here the buffer is non-empty and start <= stop, so after clamping
+  // start <= stop < _data.size() holds and count is at least 1.
+  stop = std::min(stop, _data.size() - 1);
+  const size_t count = stop - start + 1;
+
+  auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
+  if (!maybeUtf8.has_value()) {
+    throw IllegalArgumentException("Input stream contains invalid Unicode code points");
+  }
+  return std::move(maybeUtf8).value();
+}
+
+// Returns the `name` of this stream, or IntStream::UNKNOWN_SOURCE_NAME when
+// `name` is empty.
+std::string ANTLRInputStream::getSourceName() const {
+  if (!name.empty()) {
+    return name;
+  }
+  return IntStream::UNKNOWN_SOURCE_NAME;
+}
+
+// Returns the complete buffered content encoded as UTF-8.
+//
+// Throws IllegalArgumentException when Utf8::strictEncode() yields no value
+// for the buffered symbols.
+std::string ANTLRInputStream::toString() const {
+  if (auto encoded = Utf8::strictEncode(_data); encoded.has_value()) {
+    return std::move(encoded).value();
+  }
+  throw IllegalArgumentException("Input stream contains invalid Unicode code points");
+}
+
+// Puts the instance fields into their initial state: the read position `p`
+// starts at 0.
+void ANTLRInputStream::InitializeInstanceFields() {
+  this->p = 0;
+}