aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yson_pull
diff options
context:
space:
mode:
author Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yson_pull
download ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yson_pull')
-rw-r--r--library/cpp/yson_pull/bridge.h34
-rw-r--r--library/cpp/yson_pull/buffer.h79
-rw-r--r--library/cpp/yson_pull/consumer.cpp83
-rw-r--r--library/cpp/yson_pull/consumer.h37
-rw-r--r--library/cpp/yson_pull/cyson_enums.h47
-rw-r--r--library/cpp/yson_pull/detail/byte_reader.h74
-rw-r--r--library/cpp/yson_pull/detail/byte_writer.h77
-rw-r--r--library/cpp/yson_pull/detail/cescape.h143
-rw-r--r--library/cpp/yson_pull/detail/cescape_decode.h154
-rw-r--r--library/cpp/yson_pull/detail/cescape_encode.h114
-rw-r--r--library/cpp/yson_pull/detail/fail.h20
-rw-r--r--library/cpp/yson_pull/detail/format_string.h26
-rw-r--r--library/cpp/yson_pull/detail/input/buffered.h35
-rw-r--r--library/cpp/yson_pull/detail/input/stdio_file.h42
-rw-r--r--library/cpp/yson_pull/detail/input/stream.h69
-rw-r--r--library/cpp/yson_pull/detail/lexer_base.h343
-rw-r--r--library/cpp/yson_pull/detail/macros.h24
-rw-r--r--library/cpp/yson_pull/detail/number.h37
-rw-r--r--library/cpp/yson_pull/detail/output/buffered.h51
-rw-r--r--library/cpp/yson_pull/detail/output/stdio_file.h33
-rw-r--r--library/cpp/yson_pull/detail/output/stream.h56
-rw-r--r--library/cpp/yson_pull/detail/percent_scalar.h36
-rw-r--r--library/cpp/yson_pull/detail/reader.h677
-rw-r--r--library/cpp/yson_pull/detail/stream_counter.h51
-rw-r--r--library/cpp/yson_pull/detail/symbols.h55
-rw-r--r--library/cpp/yson_pull/detail/traits.h29
-rw-r--r--library/cpp/yson_pull/detail/varint.h260
-rw-r--r--library/cpp/yson_pull/detail/writer.h566
-rw-r--r--library/cpp/yson_pull/detail/zigzag.h24
-rw-r--r--library/cpp/yson_pull/event.cpp18
-rw-r--r--library/cpp/yson_pull/event.h85
-rw-r--r--library/cpp/yson_pull/exceptions.cpp45
-rw-r--r--library/cpp/yson_pull/exceptions.h59
-rw-r--r--library/cpp/yson_pull/input.cpp33
-rw-r--r--library/cpp/yson_pull/input.h81
-rw-r--r--library/cpp/yson_pull/output.cpp29
-rw-r--r--library/cpp/yson_pull/output.h65
-rw-r--r--library/cpp/yson_pull/position_info.h23
-rw-r--r--library/cpp/yson_pull/range.h35
-rw-r--r--library/cpp/yson_pull/read_ops.cpp66
-rw-r--r--library/cpp/yson_pull/read_ops.h142
-rw-r--r--library/cpp/yson_pull/reader.cpp27
-rw-r--r--library/cpp/yson_pull/reader.h37
-rw-r--r--library/cpp/yson_pull/scalar.cpp57
-rw-r--r--library/cpp/yson_pull/scalar.h146
-rw-r--r--library/cpp/yson_pull/stream_type.h11
-rw-r--r--library/cpp/yson_pull/ut/cescape_ut.cpp71
-rw-r--r--library/cpp/yson_pull/ut/loop_ut.cpp382
-rw-r--r--library/cpp/yson_pull/ut/reader_ut.cpp410
-rw-r--r--library/cpp/yson_pull/ut/writer_ut.cpp256
-rw-r--r--library/cpp/yson_pull/ut/ya.make12
-rw-r--r--library/cpp/yson_pull/writer.cpp30
-rw-r--r--library/cpp/yson_pull/writer.h126
-rw-r--r--library/cpp/yson_pull/ya.make21
-rw-r--r--library/cpp/yson_pull/yson.h14
55 files changed, 5557 insertions, 0 deletions
diff --git a/library/cpp/yson_pull/bridge.h b/library/cpp/yson_pull/bridge.h
new file mode 100644
index 0000000000..ac767dcba0
--- /dev/null
+++ b/library/cpp/yson_pull/bridge.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "consumer.h"
+#include "event.h"
+#include "writer.h"
+
+namespace NYsonPull {
+ //! \brief Connect YSON stream producer and consumer.
+ //!
+ //! Useful for writing YSON stream filters.
+ //! \p Producer must have a \p NextEvent() method whose result exposes \p Type().
+ //! \p Consumer must have an \p OnEvent() method (like the \p NYsonPull::IConsumer interface).
+ template <typename Producer, typename Consumer>
+ inline void Bridge(Producer&& producer, Consumer&& consumer) {
+ for (;;) { // runs until the producer reports EndStream
+ auto& event = producer.NextEvent();
+ consumer.OnEvent(event); // forwarded before the end check, so EndStream reaches the consumer too
+ if (event.Type() == EEventType::EndStream) {
+ break;
+ }
+ }
+ }
+
+ template <typename Producer>
+ inline void Bridge(Producer&& producer, TWriter& writer_) {
+ Bridge(std::forward<Producer>(producer), writer_.GetConsumer());
+ }
+
+ template <typename Producer>
+ inline void Bridge(Producer&& producer, TWriter&& writer_) {
+ Bridge(std::forward<Producer>(producer), writer_.GetConsumer());
+ }
+
+}
diff --git a/library/cpp/yson_pull/buffer.h b/library/cpp/yson_pull/buffer.h
new file mode 100644
index 0000000000..04c9220ef3
--- /dev/null
+++ b/library/cpp/yson_pull/buffer.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+#include <cstddef>
+
+namespace NYsonPull {
+ //! \brief A non-owning buffer model.
+ //!
+ //! Represents a \p pos pointer moving between \p begin and \p end.
+ template <typename T>
+ class buffer {
+ T* begin_ = nullptr; // start of the viewed range
+ T* pos_ = nullptr; // current position inside the range
+ T* end_ = nullptr; // one past the last element of the range
+
+ public:
+ T* begin() const noexcept { // const accessor, yet hands out a T*: the view itself is const, not the data
+ return begin_;
+ }
+ T* pos() const noexcept {
+ return pos_;
+ }
+ T* end() const noexcept {
+ return end_;
+ }
+
+ //! \brief Number of elements between the current position and \p end.
+ size_t available() const noexcept {
+ return end_ - pos_;
+ }
+
+ //! \brief Number of elements between \p begin and the current position.
+ size_t used() const noexcept {
+ return pos_ - begin_;
+ }
+
+ //! \brief Move current position \p nbytes elements forward; must not pass \p end.
+ void advance(size_t nbytes) noexcept {
+ Y_ASSERT(pos_ + nbytes <= end_); // precondition only; pos_ is moved unconditionally below
+ pos_ += nbytes;
+ }
+
+ //! \brief Reset all three buffer pointers (note the order: begin, end, then pos).
+ void reset(T* new_begin, T* new_end, T* new_pos) {
+ begin_ = new_begin;
+ pos_ = new_pos;
+ end_ = new_end;
+ }
+
+ //! \brief Reset buffer to a new range with the current position at \p new_begin.
+ void reset(T* new_begin, T* new_end) {
+ reset(new_begin, new_end, new_begin);
+ }
+ };
+
+ class output_buffer: public buffer<ui8> {
+ public:
+ //! \brief An output buffer is empty when there is no data written to it.
+ bool is_empty() const noexcept {
+ return pos() == begin();
+ }
+
+ //! \brief An output buffer is full when there is no space to write more data to it.
+ bool is_full() const noexcept {
+ return pos() == end();
+ }
+ };
+
+ class input_buffer: public buffer<const ui8> {
+ public:
+ //! \brief An input buffer is empty when there is no data left to read in it.
+ bool is_empty() const noexcept {
+ return pos() == end(); // everything up to end has been consumed
+ }
+ };
+
+}
diff --git a/library/cpp/yson_pull/consumer.cpp b/library/cpp/yson_pull/consumer.cpp
new file mode 100644
index 0000000000..c238e0a6fb
--- /dev/null
+++ b/library/cpp/yson_pull/consumer.cpp
@@ -0,0 +1,83 @@
+#include "consumer.h"
+
+#include <library/cpp/yson_pull/detail/macros.h>
+
+using namespace NYsonPull;
+
+void IConsumer::OnScalar(const TScalar& value) {
+ switch (value.Type()) {
+ case EScalarType::Entity:
+ OnEntity();
+ break;
+
+ case EScalarType::Boolean:
+ OnScalarBoolean(value.AsBoolean());
+ break;
+
+ case EScalarType::Int64:
+ OnScalarInt64(value.AsInt64());
+ break;
+
+ case EScalarType::UInt64:
+ OnScalarUInt64(value.AsUInt64());
+ break;
+
+ case EScalarType::Float64:
+ OnScalarFloat64(value.AsFloat64());
+ break;
+
+ case EScalarType::String:
+ OnScalarString(value.AsString());
+ break;
+
+ default:
+ Y_UNREACHABLE();
+ }
+}
+
+void IConsumer::OnEvent(const TEvent& value) {
+ switch (value.Type()) {
+ case EEventType::BeginStream:
+ OnBeginStream();
+ break;
+
+ case EEventType::EndStream:
+ OnEndStream();
+ break;
+
+ case EEventType::BeginList:
+ OnBeginList();
+ break;
+
+ case EEventType::EndList:
+ OnEndList();
+ break;
+
+ case EEventType::BeginMap:
+ OnBeginMap();
+ break;
+
+ case EEventType::Key:
+ OnKey(value.AsString());
+ break;
+
+ case EEventType::EndMap:
+ OnEndMap();
+ break;
+
+ case EEventType::BeginAttributes:
+ OnBeginAttributes();
+ break;
+
+ case EEventType::EndAttributes:
+ OnEndAttributes();
+ break;
+
+ case EEventType::Scalar:
+ OnScalar(value.AsScalar());
+ break;
+
+ default:
+ Y_UNREACHABLE();
+ }
+}
diff --git a/library/cpp/yson_pull/consumer.h b/library/cpp/yson_pull/consumer.h
new file mode 100644
index 0000000000..f3b1398d4c
--- /dev/null
+++ b/library/cpp/yson_pull/consumer.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "event.h"
+
+#include <util/generic/strbuf.h>
+#include <util/system/types.h>
+
+namespace NYsonPull {
+ class IConsumer {
+ public:
+ virtual ~IConsumer() = default;
+
+ virtual void OnBeginStream() = 0;
+ virtual void OnEndStream() = 0;
+
+ virtual void OnBeginList() = 0;
+ virtual void OnEndList() = 0;
+
+ virtual void OnBeginMap() = 0;
+ virtual void OnEndMap() = 0;
+
+ virtual void OnBeginAttributes() = 0;
+ virtual void OnEndAttributes() = 0;
+
+ virtual void OnKey(TStringBuf name) = 0;
+
+ virtual void OnEntity() = 0;
+ virtual void OnScalarBoolean(bool value) = 0;
+ virtual void OnScalarInt64(i64 value) = 0;
+ virtual void OnScalarUInt64(ui64 value) = 0;
+ virtual void OnScalarFloat64(double value) = 0;
+ virtual void OnScalarString(TStringBuf value) = 0;
+
+ virtual void OnScalar(const TScalar& value);
+ virtual void OnEvent(const TEvent& value);
+ };
+}
diff --git a/library/cpp/yson_pull/cyson_enums.h b/library/cpp/yson_pull/cyson_enums.h
new file mode 100644
index 0000000000..315de97307
--- /dev/null
+++ b/library/cpp/yson_pull/cyson_enums.h
@@ -0,0 +1,47 @@
+#pragma once
+
+typedef enum yson_event_type {
+ YSON_EVENT_BEGIN_STREAM = 0,
+ YSON_EVENT_END_STREAM = 1,
+ YSON_EVENT_BEGIN_LIST = 2,
+ YSON_EVENT_END_LIST = 3,
+ YSON_EVENT_BEGIN_MAP = 4,
+ YSON_EVENT_END_MAP = 5,
+ YSON_EVENT_BEGIN_ATTRIBUTES = 6,
+ YSON_EVENT_END_ATTRIBUTES = 7,
+ YSON_EVENT_KEY = 8,
+ YSON_EVENT_SCALAR = 9,
+ YSON_EVENT_ERROR = 10
+} yson_event_type;
+
+typedef enum yson_scalar_type {
+ YSON_SCALAR_ENTITY = 0,
+ YSON_SCALAR_BOOLEAN = 1,
+ YSON_SCALAR_INT64 = 2,
+ YSON_SCALAR_UINT64 = 3,
+ YSON_SCALAR_FLOAT64 = 4,
+ YSON_SCALAR_STRING = 5
+} yson_scalar_type;
+
+typedef enum yson_input_stream_result {
+ YSON_INPUT_STREAM_RESULT_OK = 0,
+ YSON_INPUT_STREAM_RESULT_EOF = 1,
+ YSON_INPUT_STREAM_RESULT_ERROR = 2
+} yson_input_stream_result;
+
+typedef enum yson_output_stream_result {
+ YSON_OUTPUT_STREAM_RESULT_OK = 0,
+ YSON_OUTPUT_STREAM_RESULT_ERROR = 1
+} yson_output_stream_result;
+
+typedef enum yson_writer_result {
+ YSON_WRITER_RESULT_OK = 0,
+ YSON_WRITER_RESULT_BAD_STREAM = 1,
+ YSON_WRITER_RESULT_ERROR = 2
+} yson_writer_result;
+
+typedef enum yson_stream_type {
+ YSON_STREAM_TYPE_NODE = 0,
+ YSON_STREAM_TYPE_LIST_FRAGMENT = 1,
+ YSON_STREAM_TYPE_MAP_FRAGMENT = 2
+} yson_stream_type;
diff --git a/library/cpp/yson_pull/detail/byte_reader.h b/library/cpp/yson_pull/detail/byte_reader.h
new file mode 100644
index 0000000000..7cea50d323
--- /dev/null
+++ b/library/cpp/yson_pull/detail/byte_reader.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "cescape.h"
+#include "fail.h"
+#include "stream_counter.h"
+
+#include <library/cpp/yson_pull/input.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ template <class StreamCounter>
+ class byte_reader {
+ NYsonPull::NInput::IStream& stream_;
+ StreamCounter stream_counter_;
+
+ public:
+ byte_reader(NYsonPull::NInput::IStream& stream)
+ : stream_(stream)
+ {
+ }
+
+ // const-ness added to prevent direct stream mutation
+ const NYsonPull::NInput::IStream& stream() {
+ return stream_;
+ }
+
+ template <typename... Args>
+ ATTRIBUTE(noinline, cold)
+ void fail[[noreturn]](const char* msg, Args&&... args) {
+ NYsonPull::NDetail::fail(
+ stream_counter_.info(),
+ msg,
+ std::forward<Args>(args)...);
+ }
+
+ template <bool AllowFinish> // AllowFinish == false: running out of input is reported as an error
+ void fill_buffer() {
+ stream_.fill_buffer(); // delegate the actual refill to the underlying input stream
+
+ if (!AllowFinish) { // compile-time constant, so this check vanishes for AllowFinish == true
+ auto& buf = stream_.buffer();
+ if (Y_UNLIKELY(buf.is_empty() && stream_.at_end())) { // nothing buffered and the stream is exhausted
+ fail("Premature end of stream");
+ }
+ }
+ }
+
+ void fill_buffer() {
+ return fill_buffer<true>();
+ }
+
+ template <bool AllowFinish> // forwarded to fill_buffer<AllowFinish>()
+ ui8 get_byte() { // peeks at the current byte; the position is not advanced here
+ fill_buffer<AllowFinish>();
+ auto& buf = stream_.buffer();
+ return !buf.is_empty()
+ ? *buf.pos()
+ : ui8{'\0'}; // buffer still empty after the refill: report a NUL byte instead
+ }
+
+ ui8 get_byte() {
+ return get_byte<true>();
+ }
+
+ void advance(size_t bytes) {
+ auto& buf = stream_.buffer();
+ stream_counter_.update(
+ buf.pos(),
+ buf.pos() + bytes);
+ buf.advance(bytes);
+ }
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/detail/byte_writer.h b/library/cpp/yson_pull/detail/byte_writer.h
new file mode 100644
index 0000000000..dc1d4b4b96
--- /dev/null
+++ b/library/cpp/yson_pull/detail/byte_writer.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include "macros.h"
+
+#include <library/cpp/yson_pull/output.h>
+
+#include <util/system/types.h>
+
+#include <cstddef>
+#include <cstring>
+
+namespace NYsonPull {
+ namespace NDetail {
+ template <class StreamCounter>
+ class byte_writer {
+ NYsonPull::NOutput::IStream& stream_;
+ StreamCounter stream_counter_;
+
+ public:
+ byte_writer(NYsonPull::NOutput::IStream& stream)
+ : stream_(stream)
+ {
+ }
+
+ // const-ness added to prevent direct stream mutation
+ const NYsonPull::NOutput::IStream& stream() {
+ return stream_;
+ }
+ const StreamCounter& counter() {
+ return stream_counter_;
+ }
+
+ void flush_buffer() {
+ stream_.flush_buffer();
+ }
+
+ void advance(size_t bytes) {
+ auto& buf = stream_.buffer();
+ stream_counter_.update(
+ buf.pos(),
+ buf.pos() + bytes);
+ buf.advance(bytes);
+ }
+
+ void write(ui8 c) { // append a single byte
+ auto& buf = stream_.buffer();
+ if (Y_LIKELY(!buf.is_full())) { // fast path: room for at least one byte
+ *buf.pos() = c;
+ advance(1); // also feeds the byte to the stream counter
+ } else {
+ auto ptr = reinterpret_cast<char*>(&c);
+ stream_counter_.update(&c, &c + 1); // counted manually because advance() is bypassed on this path
+ stream_.flush_buffer({ptr, 1}); // NOTE(review): presumably flushes the full buffer and then emits this byte - confirm in output.h
+ }
+ }
+
+ void write(const ui8* data, size_t size) { // append a span of bytes
+ auto& buf = stream_.buffer();
+ auto free_buf = buf.available();
+ if (Y_LIKELY(size < free_buf)) { // fast path: strictly smaller, so the buffer is never left full
+ ::memcpy(buf.pos(), data, size);
+ advance(size);
+ } else {
+ if (!buf.is_full()) { // top up the buffer with the head of the payload first
+ ::memcpy(buf.pos(), data, free_buf);
+ advance(free_buf);
+ data += free_buf;
+ size -= free_buf; // may become 0 when size == free_buf
+ }
+ stream_counter_.update(data, data + size); // the remainder bypasses advance(), so count it here
+ stream_.flush_buffer({reinterpret_cast<const char*>(data), // NOTE(review): presumably flushes and then emits the remainder - confirm in output.h
+ size});
+ }
+ }
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/detail/cescape.h b/library/cpp/yson_pull/detail/cescape.h
new file mode 100644
index 0000000000..1ea150e69a
--- /dev/null
+++ b/library/cpp/yson_pull/detail/cescape.h
@@ -0,0 +1,143 @@
+#pragma once
+
+#include "byte_writer.h"
+#include "cescape_decode.h"
+#include "cescape_encode.h"
+#include "macros.h"
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+
+/* REFERENCES FOR ESCAPE SEQUENCE INTERPRETATION:
+ * C99 p. 6.4.3 Universal character names.
+ * C99 p. 6.4.4.4 Character constants.
+ *
+ * <simple-escape-sequence> ::= {
+ * \' , \" , \? , \\ ,
+ * \a , \b , \f , \n , \r , \t , \v
+ * }
+ *
+ * <octal-escape-sequence> ::= \ <octal-digit> {1, 3}
+ * <hexadecimal-escape-sequence> ::= \x <hexadecimal-digit> +
+ * <universal-character-name> ::= \u <hexadecimal-digit> {4}
+ * || \U <hexadecimal-digit> {8}
+ *
+ * NOTE (6.4.4.4.7):
+ * Each octal or hexadecimal escape sequence is the longest sequence of characters that can
+ * constitute the escape sequence.
+ *
+ * THEREFORE:
+ * - Octal escape sequence extends up to the first non-octal-digit character.
+ * - Octal escape sequence always terminates after at most three octal digits.
+ * - Hexadecimal escape sequence extends up to the first non-hexadecimal-digit character.
+ * - Universal character name consists of exactly 4 or 8 hexadecimal digits.
+ *
+ */
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NCEscape {
+ inline void encode(TString& dest, TStringBuf data) {
+ NImpl::escape_impl(
+ reinterpret_cast<const ui8*>(data.data()),
+ data.size(),
+ [&](const ui8* str, size_t size) {
+ dest.append(
+ reinterpret_cast<const char*>(str),
+ size);
+ });
+ }
+
+ // dest must have at least 4*data.size() bytes available (one input byte escapes to at most 4); returns bytes written
+ inline size_t encode(ui8* dest, TStringBuf data) {
+ auto* dest_begin = dest; // remembered to compute the output length at the end
+ NImpl::escape_impl(
+ reinterpret_cast<const ui8*>(data.data()),
+ data.size(),
+ [&](const ui8* str, size_t size) { // sink: append each emitted chunk at dest, no bounds check
+ ::memcpy(dest, str, size);
+ dest += size;
+ });
+ return dest - dest_begin;
+ }
+
+ template <typename U> // U is the stream counter type of the destination byte_writer
+ void encode(byte_writer<U>& dest, TStringBuf data) {
+ auto& buffer = dest.stream().buffer();
+ if (Y_LIKELY(buffer.available() >= data.size() * 4)) { // worst case is 4 output bytes per input byte
+ auto size = encode(buffer.pos(), data); // escape straight into the free part of the buffer
+ dest.advance(size); // then account for the bytes that were produced
+ } else {
+ NImpl::escape_impl( // slow path: push every chunk through write(), which handles a full buffer
+ reinterpret_cast<const ui8*>(data.data()),
+ data.size(),
+ [&](const ui8* str, size_t size) {
+ dest.write(str, size);
+ });
+ }
+ }
+
+ inline TString encode(TStringBuf data) {
+ TString result;
+ result.reserve(data.size());
+ encode(result, data);
+ return result;
+ }
+
+ inline void decode(TString& dest, TStringBuf data) {
+ NImpl::unescape_impl(
+ reinterpret_cast<const ui8*>(data.begin()),
+ reinterpret_cast<const ui8*>(data.end()),
+ [&](ui8 c) {
+ dest += c;
+ },
+ [&](const ui8* p, size_t len) {
+ dest.append(reinterpret_cast<const char*>(p), len);
+ });
+ }
+
+ inline void decode_inplace(TVector<ui8>& data) { // unescapes data in place and shrinks it to the decoded size
+ auto* out = static_cast<ui8*>(
+ ::memchr(data.data(), '\\', data.size())); // everything before the first backslash is already final
+ if (out == nullptr) { // no escapes at all: nothing to do
+ return;
+ }
+ NImpl::unescape_impl(
+ out, // reading starts at the first backslash, which is also where writing starts
+ data.data() + data.size(),
+ [&](ui8 c) { // one decoded byte; each escape consumes at least as many bytes as it yields
+ *out++ = c;
+ },
+ [&](const ui8* p, size_t len) { // literal run between escapes
+ ::memmove(out, p, len); // memmove: source and destination lie in the same vector and may overlap
+ out += len;
+ });
+ data.resize(out - &data[0]); // drop the tail left over after compaction
+ }
+
+ inline TString decode(TStringBuf data) {
+ TString result;
+ result.reserve(data.size());
+ decode(result, data);
+ return result;
+ }
+
+ ATTRIBUTE(noinline, cold)
+ inline TString quote(TStringBuf str) {
+ TString result;
+ result.reserve(str.size() + 16);
+ result += '"';
+ encode(result, str);
+ result += '"';
+ return result;
+ }
+
+ ATTRIBUTE(noinline, cold)
+ inline TString quote(ui8 ch) {
+ char c = ch;
+ return quote(TStringBuf(&c, 1));
+ }
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/cescape_decode.h b/library/cpp/yson_pull/detail/cescape_decode.h
new file mode 100644
index 0000000000..2ee5dd9500
--- /dev/null
+++ b/library/cpp/yson_pull/detail/cescape_decode.h
@@ -0,0 +1,154 @@
+#pragma once
+
+#include <util/system/types.h>
+
+#include <algorithm>
+#include <cstring>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NCEscape {
+ namespace NImpl {
+ inline ui8 as_digit(ui8 c) {
+ return c - ui8{'0'};
+ }
+
+ inline ui8 as_hexdigit(ui8 c) {
+ static constexpr ui8 hex_decode_map[256] = {
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255,
+ 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255};
+
+ return hex_decode_map[c];
+ }
+
+ inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) { // accumulates up to n octal digits into result; returns first unread byte
+ auto digit = ui8{0};
+ while (n-- && (digit = as_digit(*p)) < 8) { // n is tested first, so *p is read only while digits are still allowed
+ result = result * 8 + digit; // caller is expected to zero result beforehand
+ ++p;
+ }
+ return p;
+ }
+
+ inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) { // accumulates up to n hex digits into result; returns first unread byte
+ auto digit = ui8{0};
+ while (n-- && (digit = as_hexdigit(*p)) < 16) { // as_hexdigit yields 255 for non-hex bytes, which stops the loop
+ result = result * 16 + digit; // caller is expected to zero result beforehand
+ ++p;
+ }
+ return p;
+ }
+
+ inline const ui8* unescape_char_and_advance(
+ ui8& result,
+ const ui8* p,
+ const ui8* end) {
+ switch (*p) {
+ default:
+ result = *p;
+ ++p;
+ break;
+ case 'b':
+ result = '\b';
+ ++p;
+ break;
+ case 'f':
+ result = '\f';
+ ++p;
+ break;
+ case 'n':
+ result = '\n';
+ ++p;
+ break;
+ case 'r':
+ result = '\r';
+ ++p;
+ break;
+ case 't':
+ result = '\t';
+ ++p;
+ break;
+
+ case 'x': {
+ ++p;
+ result = 0;
+ auto* next = read_hex(
+ result,
+ p, std::min<ptrdiff_t>(2, end - p));
+ if (next > p) {
+ p = next;
+ } else {
+ result = 'x';
+ }
+ } break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ result = 0;
+ p = read_oct(
+ result,
+ p, std::min<ptrdiff_t>(3, end - p));
+ break;
+
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ result = 0;
+ p = read_oct(
+ result,
+ p, std::min<ptrdiff_t>(2, end - p));
+ break;
+ }
+ return p;
+ }
+
+ template <typename T, typename U> // T: callable(ui8), U: callable(const ui8*, size_t)
+ inline void unescape_impl(
+ const ui8* p, // start of the escaped input
+ const ui8* end, // one past the last input byte
+ T&& consume_one, // receives each decoded escape as a single byte
+ U&& consume_span) { // receives literal runs that need no decoding
+ while (p < end) {
+ auto* escaped = static_cast<const ui8*>(
+ ::memchr(p, '\\', end - p)); // locate the next backslash
+ if (escaped == nullptr) { // no more escapes: the rest is one literal run
+ consume_span(p, end - p);
+ return;
+ } else {
+ consume_span(p, escaped - p); // literal run before the backslash (may be empty)
+ auto c = ui8{'\\'}; // default result if the backslash is the very last byte
+ p = escaped + 1;
+ if (p < end) {
+ p = unescape_char_and_advance(c, p, end); // decodes into c and skips the consumed bytes
+ }
+ consume_one(c);
+ }
+ }
+ }
+ }
+ } // namespace NCEscape
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/cescape_encode.h b/library/cpp/yson_pull/detail/cescape_encode.h
new file mode 100644
index 0000000000..bf5765f1d9
--- /dev/null
+++ b/library/cpp/yson_pull/detail/cescape_encode.h
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <util/system/types.h>
+
+// Whether to ensure strict ASCII compatibility (escape every byte outside the 32..126 range).
+// When enabled, turns UTF-8 strings into unreadable garbage for no known benefit.
+//#define CESCAPE_STRICT_ASCII
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NCEscape {
+ namespace NImpl {
+ inline ui8 hex_digit(ui8 value) {
+ constexpr ui8 hex_digits[] = "0123456789ABCDEF";
+ return hex_digits[value];
+ }
+
+ inline ui8 oct_digit(ui8 value) {
+ return '0' + value;
+ }
+
+ inline bool is_printable(ui8 c) {
+#ifdef CESCAPE_STRICT_ASCII
+ return c >= 32 && c <= 126;
+#else
+ return c >= 32;
+#endif
+ }
+
+ inline bool is_hex_digit(ui8 c) {
+ return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
+ }
+
+ inline bool is_oct_digit(ui8 c) {
+ return c >= '0' && c <= '7';
+ }
+
+ constexpr size_t ESCAPE_C_BUFFER_SIZE = 4;
+
+ inline size_t escape_char( // writes the escaped form of c into r and returns its length (1, 2 or 4)
+ ui8 c, // byte to escape
+ ui8 next, // byte that follows c in the input; decides which escape forms are unambiguous
+ ui8 r[ESCAPE_C_BUFFER_SIZE]) { // output buffer, large enough for the longest form
+ // (1) Printable characters go as-is, except backslash and double quote.
+ // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible).
+ // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal.
+ if (c == '\"') {
+ r[0] = '\\';
+ r[1] = '\"';
+ return 2;
+ } else if (c == '\\') {
+ r[0] = '\\';
+ r[1] = '\\';
+ return 2;
+ } else if (is_printable(c)) { // the only branch that returns 1, i.e. leaves the byte unchanged
+ r[0] = c;
+ return 1;
+ } else if (c == '\r') {
+ r[0] = '\\';
+ r[1] = 'r';
+ return 2;
+ } else if (c == '\n') {
+ r[0] = '\\';
+ r[1] = 'n';
+ return 2;
+ } else if (c == '\t') {
+ r[0] = '\\';
+ r[1] = 't';
+ return 2;
+ } else if (c < 8 && !is_oct_digit(next)) { // short octal form only if the next byte cannot extend it
+ r[0] = '\\';
+ r[1] = oct_digit(c);
+ return 2;
+ } else if (!is_hex_digit(next)) { // \xHH only if the next byte cannot be read as another hex digit
+ r[0] = '\\';
+ r[1] = 'x';
+ r[2] = hex_digit((c & 0xF0) >> 4);
+ r[3] = hex_digit((c & 0x0F) >> 0);
+ return 4;
+ } else { // fall back to the full three-digit octal form
+ r[0] = '\\';
+ r[1] = oct_digit((c & 0700) >> 6); // c is 8 bits wide, so only bits 6-7 survive this mask
+ r[2] = oct_digit((c & 0070) >> 3);
+ r[3] = oct_digit((c & 0007) >> 0);
+ return 4;
+ }
+ }
+
+ template <typename T> // T: callable(const ui8*, size_t) receiving output chunks in order
+ inline void escape_impl(const ui8* str, size_t len, T&& consume) {
+ ui8 buffer[ESCAPE_C_BUFFER_SIZE]; // scratch space for one escaped character
+
+ size_t i, j; // i: current input index, j: start of the pending run of unescaped bytes
+ for (i = 0, j = 0; i < len; ++i) {
+ auto next_char = i + 1 < len ? str[i + 1] : 0; // 0 stands in for "no next byte" at the end of input
+ size_t rlen = escape_char(str[i], next_char, buffer);
+
+ if (rlen > 1) { // the byte had to be escaped
+ consume(str + j, i - j); // flush the literal run before it (may be empty)
+ j = i + 1;
+ consume(buffer, rlen);
+ }
+ }
+
+ if (j > 0) { // flush the trailing literal run
+ consume(str + j, len - j);
+ } else {
+ consume(str, len); // nothing was escaped: same call as above with j == 0
+ }
+ }
+ }
+ } // namespace NCEscape
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/fail.h b/library/cpp/yson_pull/detail/fail.h
new file mode 100644
index 0000000000..6937612d0b
--- /dev/null
+++ b/library/cpp/yson_pull/detail/fail.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "format_string.h"
+#include "macros.h"
+
+#include <library/cpp/yson_pull/exceptions.h>
+#include <library/cpp/yson_pull/position_info.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ template <typename... Args>
+ ATTRIBUTE(noreturn, noinline, cold)
+ void fail(
+ const TPositionInfo& info,
+ Args&&... args) {
+ auto formatted_message = format_string(std::forward<Args>(args)...);
+ throw NException::TBadInput(formatted_message, info);
+ }
+ }
+}
diff --git a/library/cpp/yson_pull/detail/format_string.h b/library/cpp/yson_pull/detail/format_string.h
new file mode 100644
index 0000000000..683fd1bf36
--- /dev/null
+++ b/library/cpp/yson_pull/detail/format_string.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/string/builder.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NImpl {
+ inline void apply_args(TStringBuilder&) {
+ }
+
+ template <typename T, typename... Args>
+ inline void apply_args(TStringBuilder& builder, T&& arg, Args&&... args) {
+ apply_args(builder << arg, std::forward<Args>(args)...);
+ }
+ }
+
+ template <typename... Args>
+ TString format_string(Args&&... args) {
+ TStringBuilder builder;
+ NImpl::apply_args(builder, std::forward<Args>(args)...);
+ return TString(std::move(builder));
+ }
+ }
+}
diff --git a/library/cpp/yson_pull/detail/input/buffered.h b/library/cpp/yson_pull/detail/input/buffered.h
new file mode 100644
index 0000000000..9b1482577f
--- /dev/null
+++ b/library/cpp/yson_pull/detail/input/buffered.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <library/cpp/yson_pull/detail/macros.h>
+
+#include <library/cpp/yson_pull/exceptions.h>
+#include <library/cpp/yson_pull/input.h>
+
+#include <cstdio>
+#include <memory>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NInput {
+ class TBuffered: public NYsonPull::NInput::IStream {
+ TArrayHolder<ui8> buffer_;
+ size_t size_;
+
+ public:
+ explicit TBuffered(size_t buffer_size)
+ : buffer_{new ui8[buffer_size]}
+ , size_{buffer_size} {
+ }
+
+ protected:
+ ui8* buffer_data() const {
+ return buffer_.Get();
+ }
+
+ size_t buffer_size() const {
+ return size_;
+ }
+ };
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/input/stdio_file.h b/library/cpp/yson_pull/detail/input/stdio_file.h
new file mode 100644
index 0000000000..c412b7e59b
--- /dev/null
+++ b/library/cpp/yson_pull/detail/input/stdio_file.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include "buffered.h"
+
+#include <library/cpp/yson_pull/detail/macros.h>
+
+#include <library/cpp/yson_pull/exceptions.h>
+#include <library/cpp/yson_pull/input.h>
+
+#include <cstdio>
+#include <memory>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NInput {
+ class TStdioFile: public TBuffered {
+ FILE* file_;
+
+ public:
+ TStdioFile(FILE* file, size_t buffer_size)
+ : TBuffered(buffer_size)
+ , file_{file} {
+ }
+
+ protected:
+ result do_fill_buffer() override { // refills the owned buffer with one fread() from file_
+ auto nread = ::fread(buffer_data(), 1, buffer_size(), file_);
+ if (Y_UNLIKELY(nread == 0)) { // a zero-byte read needs ferror/feof to tell failure from end of file
+ if (ferror(file_)) {
+ throw NException::TSystemError();
+ }
+ if (feof(file_)) {
+ return result::at_end; // buffer pointers are left untouched in this case
+ }
+ }
+ buffer().reset(buffer_data(), buffer_data() + nread); // expose exactly the bytes that were read
+ return result::have_more_data; // also reached when nread == 0 with neither error nor EOF set
+ }
+ };
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/input/stream.h b/library/cpp/yson_pull/detail/input/stream.h
new file mode 100644
index 0000000000..791cd5a3f5
--- /dev/null
+++ b/library/cpp/yson_pull/detail/input/stream.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <library/cpp/yson_pull/detail/macros.h>
+
+#include <library/cpp/yson_pull/input.h>
+
+#include <util/stream/buffered.h>
+#include <util/stream/file.h>
+#include <util/stream/zerocopy.h>
+#include <util/system/file.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NInput {
+ class TStreamBase: public NYsonPull::NInput::IStream {
+ protected:
+ result DoFillBufferFrom(IZeroCopyInput& input) { // points the stream buffer at the next chunk provided by input
+ void* ptr = nullptr;
+ size_t size = input.Next(&ptr); // input supplies both the chunk pointer and its size
+ if (Y_UNLIKELY(size == 0)) { // a zero-sized chunk is treated as end of input
+ return result::at_end;
+ }
+ buffer().reset(static_cast<ui8*>(ptr), static_cast<ui8*>(ptr) + size); // view the chunk directly; nothing is copied here
+ return result::have_more_data;
+ }
+ };
+
+ class TZeroCopy: public TStreamBase {
+ IZeroCopyInput* Input;
+
+ public:
+ explicit TZeroCopy(IZeroCopyInput* input)
+ : Input(input)
+ {
+ }
+
+ protected:
+ result do_fill_buffer() override {
+ return DoFillBufferFrom(*Input);
+ }
+ };
+
+ template <typename TBuffered>
+ class TOwned: public TStreamBase {
+ TBuffered Input;
+
+ public:
+ template <typename... Args>
+ explicit TOwned(Args&&... args)
+ : Input(std::forward<Args>(args)...)
+ {
+ }
+
+ protected:
+ result do_fill_buffer() override {
+ return DoFillBufferFrom(Input);
+ }
+ };
+
+ class TFHandle: public TOwned<TFileInput> {
+ public:
+ TFHandle(int fd, size_t buffer_size)
+ : TOwned<TFileInput>(Duplicate(fd), buffer_size)
+ {
+ }
+ };
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/lexer_base.h b/library/cpp/yson_pull/detail/lexer_base.h
new file mode 100644
index 0000000000..572bdb3d18
--- /dev/null
+++ b/library/cpp/yson_pull/detail/lexer_base.h
@@ -0,0 +1,343 @@
+#pragma once
+
+#include "byte_reader.h"
+#include "cescape.h"
+#include "macros.h"
+#include "number.h"
+#include "percent_scalar.h"
+#include "stream_counter.h"
+#include "varint.h"
+
+#include <util/generic/maybe.h>
+#include <util/generic/vector.h>
+#include <util/string/cast.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ template <bool EnableLinePositionInfo>
+ class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> {
+ using Base = byte_reader<
+ stream_counter<EnableLinePositionInfo>>;
+
+ TVector<ui8> token_buffer_;
+ TMaybe<size_t> memory_limit_;
+
+ public:
+ //! Creates a lexer reading from \p buffer.
+ //! \p memory_limit, when set, is the bound check_memory_limit() enforces
+ //! on the capacity of the internal token buffer.
+ lexer_base(
+ NYsonPull::NInput::IStream& buffer,
+ TMaybe<size_t> memory_limit)
+ : Base(buffer)
+ , memory_limit_{memory_limit} {
+ }
+
+ //! Returns the next non-whitespace byte.
+ //! Fast path: if the buffered head byte is not whitespace it is returned
+ //! without advancing. Everything else (empty buffer, leading whitespace)
+ //! goes through skip_space_and_get_byte_fallback().
+ ATTRIBUTE(noinline, hot)
+ ui8 skip_space_and_get_byte() {
+     auto& window = Base::stream().buffer();
+     if (Y_UNLIKELY(window.is_empty())) {
+         return skip_space_and_get_byte_fallback();
+     }
+     auto head = *window.pos();
+     if (Y_UNLIKELY(is_space(head))) {
+         return skip_space_and_get_byte_fallback();
+     }
+     return head;
+ }
+
+ //! Returns the byte at the current buffer position without advancing.
+ //! When the buffer is empty the call is delegated to Base::get_byte().
+ //! NOTE(review): Base::get_byte() is presumably what refills the buffer and
+ //! reports end of stream -- confirm in byte_reader.h.
+ ATTRIBUTE(hot)
+ ui8 get_byte() {
+ auto& buf = Base::stream().buffer();
+ if (Y_LIKELY(!buf.is_empty())) {
+ return *buf.pos();
+ }
+ return Base::get_byte();
+ }
+
+ //! Reads a text numeric literal and converts it to a number.
+ //! Digits, '+' and '-' keep the literal an int64; '.', 'e' or 'E' turn it
+ //! into a float64; 'u' turns it into a uint64 (the suffix is chopped before
+ //! conversion). Any other alphabetic byte is an error, and any remaining
+ //! byte ends the literal without being consumed.
+ //! Conversion exceptions are reported through Base::fail().
+ number read_numeric() {
+     token_buffer_.clear();
+     auto kind = number_type::int64;
+     for (;;) {
+         auto ch = this->Base::template get_byte<true>();
+         // The character groups below are disjoint, so their order is irrelevant.
+         if (ch == '.' || ch == 'e' || ch == 'E') {
+             kind = number_type::float64;
+             token_buffer_.push_back(ch);
+         } else if (ch == 'u') {
+             kind = number_type::uint64;
+             token_buffer_.push_back(ch);
+         } else if (isdigit(ch) || ch == '+' || ch == '-') {
+             token_buffer_.push_back(ch);
+         } else if (Y_UNLIKELY(isalpha(ch))) {
+             COLD_BLOCK_BYVALUE
+                 Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal");
+             COLD_BLOCK_END
+         } else {
+             break;
+         }
+         check_memory_limit();
+         Base::advance(1);
+     }
+
+     auto text = token_buffer();
+     try {
+         if (kind == number_type::int64) {
+             return FromString<i64>(text);
+         }
+         if (kind == number_type::float64) {
+             return FromString<double>(text);
+         }
+         if (kind == number_type::uint64) {
+             text.Chop(1); // 'u' suffix
+             return FromString<ui64>(text);
+         }
+         Y_UNREACHABLE();
+     } catch (const std::exception& err) {
+         Base::fail(err.what());
+     }
+ }
+
+ //! Reads the body of a double-quoted string (the opening quote has already
+ //! been consumed by the caller) up to and including the closing quote, and
+ //! returns the C-unescaped contents.
+ //!
+ //! The returned view points into the internal token buffer, so it is
+ //! overwritten by the next read_* call that uses that buffer.
+ //!
+ //! The memory limit is checked after every append to the token buffer.
+ //! Previously it was only checked after an escaped quote, so a long string
+ //! without escaped quotes was never checked against the limit.
+ TStringBuf read_quoted_string() {
+     // Number of consecutive '\' bytes at the end of [begin, end).
+     // Walks `end` backwards so no pointer before `begin` is ever formed.
+     auto count_trailing_slashes = [](const ui8* begin, const ui8* end) {
+         auto count = size_t{0};
+         while (end != begin && *(end - 1) == '\\') {
+             --end;
+             ++count;
+         }
+         return count;
+     };
+
+     token_buffer_.clear();
+     auto& buf = Base::stream().buffer();
+     while (true) {
+         this->Base::template fill_buffer<false>();
+         auto* quote = reinterpret_cast<const ui8*>(
+             ::memchr(buf.pos(), '"', buf.available()));
+         if (quote == nullptr) {
+             // No quote in this chunk: take all of it and keep looking.
+             token_buffer_.insert(
+                 token_buffer_.end(),
+                 buf.pos(),
+                 buf.end());
+             check_memory_limit();
+             Base::advance(buf.available());
+             continue;
+         }
+
+         token_buffer_.insert(
+             token_buffer_.end(),
+             buf.pos(),
+             quote);
+         check_memory_limit();
+         Base::advance(quote - buf.pos() + 1); // +1 for the quote itself
+
+         // An odd number of trailing '\' means the quote we found is escaped
+         // (\") and belongs to the string value.
+         const size_t slash_count = count_trailing_slashes(
+             token_buffer_.data(),
+             token_buffer_.data() + token_buffer_.size());
+         if (slash_count % 2 == 0) {
+             break;
+         }
+         token_buffer_.push_back('"');
+     }
+
+     NCEscape::decode_inplace(token_buffer_);
+     return token_buffer();
+ }
+
+ //! Reads an unquoted string: the longest run of letters, digits and the
+ //! characters '_', '-', '%' and '.'. The terminating byte is not consumed.
+ //! The returned view points into the internal token buffer.
+ TStringBuf read_unquoted_string() {
+     const auto is_token_byte = [](auto c) {
+         return isalpha(c) || isdigit(c) ||
+                c == '_' || c == '-' || c == '%' || c == '.';
+     };
+
+     token_buffer_.clear();
+     for (auto ch = this->Base::template get_byte<true>();
+          is_token_byte(ch);
+          ch = this->Base::template get_byte<true>()) {
+         token_buffer_.push_back(ch);
+         check_memory_limit();
+         Base::advance(1);
+     }
+     return token_buffer();
+ }
+
+ //! Reads a binary string: an i32 varint length followed by that many bytes.
+ //! A negative length is an error.
+ //! If the whole payload is already buffered, the returned view points
+ //! straight into the stream buffer. Otherwise the payload is assembled in
+ //! the token buffer by read_binary_string_fallback().
+ ATTRIBUTE(noinline, hot)
+ TStringBuf read_binary_string() {
+     auto signed_length = NVarInt::read<i32>(*this);
+     if (Y_UNLIKELY(signed_length < 0)) {
+         COLD_BLOCK_BYVALUE
+             Base::fail("Negative binary string literal length ", signed_length);
+         COLD_BLOCK_END
+     }
+     const auto length = static_cast<ui32>(signed_length);
+
+     auto& buf = Base::stream().buffer();
+     if (Y_UNLIKELY(buf.available() < length)) {
+         // Payload spans several buffer refills.
+         return read_binary_string_fallback(length);
+     }
+     auto view = TStringBuf{
+         reinterpret_cast<const char*>(buf.pos()),
+         length};
+     Base::advance(length);
+     return view;
+ }
+
+ //! Slow path of read_binary_string(): copies \p length bytes into the token
+ //! buffer, refilling the stream buffer as needed, and returns a view of it.
+ //! The memory limit is checked after every appended chunk.
+ //! NOTE(review): fill_buffer<false>() is presumably what fails on a
+ //! premature end of stream -- confirm in byte_reader.h.
+ ATTRIBUTE(noinline)
+ TStringBuf read_binary_string_fallback(size_t length) {
+     auto& buf = Base::stream().buffer();
+     token_buffer_.clear();
+     for (auto remaining = length; remaining != 0;) {
+         this->Base::template fill_buffer<false>();
+         const auto take = std::min(remaining, buf.available());
+
+         auto* first = buf.pos();
+         token_buffer_.insert(token_buffer_.end(), first, first + take);
+         check_memory_limit();
+         remaining -= take;
+         Base::advance(take);
+     }
+     return token_buffer();
+ }
+
+ //! Reads the remainder of a %-literal. The caller has already consumed the
+ //! leading '%'. Recognized literals: %true, %false, %nan, %inf, %-inf.
+ //! Every consumed byte is recorded in the token buffer so that the error
+ //! message can quote the offending prefix.
+ percent_scalar read_percent_scalar() {
+     auto fail_bad_prefix = [&]() {
+         Base::fail("Incorrect %-literal prefix ", NCEscape::quote(token_buffer()));
+     };
+
+     // Consumes one byte, records it in the token buffer and returns it.
+     auto take_byte = [&]() {
+         token_buffer_.push_back(this->Base::template get_byte<false>());
+         Base::advance(1);
+         return token_buffer_.back();
+     };
+
+     // Checks the literal tail. Index 0 is '%' (consumed by the caller) and
+     // index 1 is the byte that selected the literal, so matching starts at 2.
+     auto expect_tail = [&](TStringBuf literal) -> void {
+         for (size_t i = 2, n = literal.size(); i < n; ++i) {
+             if (Y_UNLIKELY(take_byte() != literal[i])) {
+                 fail_bad_prefix();
+             }
+         }
+     };
+
+     token_buffer_.clear();
+     const ui8 lead = take_byte();
+
+     if (lead == 't') {
+         expect_tail(percent_scalar::true_literal);
+         return percent_scalar(true);
+     }
+     if (lead == 'f') {
+         expect_tail(percent_scalar::false_literal);
+         return percent_scalar(false);
+     }
+     if (lead == 'n') {
+         expect_tail(percent_scalar::nan_literal);
+         return percent_scalar(std::numeric_limits<double>::quiet_NaN());
+     }
+     if (lead == 'i') {
+         expect_tail(percent_scalar::positive_inf_literal);
+         return percent_scalar(std::numeric_limits<double>::infinity());
+     }
+     if (lead == '-') {
+         expect_tail(percent_scalar::negative_inf_literal);
+         return percent_scalar(-std::numeric_limits<double>::infinity());
+     }
+     fail_bad_prefix();
+
+     Y_UNREACHABLE();
+ }
+
+ //! Reads a binary int64 value using NVarInt::read<i64>.
+ i64 read_binary_int64() {
+ return NVarInt::read<i64>(*this);
+ }
+
+ //! Reads a binary uint64 value using NVarInt::read<ui64>.
+ ui64 read_binary_uint64() {
+ return NVarInt::read<ui64>(*this);
+ }
+
+ //! Reads sizeof(double) raw bytes from the stream and returns them as a
+ //! double, in the byte order in which they appear in the stream.
+ //! Fails with "Error parsing binary double literal" if the stream cannot
+ //! supply enough bytes.
+ //!
+ //! The bytes are written into the object representation of the result
+ //! through a ui8 pointer instead of going through a union: reading a union
+ //! member other than the one last written is undefined behaviour in C++.
+ //! NOTE(review): this relies on ui8 being unsigned char -- confirm in
+ //! util/system/types.h.
+ double read_binary_double() {
+     double value = 0;
+     auto* out = reinterpret_cast<ui8*>(&value);
+     size_t filled = 0;
+
+     auto& buf = Base::stream().buffer();
+     while (filled != sizeof(double)) {
+         Base::fill_buffer();
+
+         auto chunk_size = std::min(sizeof(double) - filled, buf.available());
+         if (chunk_size == 0) {
+             Base::fail("Error parsing binary double literal");
+         }
+         std::copy(
+             buf.pos(),
+             buf.pos() + chunk_size,
+             out + filled);
+         filled += chunk_size;
+         Base::advance(chunk_size);
+     }
+     return value;
+ }
+
+ private:
+ //! True for the six whitespace bytes: '\t', '\n', '\v', '\f', '\r'
+ //! (codes 9..13) and ' ' (code 32). Every other byte is not whitespace.
+ static bool is_space(ui8 ch) {
+     switch (ch) {
+         case '\t':
+         case '\n':
+         case '\v':
+         case '\f':
+         case '\r':
+         case ' ':
+             return true;
+         default:
+             return false;
+     }
+ }
+
+ //! Slow path of skip_space_and_get_byte(): advances over whitespace,
+ //! refilling the buffer when it runs empty, and returns the first
+ //! non-whitespace byte via Base::get_byte().
+ //! Returns '\0' when the stream is at its end.
+ ATTRIBUTE(noinline, cold)
+ ui8 skip_space_and_get_byte_fallback() {
+     auto& buf = Base::stream().buffer();
+     for (;;) {
+         // FIXME (marker carried over from the original code; no details recorded)
+         if (!buf.is_empty()) {
+             if (!is_space(*buf.pos())) {
+                 return Base::get_byte();
+             }
+             Base::advance(1);
+             continue;
+         }
+         if (Base::stream().at_end()) {
+             return '\0';
+         }
+         Base::fill_buffer();
+     }
+ }
+
+ //! Fails the parse when a memory limit is configured and the token
+ //! buffer's capacity (not its size) exceeds it.
+ void check_memory_limit() {
+     if (Y_LIKELY(!memory_limit_ || token_buffer_.capacity() <= *memory_limit_)) {
+         return;
+     }
+     COLD_BLOCK_BYVALUE
+         Base::fail(
+             "Memory limit exceeded while parsing YSON stream: "
+             "allocated ",
+             token_buffer_.capacity(),
+             ", limit ", *memory_limit_);
+     COLD_BLOCK_END
+ }
+
+ //! Returns a non-owning view of the current token buffer contents.
+ //! The view refers to token_buffer_'s storage, so it becomes stale once
+ //! that buffer is modified.
+ TStringBuf token_buffer() const {
+ auto* begin = reinterpret_cast<const char*>(token_buffer_.data());
+ return {begin, token_buffer_.size()};
+ }
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/detail/macros.h b/library/cpp/yson_pull/detail/macros.h
new file mode 100644
index 0000000000..7243f9cfe1
--- /dev/null
+++ b/library/cpp/yson_pull/detail/macros.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <util/system/compiler.h>
+
+// ATTRIBUTE(...) expands to a GNU-style __attribute__((...)) on compilers that
+// define __GNUC__, and to nothing elsewhere.
+#if defined(__GNUC__)
+#define ATTRIBUTE(args...) __attribute__((args))
+#else
+#define ATTRIBUTE(...)
+#endif
+
+// COLD_BLOCK_BYVALUE / COLD_BLOCK_BYREF ... COLD_BLOCK_END wrap a statement
+// sequence into an immediately invoked lambda (capturing by value or by
+// reference respectively). On GCC the lambda is additionally marked
+// noinline + cold.
+#if defined(__GNUC__) && !defined(__clang__)
+#define COLD_BLOCK_BYVALUE [=]() ATTRIBUTE(noinline, cold) {
+#define COLD_BLOCK_BYREF [&]() ATTRIBUTE(noinline, cold) {
+#define COLD_BLOCK_END \
+ } \
+ ();
+#else
+// Clang does not support gnu-style attributes on lambda functions yet
+#define COLD_BLOCK_BYVALUE [=]() {
+#define COLD_BLOCK_BYREF [&]() {
+#define COLD_BLOCK_END \
+ } \
+ ();
+#endif
diff --git a/library/cpp/yson_pull/detail/number.h b/library/cpp/yson_pull/detail/number.h
new file mode 100644
index 0000000000..5595f55e05
--- /dev/null
+++ b/library/cpp/yson_pull/detail/number.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <util/system/types.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ //! Discriminator for the union stored in `number`.
+ enum class number_type {
+ float64,
+ uint64,
+ int64
+ };
+
+ //! Tagged union holding one numeric value. `type` says which member of
+ //! `value` is meaningful. The constructors are intentionally implicit, so
+ //! a double, i64 or ui64 converts to `number` directly.
+ struct number {
+     number_type type;
+     union {
+         double as_float64;
+         ui64 as_uint64;
+         i64 as_int64;
+     } value;
+
+     number(double v)
+         : type{number_type::float64} {
+         value.as_float64 = v;
+     }
+
+     number(i64 v)
+         : type{number_type::int64} {
+         value.as_int64 = v;
+     }
+
+     number(ui64 v)
+         : type{number_type::uint64} {
+         value.as_uint64 = v;
+     }
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/detail/output/buffered.h b/library/cpp/yson_pull/detail/output/buffered.h
new file mode 100644
index 0000000000..475cf34785
--- /dev/null
+++ b/library/cpp/yson_pull/detail/output/buffered.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <library/cpp/yson_pull/detail/macros.h>
+
+#include <library/cpp/yson_pull/output.h>
+
+#include <util/generic/strbuf.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NOutput {
+ //! Output stream with a fixed-size heap buffer. CRTP base: the derived
+ //! class T must provide `void write(TStringBuf)`, which receives the data
+ //! to emit.
+ template <typename T>
+ class TBuffered: public NYsonPull::NOutput::IStream {
+     TArrayHolder<ui8> buffer_;
+     size_t size_;
+
+ public:
+     //! Allocates \p buffer_size bytes and installs them as the stream buffer.
+     TBuffered(size_t buffer_size)
+         : buffer_{new ui8[buffer_size]}
+         , size_{buffer_size} {
+         reset_buffer();
+     }
+
+ protected:
+     //! Writes out any buffered bytes, then handles \p extra: it is written
+     //! through directly when it does not fit strictly inside the free
+     //! space, and copied into the buffer otherwise.
+     void do_flush_buffer(TStringBuf extra) override {
+         auto& out = buffer();
+         if (!out.is_empty()) {
+             const TStringBuf pending{reinterpret_cast<const char*>(out.begin()), out.used()};
+             do_write(pending);
+             reset_buffer();
+         }
+         if (extra.size() < out.available()) {
+             if (extra.size() > 0) {
+                 ::memcpy(out.pos(), extra.data(), extra.size());
+                 out.advance(extra.size());
+             }
+         } else {
+             do_write(extra);
+         }
+     }
+
+ private:
+     //! Hands \p data to the derived class (CRTP dispatch).
+     void do_write(TStringBuf data) {
+         static_cast<T*>(this)->write(data);
+     }
+
+     //! Points the stream buffer back at the start of the owned storage.
+     void reset_buffer() {
+         auto* first = buffer_.Get();
+         buffer().reset(first, first + size_);
+     }
+ };
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/output/stdio_file.h b/library/cpp/yson_pull/detail/output/stdio_file.h
new file mode 100644
index 0000000000..03f2b40dc5
--- /dev/null
+++ b/library/cpp/yson_pull/detail/output/stdio_file.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "buffered.h"
+
+#include <library/cpp/yson_pull/detail/macros.h>
+
+#include <library/cpp/yson_pull/exceptions.h>
+
+#include <cstdio>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NOutput {
+ //! Buffered output stream that writes to a C stdio FILE*.
+ //! The FILE* is stored as-is; nothing in this class closes it.
+ class TStdioFile: public TBuffered<TStdioFile> {
+     FILE* file_;
+
+ public:
+     TStdioFile(FILE* file, size_t buffer_size)
+         : TBuffered<TStdioFile>(buffer_size)
+         , file_(file)
+     {
+     }
+
+     //! Writes \p data with fwrite.
+     //! Throws NException::TSystemError on a short write.
+     void write(TStringBuf data) {
+         const size_t expected = data.size();
+         auto nwritten = ::fwrite(data.data(), 1, expected, file_);
+         if (Y_UNLIKELY(static_cast<size_t>(nwritten) != expected)) {
+             throw NException::TSystemError();
+         }
+     }
+ };
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/output/stream.h b/library/cpp/yson_pull/detail/output/stream.h
new file mode 100644
index 0000000000..d4810f3353
--- /dev/null
+++ b/library/cpp/yson_pull/detail/output/stream.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "buffered.h"
+
+#include <library/cpp/yson_pull/detail/macros.h>
+#include <library/cpp/yson_pull/exceptions.h>
+
+#include <util/stream/output.h>
+#include <util/stream/file.h>
+#include <util/system/file.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NOutput {
+ //! Buffered output stream that forwards data to an IOutputStream it does
+ //! not own. Only the pointer is stored.
+ //! NOTE(review): the pointer is dereferenced on every write, so it
+ //! presumably has to outlive this object -- confirm with callers.
+ class TStream: public TBuffered<TStream> {
+ IOutputStream* Output;
+
+ public:
+ TStream(IOutputStream* output, size_t buffer_size)
+ : TBuffered<TStream>(buffer_size)
+ , Output(output)
+ {
+ }
+
+ //! Passes \p data to the wrapped stream's Write().
+ void write(TStringBuf data) {
+ Output->Write(data);
+ }
+ };
+
+ //! Buffered output stream that owns its underlying output object by value.
+ //! TOutput must provide Write(TStringBuf).
+ template <typename TOutput>
+ class TOwned: public TBuffered<TOwned<TOutput>> {
+ TOutput Output;
+
+ public:
+ //! \p buffer_size sizes the TBuffered buffer; the remaining arguments are
+ //! forwarded to the constructor of the owned output.
+ template <typename... Args>
+ TOwned(size_t buffer_size, Args&&... args)
+ : TBuffered<TOwned>(buffer_size)
+ , Output(std::forward<Args>(args)...)
+ {
+ }
+
+ //! Passes \p data to the owned output's Write().
+ void write(TStringBuf data) {
+ Output.Write(data);
+ }
+ };
+
+ //! Output stream over a file descriptor, written through an owned
+ //! TUnbufferedFileOutput constructed from Duplicate(fd).
+ //! NOTE(review): since the descriptor is duplicated, the caller presumably
+ //! keeps ownership of the original fd -- confirm against Duplicate().
+ class TFHandle: public TOwned<TUnbufferedFileOutput> {
+ public:
+ TFHandle(int fd, size_t buffer_size)
+ : TOwned<TUnbufferedFileOutput>(buffer_size, Duplicate(fd))
+ {
+ }
+ };
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/percent_scalar.h b/library/cpp/yson_pull/detail/percent_scalar.h
new file mode 100644
index 0000000000..ff4571842e
--- /dev/null
+++ b/library/cpp/yson_pull/detail/percent_scalar.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+namespace NYsonPull::NDetail {
+ //! Discriminator for the union stored in `percent_scalar`.
+ enum class percent_scalar_type {
+ boolean,
+ float64
+ };
+
+ //! Value of a %-literal: either a boolean or a float64. `type` says which
+ //! member of `value` is meaningful. The constructors are intentionally
+ //! implicit.
+ struct percent_scalar {
+     //! Text boolean literals
+     static constexpr TStringBuf true_literal = "%true";
+     static constexpr TStringBuf false_literal = "%false";
+     //! Text floating-point literals
+     static constexpr TStringBuf nan_literal = "%nan";
+     static constexpr TStringBuf positive_inf_literal = "%inf";
+     static constexpr TStringBuf negative_inf_literal = "%-inf";
+
+     percent_scalar_type type;
+     union {
+         double as_float64;
+         bool as_boolean;
+     } value;
+
+     percent_scalar(double v)
+         : type{percent_scalar_type::float64} {
+         value.as_float64 = v;
+     }
+
+     percent_scalar(bool v)
+         : type{percent_scalar_type::boolean} {
+         value.as_boolean = v;
+     }
+ };
+}
diff --git a/library/cpp/yson_pull/detail/reader.h b/library/cpp/yson_pull/detail/reader.h
new file mode 100644
index 0000000000..0e02396358
--- /dev/null
+++ b/library/cpp/yson_pull/detail/reader.h
@@ -0,0 +1,677 @@
+#pragma once
+
+#include "lexer_base.h"
+#include "symbols.h"
+
+#include <library/cpp/yson_pull/reader.h>
+
+#include <util/generic/maybe.h>
+#include <util/generic/vector.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ /*! \internal */
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //! Single-character structural tokens. The numeric value of each token is
+ //! the subclass index stored in bits 2 and up of the matching char_class
+ //! value.
+ enum class special_token : ui8 {
+ // Special values:
+ // YSON
+ semicolon = 0, // ;
+ equals = 1, // =
+ hash = 2, // #
+ left_bracket = 3, // [
+ right_bracket = 4, // ]
+ left_brace = 5, // {
+ right_brace = 6, // }
+ left_angle = 7, // <
+ right_angle = 8, // >
+ };
+
+ // char_class bit layout: the two low bits select the group, the remaining
+ // bits hold the subclass index within that group.
+ //   x0b  BinaryStringOrSpecialToken
+ //     00b  binary string (only value 0 is used)
+ //     10b  special token, subclass index = special_token value
+ //   x1b  Other
+ //     01b  binary scalar
+ //       int64  = 00001b
+ //       double = 00101b
+ //       false  = 01001b
+ //       true   = 01101b
+ //       uint64 = 10001b
+ //     11b  text scalar / everything else
+ //       quote   = 00011b
+ //       number  = 00111b
+ //       string  = 01011b
+ //       none    = 10111b
+ //       percent = 11011b
+ enum class char_class : ui8 {
+ binary_string = 0, // = 00b
+
+ special_token_mask = 2, // = 10b
+ semicolon = 2 + (0 << 2),
+ equals = 2 + (1 << 2),
+ hash = 2 + (2 << 2),
+ left_bracket = 2 + (3 << 2),
+ right_bracket = 2 + (4 << 2),
+ left_brace = 2 + (5 << 2),
+ right_brace = 2 + (6 << 2),
+ left_angle = 2 + (7 << 2),
+ right_angle = 2 + (8 << 2),
+
+ binary_scalar_mask = 1,
+ binary_int64 = 1 + (0 << 2), // = 001b
+ binary_double = 1 + (1 << 2), // = 101b
+ binary_false = 1 + (2 << 2), // = 1001b
+ binary_true = 1 + (3 << 2), // = 1101b
+ binary_uint64 = 1 + (4 << 2), // = 10001b
+
+ other_mask = 3,
+ quote = 3 + (0 << 2), // = 00011b
+ number = 3 + (1 << 2), // = 00111b
+ string = 3 + (2 << 2), // = 01011b
+ percent = 3 + (6 << 2), // = 11011b
+ none = 3 + (5 << 2), // = 10111b
+ };
+
+// Extracts the subclass index of a char_class value (drops the two group bits).
+#define CHAR_SUBCLASS(x) (static_cast<ui8>(x) >> 2)
+
+ //! Classifies a byte for the reader state machine via a 256-entry table.
+ //! Whitespace bytes map to char_class::none (SP is an alias for NN), so
+ //! callers have to skip whitespace themselves before re-classifying.
+ //!
+ //! All helper macros used to build the table are #undef'd at the end.
+ //! BF, BT, BU and PC previously leaked out of this header.
+ inline char_class get_char_class(ui8 ch) {
+#define NN char_class::none
+#define BS char_class::binary_string
+#define BI char_class::binary_int64
+#define BD char_class::binary_double
+#define BF char_class::binary_false
+#define BT char_class::binary_true
+#define BU char_class::binary_uint64
+#define SP NN // char_class::space
+#define NB char_class::number
+#define ST char_class::string
+#define QU char_class::quote
+#define PC char_class::percent
+#define TT(name) (static_cast<char_class>( \
+ (static_cast<ui8>(special_token::name) << 2) | static_cast<ui8>(char_class::special_token_mask)))
+
+     static constexpr char_class lookup[256] =
+         {
+             NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+
+             // 32
+             SP, // ' '
+             NN, // '!'
+             QU, // '"'
+             TT(hash), // '#'
+             NN, // '$'
+             PC, // '%'
+             NN, // '&'
+             NN, // "'"
+             NN, // '('
+             NN, // ')'
+             NN, // '*'
+             NB, // '+'
+             NN, // ','
+             NB, // '-'
+             NN, // '.'
+             NN, // '/'
+
+             // 48
+             NB, NB, NB, NB, NB, NB, NB, NB, NB, NB, // '0' - '9'
+             NN, // ':'
+             TT(semicolon), // ';'
+             TT(left_angle), // '<'
+             TT(equals), // '='
+             TT(right_angle), // '>'
+             NN, // '?'
+
+             // 64
+             NN, // '@'
+             ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M'
+             ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z'
+             TT(left_bracket), // '['
+             NN, // '\'
+             TT(right_bracket), // ']'
+             NN, // '^'
+             ST, // '_'
+
+             // 96
+             NN, // '`'
+
+             ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm'
+             ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z'
+             TT(left_brace), // '{'
+             NN, // '|'
+             TT(right_brace), // '}'
+             NN, // '~'
+             NN, // '^?' non-printable
+             // 128
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+             NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN};
+
+#undef NN
+#undef BS
+#undef BI
+#undef BD
+#undef BF
+#undef BT
+#undef BU
+#undef SP
+#undef NB
+#undef ST
+#undef QU
+#undef PC
+#undef TT
+     return lookup[ch];
+ }
+
+ template <bool EnableLinePositionInfo>
+ class gen_reader_impl {
+ //! Reader state machine states. States below first_rare_state are handled
+ //! by next_event_hot(), the rest by next_event_cold().
+ enum class state {
+ delimiter = 0, //! expecting ';' or closing-char ('>', ']', '}')
+ maybe_value = 1, //! expecting a value or closing-char
+ maybe_key = 2, //! expecting a key or closing-char
+ equals = 3, //! expecting '=' (followed by value)
+ value = 4, //! expecting a value
+ value_noattr = 5, //! expecting a value w/o attrs (after attrs)
+
+ // by design, rare states have numbers starting from first_rare_state
+ first_rare_state = 6,
+ before_begin = first_rare_state, //! before started reading the stream
+ before_end = first_rare_state + 1, //! Expecting end of stream
+ after_end = first_rare_state + 2, //! after end of stream
+ };
+
+ lexer_base<EnableLinePositionInfo> lexer_;
+ state state_;
+ TEvent event_;
+ TVector<EEventType> stack_;
+ EStreamType mode_;
+
+ public:
+ //! Creates a reader over \p buffer for the given stream \p mode.
+ //! \p memoryLimit is passed through to the lexer. The reader starts in the
+ //! before_begin state.
+ gen_reader_impl(
+ NYsonPull::NInput::IStream& buffer,
+ EStreamType mode,
+ TMaybe<size_t> memoryLimit = {})
+ : lexer_(buffer, memoryLimit)
+ , state_{state::before_begin}
+ , mode_{mode} {
+ }
+
+ //! Returns the stored event object, i.e. the one set by the most recent
+ //! yield(). The reference is to a member that later events overwrite.
+ const TEvent& last_event() const {
+ return event_;
+ }
+
+ //! Advances the state machine and returns the stored event object.
+ //! In-stream states go through next_event_hot(); the begin/end-of-stream
+ //! states go through next_event_cold().
+ ATTRIBUTE(hot)
+ const TEvent& next_event() {
+     if (Y_UNLIKELY(state_ >= state::first_rare_state)) {
+         // these events happen no more than once per stream
+         next_event_cold();
+     } else {
+         // 'hot' handler for in-stream events
+         next_event_hot();
+     }
+     return event_;
+ }
+
+ private:
+ //! Handles the in-stream states: peeks the next significant byte,
+ //! classifies it and dispatches to the handler for the current state.
+ ATTRIBUTE(hot)
+ void next_event_hot() {
+     auto ch = lexer_.get_byte();
+     auto cls = get_char_class(ch);
+     if (Y_UNLIKELY(cls == char_class::none)) {
+         // `none` also covers whitespace, so skip it and classify again.
+         ch = lexer_.skip_space_and_get_byte();
+         if (Y_UNLIKELY(ch == NSymbol::eof)) {
+             handle_eof();
+             return;
+         }
+         cls = get_char_class(ch);
+     }
+
+     switch (state_) {
+         // maybe_value/value/value_noattr share one handler; they are
+         // distinguished later in state_value_special
+         case state::maybe_value:
+         case state::value:
+         case state::value_noattr:
+             state_value(ch, cls);
+             break;
+         case state::maybe_key:
+             state_maybe_key(ch, cls);
+             break;
+         case state::equals:
+             state_equals(ch);
+             break;
+         case state::delimiter:
+             state_delimiter(ch, cls);
+             break;
+         default:
+             Y_UNREACHABLE();
+     }
+ }
+
+ //! Handles the rare states: before_begin, before_end and after_end.
+ //! Calling this in after_end reports "Attempted read past stream end".
+ //! NOTE(review): the after_end case has no `break`; it relies on
+ //! lexer_.fail() not returning (presumably it throws) -- confirm.
+ ATTRIBUTE(noinline, cold)
+ void next_event_cold() {
+ switch (state_) {
+ case state::before_begin:
+ state_before_begin();
+ break;
+ case state::after_end:
+ lexer_.fail("Attempted read past stream end");
+ case state::before_end:
+ state_before_end();
+ break;
+ default:
+ Y_UNREACHABLE();
+ }
+ }
+
+ //! Present a scalar value for caller:
+ //! stores TEvent{TScalar{value}} as the current event.
+ template <typename T>
+ void yield(T value) {
+ event_ = TEvent{TScalar{value}};
+ }
+
+ //! Present a scalar value with non-scalar tag (i.e. key):
+ //! stores TEvent{type, TScalar{value}} as the current event.
+ template <typename T>
+ void yield(EEventType type, T value) {
+ event_ = TEvent{type, TScalar{value}};
+ }
+
+ //! Present a value from number variant: unpacks the member selected by
+ //! value.type and yields it as a scalar.
+ void yield(const number& value) {
+     const auto& payload = value.value;
+     switch (value.type) {
+         case number_type::float64:
+             yield(payload.as_float64);
+             return;
+         case number_type::uint64:
+             yield(payload.as_uint64);
+             return;
+         case number_type::int64:
+             yield(payload.as_int64);
+             return;
+     }
+ }
+
+ //! Present a value from %-literal variant: unpacks the member selected by
+ //! value.type and yields it as a scalar.
+ void yield(const percent_scalar& value) {
+     const auto& payload = value.value;
+     switch (value.type) {
+         case percent_scalar_type::float64:
+             yield(payload.as_float64);
+             return;
+         case percent_scalar_type::boolean:
+             yield(payload.as_boolean);
+             return;
+     }
+ }
+
+ //! Present a value-less event: stores TEvent{type} as the current event.
+ void yield(EEventType type) {
+ event_ = TEvent{type};
+ }
+
+ //! Push the opening of a paired event onto the nesting stack.
+ //! pop() later checks the matching close against this entry.
+ void push(EEventType type) {
+ stack_.push_back(type);
+ }
+
+ //! Close the paired event: verifies that the innermost open event is
+ //! \p first, pops it, yields \p last and selects the follow-up state.
+ //! A mismatch (or an empty stack) is reported through pop_fail().
+ void pop(EEventType first, EEventType last) {
+     const bool matched = !stack_.empty() && stack_.back() == first;
+     if (Y_UNLIKELY(!matched)) {
+         pop_fail(first, last);
+         return;
+     }
+     stack_.pop_back();
+
+     yield(last);
+     switch (first) {
+         case EEventType::BeginList:
+         case EEventType::BeginMap:
+             next(state::delimiter);
+             break;
+
+         case EEventType::BeginAttributes:
+             // attributes must be followed by the value they annotate
+             next(state::value_noattr);
+             break;
+
+         case EEventType::BeginStream:
+             next(state::after_end);
+             break;
+
+         default:
+             Y_UNREACHABLE();
+     }
+
+     // In Node mode a closed top-level composite must be followed by the
+     // end of the stream (only BeginStream is left on the stack).
+     const bool node_finished =
+         mode_ == EStreamType::Node && stack_.size() == 1 && state_ == state::delimiter;
+     if (Y_UNLIKELY(node_finished)) {
+         next(state::before_end);
+     }
+ }
+
+ //! Reports an unpaired closing event. The message names the event found
+ //! on top of the stack, or says that the stack is empty.
+ ATTRIBUTE(noinline, cold)
+ void pop_fail(EEventType first, EEventType last) {
+     if (!stack_.empty()) {
+         lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but '", stack_.back(), "' is found.");
+     } else {
+         lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but event stack is empty");
+     }
+ }
+
+ //! Transition to new_state (plain assignment, no validation).
+ void next(state new_state) {
+ state_ = new_state;
+ }
+
+ //! True when the innermost open container takes key/value pairs: a map,
+ //! an attribute map, or the top-level stream in MapFragment mode.
+ //! Requires a non-empty stack.
+ bool in_map() {
+     const auto top = stack_.back();
+     if (top == EEventType::BeginMap || top == EEventType::BeginAttributes) {
+         return true;
+     }
+     return top == EEventType::BeginStream && mode_ == EStreamType::MapFragment;
+ }
+
+ //! Handles end of input. In maybe_value, maybe_key, delimiter and
+ //! before_end the stream is closed by popping BeginStream; in any other
+ //! state the end of input is an error.
+ ATTRIBUTE(noinline, cold)
+ void handle_eof() {
+     const bool eof_allowed =
+         state_ == state::maybe_value ||
+         state_ == state::maybe_key ||
+         state_ == state::delimiter ||
+         state_ == state::before_end;
+     if (eof_allowed) {
+         pop(EEventType::BeginStream, EEventType::EndStream);
+         return;
+     }
+     lexer_.fail("Unexpected end of stream");
+ }
+
+ //! First step of every stream: emits BeginStream and picks the initial
+ //! state from the stream mode (Node -> value, ListFragment -> maybe_value,
+ //! MapFragment -> maybe_key).
+ ATTRIBUTE(noinline, cold)
+ void state_before_begin() {
+     push(EEventType::BeginStream);
+     yield(EEventType::BeginStream);
+     switch (mode_) {
+         case EStreamType::MapFragment:
+             next(state::maybe_key);
+             return;
+         case EStreamType::ListFragment:
+             next(state::maybe_value);
+             return;
+         case EStreamType::Node:
+             next(state::value);
+             return;
+         default:
+             Y_UNREACHABLE();
+     }
+ }
+
+ //! Expects nothing but whitespace up to the end of the stream; any other
+ //! byte is an error.
+ ATTRIBUTE(noinline, cold)
+ void state_before_end() {
+     const auto ch = lexer_.skip_space_and_get_byte();
+     if (ch != NSymbol::eof) {
+         lexer_.fail("Expected stream end, but found ", NCEscape::quote(ch));
+     } else {
+         handle_eof();
+     }
+ }
+
+ //! Delimiter state. The item separator is handled inline: it is consumed
+ //! and the next key/value is read right away, because a separator yields
+ //! no event of its own. Everything else goes to state_delimiter_fallback().
+ ATTRIBUTE(hot)
+ void state_delimiter(ui8 ch, char_class cls) {
+     if (Y_UNLIKELY(ch != NSymbol::item_separator)) {
+         state_delimiter_fallback(ch, cls);
+         return;
+     }
+     lexer_.advance(1);
+     next(in_map() ? state::maybe_key : state::maybe_value);
+     // immediately read next value
+     next_event_hot();
+ }
+
+ //! Delimiter state, non-separator path: accepts the closing characters
+ //! ']', '}' and '>' and reports anything else as unexpected.
+ //! The semicolon is handled by the fast track in state_delimiter().
+ //! Any special token is consumed before it is examined.
+ ATTRIBUTE(noinline, hot)
+ void state_delimiter_fallback(ui8 ch, char_class cls) {
+     const auto bits = static_cast<ui8>(cls);
+     const bool is_special =
+         (bits & 3) == static_cast<ui8>(char_class::special_token_mask);
+     if (is_special) {
+         lexer_.advance(1);
+         switch (static_cast<special_token>(bits >> 2)) {
+             case special_token::right_angle:
+                 pop(EEventType::BeginAttributes, EEventType::EndAttributes);
+                 return;
+
+             case special_token::right_brace:
+                 pop(EEventType::BeginMap, EEventType::EndMap);
+                 return;
+
+             case special_token::right_bracket:
+                 pop(EEventType::BeginList, EEventType::EndList);
+                 return;
+
+             default:
+                 break;
+         }
+     }
+
+     COLD_BLOCK_BYVALUE
+         lexer_.fail(
+             "Unexpected ", NCEscape::quote(ch), ", expected one of ",
+             NCEscape::quote(NSymbol::item_separator), ", ",
+             NCEscape::quote(NSymbol::end_list), ", ",
+             NCEscape::quote(NSymbol::end_map), ", ",
+             NCEscape::quote(NSymbol::end_attributes));
+     COLD_BLOCK_END
+ }
+
+ //! Expects a map key or the closing character of the enclosing map or
+ //! attribute map. Keys may be binary, quoted or unquoted strings. On a key
+ //! a Key event is yielded and the state becomes `equals`.
+ ATTRIBUTE(noinline, hot)
+ void state_maybe_key(ui8 ch, char_class cls) {
+     TStringBuf key;
+     if (Y_UNLIKELY(ch != NSymbol::string_marker)) {
+         switch (cls) {
+             case char_class::string:
+                 key = lexer_.read_unquoted_string();
+                 break;
+
+             case char_class::quote:
+                 lexer_.advance(1);
+                 key = lexer_.read_quoted_string();
+                 break;
+
+             case char_class::right_angle:
+                 lexer_.advance(1);
+                 pop(EEventType::BeginAttributes, EEventType::EndAttributes);
+                 return;
+
+             case char_class::right_brace:
+                 lexer_.advance(1);
+                 pop(EEventType::BeginMap, EEventType::EndMap);
+                 return;
+
+             default:
+                 COLD_BLOCK_BYVALUE
+                     lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected key string");
+                 COLD_BLOCK_END
+         }
+     } else {
+         // Keys are always strings, binary-string keys are the fast lane
+         lexer_.advance(1);
+         key = lexer_.read_binary_string();
+     }
+
+     yield(EEventType::Key, key);
+     next(state::equals);
+ }
+
+ //! Expects the key/value separator after a key. The separator produces no
+ //! event of its own, so after consuming it the following value is read
+ //! immediately via next_event_hot().
+ ATTRIBUTE(hot)
+ void state_equals(ui8 ch) {
+ // skip '='
+ if (Y_UNLIKELY(ch != NSymbol::key_value_separator)) {
+ COLD_BLOCK_BYVALUE
+ lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected ", NCEscape::quote(NSymbol::key_value_separator));
+ COLD_BLOCK_END
+ }
+ lexer_.advance(1);
+ next(state::value);
+ // immediately read the following value
+ // (this symbol yields no result)
+ next_event_hot();
+ }
+
+ //! Reads a value (states maybe_value, value and value_noattr).
+ //! Dispatches on the two group bits of the character class:
+ //!   x0b -> binary string (00b) or special token (10b), marker consumed here
+ //!   x1b -> binary scalar (01b) or text scalar (11b)
+ //! Scalars and binary strings leave the machine in the delimiter state.
+ ATTRIBUTE(noinline, hot)
+ void state_value(ui8 ch, char_class cls) {
+     const auto bits = static_cast<ui8>(cls);
+     const bool low_bit = (bits & 1) != 0;
+     const bool second_bit = (bits & (1 << 1)) != 0;
+
+     if (!low_bit) { // BinaryStringOrOtherSpecialToken = x0b
+         lexer_.advance(1);
+         if (second_bit) {
+             // special token
+             state_value_special(static_cast<special_token>(bits >> 2), ch);
+         } else {
+             // binary string
+             yield(lexer_.read_binary_string());
+             next(state::delimiter);
+         }
+         return;
+     }
+
+     if (second_bit) { // Other = xxx11b
+         state_value_text_scalar(cls);
+     } else { // BinaryScalar = x01b
+         state_value_binary_scalar(cls);
+     }
+     next(state::delimiter);
+ }
+
+ //! Handles a special token found where a value is expected. The token
+ //! has already been consumed by the caller.
+ //!   '#', '[', '{' : always accepted as value starters
+ //!   ']', '}'      : accepted only in the maybe_value state
+ //!   '<'           : accepted unless attributes were just read (value_noattr)
+ //! Everything else is reported as unexpected.
+ ATTRIBUTE(noinline)
+ void state_value_special(special_token token, ui8 ch) {
+     switch (token) {
+         case special_token::hash:
+             yield(TScalar{});
+             next(state::delimiter);
+             return;
+
+         case special_token::left_bracket:
+             push(EEventType::BeginList);
+             yield(EEventType::BeginList);
+             next(state::maybe_value);
+             return;
+
+         case special_token::left_brace:
+             push(EEventType::BeginMap);
+             yield(EEventType::BeginMap);
+             next(state::maybe_key);
+             return;
+
+         // right_angle is impossible in maybe_value state
+         // (only in delimiter, maybe_key)
+         case special_token::right_bracket:
+             if (state_ == state::maybe_value) {
+                 pop(EEventType::BeginList, EEventType::EndList);
+                 return;
+             }
+             break;
+
+         case special_token::right_brace:
+             if (state_ == state::maybe_value) {
+                 pop(EEventType::BeginMap, EEventType::EndMap);
+                 return;
+             }
+             break;
+
+         case special_token::left_angle:
+             // attributes are not allowed after attributes
+             if (state_ != state::value_noattr) {
+                 push(EEventType::BeginAttributes);
+                 yield(EEventType::BeginAttributes);
+                 next(state::maybe_key);
+                 return;
+             }
+             break;
+
+         default:
+             break;
+     }
+
+     COLD_BLOCK_BYVALUE
+         lexer_.fail("Unexpected ", NCEscape::quote(ch));
+     COLD_BLOCK_END
+ }
+
+ //! Consumes the one-byte binary marker and yields the scalar it
+ //! introduces. The boolean markers carry no payload.
+ ATTRIBUTE(hot)
+ void state_value_binary_scalar(char_class cls) {
+     lexer_.advance(1);
+     switch (cls) {
+         case char_class::binary_int64:
+             yield(lexer_.read_binary_int64());
+             break;
+
+         case char_class::binary_uint64:
+             yield(lexer_.read_binary_uint64());
+             break;
+
+         case char_class::binary_double:
+             yield(lexer_.read_binary_double());
+             break;
+
+         case char_class::binary_true:
+             yield(true);
+             break;
+
+         case char_class::binary_false:
+             yield(false);
+             break;
+
+         default:
+             Y_UNREACHABLE();
+     }
+ }
+
+ //! Yields a text scalar: quoted string, numeric literal, unquoted string
+ //! or %-literal. The opening quote and the '%' are consumed here; numbers
+ //! and unquoted strings are read starting at the current byte.
+ //! A byte of class `none` is not a valid value start.
+ ATTRIBUTE(noinline)
+ void state_value_text_scalar(char_class cls) {
+     switch (cls) {
+         case char_class::number:
+             yield(lexer_.read_numeric());
+             break;
+
+         case char_class::string:
+             yield(lexer_.read_unquoted_string());
+             break;
+
+         case char_class::quote:
+             lexer_.advance(1);
+             yield(lexer_.read_quoted_string());
+             break;
+
+         case char_class::percent:
+             lexer_.advance(1);
+             yield(lexer_.read_percent_scalar());
+             break;
+
+         case char_class::none:
+             COLD_BLOCK_BYVALUE
+                 lexer_.fail("Invalid yson value.");
+             COLD_BLOCK_END
+             break;
+
+         default:
+             Y_UNREACHABLE();
+     }
+ }
+ };
+
+ //! Reader instantiation with EnableLinePositionInfo = false.
+ //! Inherits the constructors of gen_reader_impl<false>.
+ class reader_impl: public gen_reader_impl<false> {
+ public:
+ using gen_reader_impl<false>::gen_reader_impl;
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/detail/stream_counter.h b/library/cpp/yson_pull/detail/stream_counter.h
new file mode 100644
index 0000000000..3b41b27eb6
--- /dev/null
+++ b/library/cpp/yson_pull/detail/stream_counter.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <library/cpp/yson_pull/position_info.h>
+
+#include <cstddef>
+
+namespace NYsonPull {
+ namespace NDetail {
+ //! Tracks the position within the input stream. Only the two explicit
+ //! specializations below are defined: <true> also tracks line and column,
+ //! <false> tracks the byte offset only.
+ template <bool EnableLinePositionInfo>
+ class stream_counter;
+
+ //! Position counter that tracks byte offset, line and column.
+ //! Line and column both start at 1.
+ template <>
+ class stream_counter<true> {
+ private:
+     size_t offset_ = 0;
+     size_t line_ = 1;
+     size_t column_ = 1;
+
+ public:
+     //! Current position as {offset, line, column}.
+     TPositionInfo info() const {
+         return {offset_, line_, column_};
+     }
+
+     //! Accounts for the consumed bytes [begin, end): every byte moves the
+     //! column forward, and a '\n' starts a new line at column 1.
+     void update(const ui8* begin, const ui8* end) {
+         offset_ += end - begin;
+         for (const ui8* p = begin; p != end; ++p) {
+             if (*p == '\n') { //TODO: memchr
+                 ++line_;
+                 column_ = 1;
+             } else {
+                 ++column_;
+             }
+         }
+     }
+ };
+
+ //! Position counter that tracks the byte offset only. The line and column
+ //! fields of the reported TPositionInfo are value-initialized ({}).
+ template <>
+ class stream_counter<false> {
+ private:
+ size_t offset_ = 0;
+
+ public:
+ //! Current position; only the offset carries information.
+ TPositionInfo info() const {
+ return {offset_, {}, {}};
+ }
+
+ //! Accounts for the consumed bytes [begin, end).
+ void update(const ui8* begin, const ui8* end) {
+ offset_ += end - begin;
+ }
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/detail/symbols.h b/library/cpp/yson_pull/detail/symbols.h
new file mode 100644
index 0000000000..fe94bb9c41
--- /dev/null
+++ b/library/cpp/yson_pull/detail/symbols.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/system/types.h>
+
+namespace NYsonPull {
+ namespace NDetail {
+ //! Single-byte symbols of the YSON text and binary encodings.
+ namespace NSymbol {
+     //! Indicates the beginning of a list.
+     constexpr ui8 begin_list = '[';
+     //! Indicates the end of a list.
+     constexpr ui8 end_list = ']';
+
+     //! Indicates the beginning of a map.
+     constexpr ui8 begin_map = '{';
+     //! Indicates the end of a map.
+     constexpr ui8 end_map = '}';
+
+     //! Indicates the beginning of an attribute map.
+     constexpr ui8 begin_attributes = '<';
+     //! Indicates the end of an attribute map.
+     constexpr ui8 end_attributes = '>';
+
+     //! Separates items in lists and pairs in maps or attribute maps.
+     constexpr ui8 item_separator = ';';
+     //! Separates keys from values in maps and attribute maps.
+     constexpr ui8 key_value_separator = '=';
+
+     //! Indicates an entity.
+     constexpr ui8 entity = '#';
+     //! Indicates end of stream.
+     constexpr ui8 eof = '\0';
+
+     //! Marks the beginning of a binary string literal.
+     constexpr ui8 string_marker = '\x01';
+     //! Marks the beginning of a binary int64 literal.
+     constexpr ui8 int64_marker = '\x02';
+     //! Marks the beginning of a binary uint64 literal.
+     constexpr ui8 uint64_marker = '\x06';
+     //! Marks the beginning of a binary double literal.
+     constexpr ui8 double_marker = '\x03';
+     //! Marks a binary `false' boolean value.
+     constexpr ui8 false_marker = '\x04';
+     //! Marks a binary `true' boolean value.
+     constexpr ui8 true_marker = '\x05';
+
+     //! Text string quote symbol
+     constexpr ui8 quote = '"';
+ }
+ }
+}
diff --git a/library/cpp/yson_pull/detail/traits.h b/library/cpp/yson_pull/detail/traits.h
new file mode 100644
index 0000000000..869a3b9c44
--- /dev/null
+++ b/library/cpp/yson_pull/detail/traits.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <type_traits>
+
+namespace NYsonPull {
+ namespace NDetail {
+ //! SFINAE helpers keyed on the signedness of an integral type.
+ namespace NTraits {
+     //! \p U when \p T is signed; a substitution failure otherwise.
+     template <typename T, typename U>
+     using if_signed = std::enable_if_t<std::is_signed<T>::value, U>;
+
+     //! \p U when \p T is unsigned; a substitution failure otherwise.
+     template <typename T, typename U>
+     using if_unsigned = std::enable_if_t<std::is_unsigned<T>::value, U>;
+
+     //! Unsigned counterpart of a signed \p T; only defined for signed \p T.
+     template <typename T>
+     using to_unsigned = std::enable_if_t<
+         std::is_signed<T>::value,
+         std::make_unsigned_t<T>>;
+
+     //! Signed counterpart of an unsigned \p T; only defined for unsigned \p T.
+     template <typename T>
+     using to_signed = std::enable_if_t<
+         std::is_unsigned<T>::value,
+         std::make_signed_t<T>>;
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/varint.h b/library/cpp/yson_pull/detail/varint.h
new file mode 100644
index 0000000000..38bf45d925
--- /dev/null
+++ b/library/cpp/yson_pull/detail/varint.h
@@ -0,0 +1,260 @@
+#pragma once
+
+#include "byte_reader.h"
+#include "byte_writer.h"
+#include "traits.h"
+#include "zigzag.h"
+
+#include <util/system/types.h>
+
+#include <cstddef>
+#include <limits>
+#include <type_traits>
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NVarInt {
+ namespace NImpl {
+ //! Largest number of bytes a varint encoding of \p T can occupy
+ //! (each byte carries 7 payload bits).
+ template <typename T>
+ constexpr inline size_t max_size() {
+     constexpr size_t bits = sizeof(T) * 8;
+     return (bits + 6) / 7;
+ }
+
+ //! Encodes \p value as a varint, passing each output byte to \p consume
+ //! in order. Returns the number of bytes produced (always at least one).
+ template <typename T>
+ inline size_t write(ui64 value, T&& consume) {
+     size_t nwritten = 0;
+     for (bool more = true; more;) {
+         ++nwritten;
+         ui8 byte = static_cast<ui8>(value & 0x7F);
+         value >>= 7;
+         more = (value != 0);
+         if (more) {
+             // Continuation bit: further bytes follow.
+             byte |= 0x80;
+         }
+         consume(byte);
+     }
+     return nwritten;
+ }
+
+ template <typename U>
+ inline bool read_fast(byte_reader<U>& reader, ui64* value) { // Unchecked 64-bit varint decode straight from the buffer; performs no bounds checks itself.
+ auto& buf = reader.stream().buffer();
+ auto* ptr = buf.pos(); // local cursor; the reader is advanced only on success
+ ui32 b;
+
+ // Splitting into 32-bit pieces gives better performance on 32-bit
+ // processors.
+ ui32 part0 = 0, part1 = 0, part2 = 0; // part0: bits 0-27, part1: bits 28-55, part2: bits 56 and up
+
+ b = *(ptr++);
+ part0 = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part0 |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part0 |= (b & 0x7F) << 14;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part0 |= (b & 0x7F) << 21;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 |= (b & 0x7F) << 14;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 |= (b & 0x7F) << 21;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part2 = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part2 |= (b & 0x7F) << 7; // tenth byte: after the << 56 below only its lowest payload bit fits into 64 bits
+ if (!(b & 0x80))
+ goto done;
+
+ // We have overrun the maximum size of a Varint (10 bytes). The data
+ // must be corrupt.
+ return false; // reader position and *value are left untouched
+
+ done:
+ reader.advance(ptr - buf.pos()); // commit exactly the bytes that were decoded
+ *value = (static_cast<ui64>(part0)) | (static_cast<ui64>(part1) << 28) | (static_cast<ui64>(part2) << 56);
+ return true;
+ }
+
+ template <typename U>
+ inline bool read_fast(byte_reader<U>& reader, ui32* value) { // Unchecked 32-bit varint decode straight from the buffer; performs no bounds checks itself.
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+ auto& buf = reader.stream().buffer();
+ auto* ptr = buf.pos(); // local cursor; the reader is advanced only on success
+ ui32 b;
+ ui32 result;
+
+ b = *(ptr++);
+ result = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= (b & 0x7F) << 14;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= (b & 0x7F) << 21;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= b << 28; // only the low 4 bits of this byte survive the 32-bit shift
+ if (!(b & 0x80))
+ goto done;
+
+ // FIXME
+ // If the input is larger than 32 bits, we still need to read it all
+ // and discard the high-order bits.
+ // NOTE(review): read_slow(ui32) returns false for values above the ui32 maximum, whereas this path truncates them -- the two paths disagree on oversized input.
+
+ for (size_t i = 0; i < max_size<ui64>() - max_size<ui32>(); i++) { // skip the remaining bytes of an over-long encoding
+ b = *(ptr++);
+ if (!(b & 0x80))
+ goto done;
+ }
+
+ // We have overrun the maximum size of a Varint (10 bytes). Assume
+ // the data is corrupt.
+ return false; // reader position and *value are left untouched
+
+ done:
+ reader.advance(ptr - buf.pos()); // commit exactly the bytes that were decoded
+ *value = result;
+ return true;
+ }
+
+ //! Checked 64-bit varint decode that refills the buffer before every byte.
+ //! Returns false when the stream ends mid-value or when the encoding
+ //! exceeds max_size<ui64>() bytes; \p value is written only on success.
+ template <typename U>
+ inline bool read_slow(byte_reader<U>& reader, ui64* value) {
+     // Slow path: This read might cross the end of the buffer, so we
+     // need to check and refresh the buffer if and when it does.
+
+     auto& buf = reader.stream().buffer();
+     ui64 accumulated = 0;
+
+     for (int index = 0;; ++index) {
+         if (index == max_size<ui64>()) {
+             return false;
+         }
+         reader.fill_buffer();
+         if (reader.stream().at_end()) {
+             return false;
+         }
+         const ui32 b = *buf.pos();
+         accumulated |= static_cast<ui64>(b & 0x7F) << (7 * index);
+         reader.advance(1);
+         if (!(b & 0x80)) {
+             // Continuation bit clear: this was the last byte.
+             break;
+         }
+     }
+
+     *value = accumulated;
+     return true;
+ }
+
+ //! Checked 32-bit varint decode built on the 64-bit slow path.
+ //! Fails when the 64-bit read fails or the value does not fit into ui32.
+ template <typename U>
+ inline bool read_slow(byte_reader<U>& reader, ui32* value) {
+     ui64 wide;
+     // fallback to 64-bit reading
+     if (!read_slow(reader, &wide)) {
+         return false;
+     }
+     if (wide > std::numeric_limits<ui32>::max()) {
+         return false;
+     }
+
+     *value = static_cast<ui32>(wide);
+     return true;
+ }
+
+ // The following functions are an adaptation
+ // of Protobuf code from coded_stream.cc
+ //! Picks the unchecked fast decoder when the buffered data makes it safe,
+ //! and the checked slow decoder otherwise.
+ template <typename T, typename U>
+ inline bool read_dispatch(byte_reader<U>& reader, T* value) {
+     auto& buf = reader.stream().buffer();
+     // NOTE: checking for 64-bit max_size(), since 32-bit
+     // read_fast() might fallback to 64-bit reading
+     //
+     // Optimization: If the Varint ends at exactly the end of the buffer,
+     // we can detect that and still use the fast path.
+     const bool fast_path_is_safe =
+         buf.available() >= max_size<ui64>() ||
+         (!buf.is_empty() && !(buf.end()[-1] & 0x80));
+
+     if (!fast_path_is_safe) {
+         // Really slow case: we will incur the cost of an extra function call here,
+         // but moving this out of line reduces the size of this function, which
+         // improves the common case. In micro benchmarks, this is worth about 10-15%
+         return read_slow(reader, value);
+     }
+     return read_fast(reader, value);
+ }
+
+ }
+
+ // Various functions to read/write varints.
+
+ // Returns the number of bytes written.
+ //! Writes the varint encoding of an unsigned \p value to \p data.
+ //! Returns the number of bytes written.
+ template <typename T>
+ inline NTraits::if_unsigned<T, size_t> write(ui8* data, T value) {
+     const auto widened = static_cast<ui64>(value);
+     auto store = [&data](ui8 byte) {
+         *data = byte;
+         ++data;
+     };
+     return NImpl::write(widened, store);
+ }
+
+ //! Writes the varint encoding of a signed \p value to \p data; the value
+ //! is zigzag-encoded first. Returns the number of bytes written.
+ template <typename T>
+ inline NTraits::if_signed<T, size_t> write(ui8* data, T value) {
+     const auto zigzagged = static_cast<ui64>(NZigZag::encode(value));
+     auto store = [&data](ui8 byte) {
+         *data = byte;
+         ++data;
+     };
+     return NImpl::write(zigzagged, store);
+ }
+
+ //! Encodes \p value into a stack buffer sized by max_size<T>() and
+ //! forwards the encoded bytes to \p stream.
+ template <typename T, typename U>
+ inline void write(byte_writer<U>& stream, T value) {
+     ui8 encoded[NImpl::max_size<T>()];
+     const auto encoded_size = write(encoded, value);
+     stream.write(encoded, encoded_size);
+ }
+
+ //! Reads an unsigned varint from \p reader.
+ //! Calls reader.fail() when the value cannot be decoded.
+ template <typename T, typename U>
+ inline NTraits::if_unsigned<T, T> read(byte_reader<U>& reader) {
+     auto& buf = reader.stream().buffer();
+     // Shortcut for a buffered single-byte value (continuation bit clear).
+     if (!buf.is_empty()) {
+         const auto first = *buf.pos();
+         if (first < 0x80) {
+             reader.advance(1);
+             return static_cast<T>(first);
+         }
+     }
+
+     auto value = T{};
+     if (Y_UNLIKELY(!NImpl::read_dispatch(reader, &value))) {
+         reader.fail("Error parsing varint value");
+     }
+     return value;
+ }
+
+ //! Reads a signed varint: decodes the unsigned counterpart of \p T and
+ //! undoes the zigzag mapping.
+ template <typename T, typename U>
+ inline NTraits::if_signed<T, T> read(byte_reader<U>& reader) {
+     using TRaw = NTraits::to_unsigned<T>;
+     const auto raw = read<TRaw>(reader);
+     return NZigZag::decode(raw);
+ }
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/detail/writer.h b/library/cpp/yson_pull/detail/writer.h
new file mode 100644
index 0000000000..b24b994292
--- /dev/null
+++ b/library/cpp/yson_pull/detail/writer.h
@@ -0,0 +1,566 @@
+#pragma once
+
+#include "byte_writer.h"
+#include "cescape.h"
+#include "percent_scalar.h"
+#include "stream_counter.h"
+#include "symbols.h"
+#include "varint.h"
+
+#include <library/cpp/yson_pull/consumer.h>
+#include <library/cpp/yson_pull/event.h>
+#include <library/cpp/yson_pull/output.h>
+#include <library/cpp/yson_pull/stream_type.h>
+#include <library/cpp/yson_pull/writer.h>
+
+#include <util/generic/vector.h>
+#include <util/system/yassert.h>
+
+#include <cmath>
+
+namespace NYsonPull {
+ namespace NDetail {
+ class writer: public IConsumer { // Base YSON writer: validates the event sequence and emits structural symbols; scalar and key formatting is left to subclasses.
+ enum class state {
+ maybe_key, // a key, or the end of the enclosing map/attributes (or stream), may follow
+ maybe_value, // a value, or the end of the enclosing list (or stream), may follow
+ value, // a value, optionally preceded by attributes, must follow
+ value_noattr, // a value must follow; attributes have just been closed
+ before_begin, // only BeginStream is accepted
+ before_end, // only EndStream is accepted
+ after_end, // every further event fails
+ };
+
+ byte_writer<stream_counter<false>> stream_;
+ TVector<EEventType> stack_; // open Begin* events, innermost last
+ bool need_item_separator_ = false; // whether begin_node() must emit ';' first
+ EStreamType mode_ = EStreamType::ListFragment;
+ state state_ = state::before_begin;
+
+ public:
+ void OnBeginStream() override {
+ update_state(EEventType::BeginStream);
+ }
+
+ void OnEndStream() override {
+ update_state(EEventType::EndStream);
+ stream_.flush_buffer();
+ }
+
+ void OnBeginList() override {
+ begin_node(); // runs before update_state(), i.e. before the new collection is pushed on the stack
+ write(NSymbol::begin_list);
+ update_state(EEventType::BeginList);
+ begin_collection(collection_type::list);
+ }
+
+ void OnEndList() override {
+ update_state(EEventType::EndList); // pops the list first, so the hooks below see the parent's depth
+ end_collection(collection_type::list);
+ write(NSymbol::end_list);
+ end_node();
+ }
+
+ void OnBeginMap() override {
+ begin_node();
+ write(NSymbol::begin_map);
+ update_state(EEventType::BeginMap);
+ begin_collection(collection_type::map);
+ }
+
+ void OnEndMap() override {
+ update_state(EEventType::EndMap);
+ end_collection(collection_type::map);
+ write(NSymbol::end_map);
+ end_node();
+ }
+
+ void OnBeginAttributes() override {
+ begin_node();
+ write(NSymbol::begin_attributes);
+ update_state(EEventType::BeginAttributes);
+ begin_collection(collection_type::attributes);
+ }
+
+ void OnEndAttributes() override {
+ update_state(EEventType::EndAttributes);
+ end_collection(collection_type::attributes);
+ write(NSymbol::end_attributes);
+ // no end_node
+ }
+
+ void OnEntity() override {
+ begin_node();
+ update_state(EEventType::Scalar); // an entity is validated as a scalar event
+ write(NSymbol::entity);
+ end_node();
+ }
+
+ protected:
+ enum class collection_type {
+ list,
+ map,
+ attributes,
+ };
+
+ writer(NYsonPull::NOutput::IStream& stream, EStreamType mode)
+ : stream_(stream)
+ , mode_{mode} {
+ }
+
+ bool need_item_separator() const {
+ return need_item_separator_;
+ }
+ void need_item_separator(bool value) {
+ need_item_separator_ = value;
+ }
+
+ size_t depth() const { // nesting depth of user-visible collections; the stack must be non-empty
+ Y_ASSERT(!stack_.empty());
+ if (mode_ == EStreamType::Node) {
+ return stack_.size() - 1; // discount the BeginStream entry
+ } else {
+ return stack_.size() - 2; // discount BeginStream and the implicit fragment collection pushed by begin_stream()
+ }
+ }
+ EStreamType mode() const {
+ return mode_;
+ }
+
+ void write(ui8 c) {
+ stream_.write(c);
+ }
+
+ void write(TStringBuf value) {
+ write_raw(value.data(), value.size());
+ }
+
+ void write_raw(const void* ptr, size_t len) {
+ stream_.write(static_cast<const ui8*>(ptr), len);
+ }
+
+ template <typename T>
+ void write_varint(T value) {
+ NVarInt::write(stream_, value);
+ }
+
+ void write_escaped_string(TStringBuf value) { // emits `value` between quote symbols, encoded by NCEscape::encode
+ write(NSymbol::quote);
+ NCEscape::encode(stream_, value);
+ write(NSymbol::quote);
+ }
+
+ void push(EEventType type) {
+ stack_.push_back(type);
+ }
+
+ void pop(EEventType type) { // removes the innermost open event, failing unless it equals `type`
+ if (stack_.empty()) {
+ fail("Unpaired events: empty event stack");
+ }
+ if (stack_.back() != type) {
+ fail("Unpaired events: expected ", type, ", got ", stack_.back());
+ }
+ stack_.pop_back();
+ }
+
+ void update_state(EEventType event) { // validates `event` against the current state, updates stack_ and state_, and fails on an unexpected event
+ switch (state_) {
+ case state::before_begin:
+ if (event != EEventType::BeginStream) {
+ fail("Expected begin_stream, got ", event);
+ }
+ begin_stream();
+ return;
+
+ case state::before_end:
+ if (event != EEventType::EndStream) {
+ fail("Expected end_stream, got ", event);
+ }
+ end_stream();
+ return;
+
+ case state::after_end:
+ fail("Attempted write past stream end"); // fail() is [[noreturn]], so control never reaches the next case
+
+ case state::maybe_key:
+ if (event == EEventType::Key) {
+ state_ = state::value;
+ return;
+ }
+
+ switch (event) {
+ case EEventType::EndStream:
+ end_stream();
+ return;
+
+ case EEventType::EndMap:
+ pop(EEventType::BeginMap);
+ next_state();
+ return;
+
+ case EEventType::EndAttributes:
+ pop(EEventType::BeginAttributes);
+ state_ = state::value_noattr; // the attributed value itself must follow
+ return;
+
+ default:
+ fail("Unexpected event ", event, " in maybe_key");
+ }
+ break;
+
+ case state::maybe_value:
+ switch (event) {
+ case EEventType::EndList:
+ pop(EEventType::BeginList);
+ next_state();
+ return;
+
+ case EEventType::EndStream:
+ end_stream();
+ return;
+
+ default:
+ break; // any other event is handled as a value below
+ }
+ [[fallthrough]];
+ case state::value:
+ if (event == EEventType::BeginAttributes) {
+ push(EEventType::BeginAttributes);
+ next_state();
+ return;
+ }
+ [[fallthrough]];
+ case state::value_noattr:
+ switch (event) {
+ case EEventType::Scalar:
+ next_state();
+ return;
+
+ case EEventType::BeginList:
+ push(EEventType::BeginList);
+ next_state();
+ return;
+
+ case EEventType::BeginMap:
+ push(EEventType::BeginMap);
+ next_state();
+ return;
+
+ default:
+ fail("Unexpected event ", event, " (in value_*)");
+ }
+ break;
+ }
+ }
+
+ void next_state() { // derives state_ from the innermost open event on the stack
+ Y_ASSERT(!stack_.empty());
+ switch (stack_.back()) {
+ case EEventType::BeginMap:
+ case EEventType::BeginAttributes:
+ state_ = state::maybe_key;
+ break;
+
+ case EEventType::BeginList:
+ state_ = state::maybe_value;
+ break;
+
+ case EEventType::BeginStream:
+ state_ = state::before_end;
+ break;
+
+ default:
+ Y_UNREACHABLE();
+ }
+ }
+
+ void begin_stream() { // fragment modes push an implicit list/map on top of BeginStream
+ push(EEventType::BeginStream);
+ switch (mode_) {
+ case EStreamType::ListFragment:
+ push(EEventType::BeginList);
+ state_ = state::maybe_value;
+ break;
+
+ case EStreamType::MapFragment:
+ push(EEventType::BeginMap);
+ state_ = state::maybe_key;
+ break;
+
+ case EStreamType::Node:
+ state_ = state::value;
+ break;
+ }
+ }
+
+ void end_stream() { // pops what begin_stream() pushed; pop() fails if a collection is still open
+ switch (mode_) {
+ case EStreamType::ListFragment:
+ pop(EEventType::BeginList);
+ break;
+
+ case EStreamType::MapFragment:
+ pop(EEventType::BeginMap);
+ break;
+
+ case EStreamType::Node:
+ break;
+ }
+ pop(EEventType::BeginStream);
+ state_ = state::after_end;
+ }
+
+ virtual void begin_node() { // formatting hook: called before a node is written
+ if (need_item_separator_) {
+ write(NSymbol::item_separator);
+ }
+ }
+
+ virtual void end_node() { // formatting hook: called after a node is written
+ need_item_separator_ = true;
+ }
+
+ virtual void begin_key() { // formatting hook: called before a key is written
+ begin_node();
+ }
+
+ virtual void end_key() { // formatting hook: called after a key is written
+ need_item_separator_ = false;
+ write(NSymbol::key_value_separator);
+ }
+
+ virtual void begin_collection(collection_type type) { // formatting hook: called after the opening symbol is written
+ Y_UNUSED(type);
+ need_item_separator_ = false;
+ }
+
+ virtual void end_collection(collection_type type) { // formatting hook: called before the closing symbol is written
+ need_item_separator_ = (type != collection_type::attributes);
+ }
+
+ template <typename... Args>
+ ATTRIBUTE(noinline, cold)
+ void fail[[noreturn]](const char* msg, Args&&... args) { // throws TBadOutput built from the formatted message and the current stream position
+ auto formatted_message = format_string(
+ msg,
+ std::forward<Args>(args)...);
+ throw NException::TBadOutput(
+ formatted_message,
+ stream_.counter().info());
+ }
+ };
+
+ //! Writer producing the binary YSON encoding: every scalar and key is a
+ //! one-byte type marker followed by its payload.
+ class TBinaryWriterImpl final: public writer {
+ public:
+     TBinaryWriterImpl(NYsonPull::NOutput::IStream& stream, EStreamType mode)
+         : writer(stream, mode)
+     {
+     }
+
+     //! Boolean: a single marker byte, no payload.
+     void OnScalarBoolean(bool value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write(value ? NSymbol::true_marker : NSymbol::false_marker);
+         end_node();
+     }
+
+     //! Signed integer: marker followed by the varint of the value.
+     void OnScalarInt64(i64 value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write(NSymbol::int64_marker);
+         write_varint(value);
+         end_node();
+     }
+
+     //! Unsigned integer: marker followed by the varint of the value.
+     void OnScalarUInt64(ui64 value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write(NSymbol::uint64_marker);
+         write_varint(value);
+         end_node();
+     }
+
+     //! Double: marker followed by the raw object bytes of the value.
+     void OnScalarFloat64(double value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write(NSymbol::double_marker);
+         write_raw(&value, sizeof value);
+         end_node();
+     }
+
+     //! String: marker, i32 length varint, then the bytes.
+     //! Fails with TBadOutput if the length does not fit into i32.
+     void OnScalarString(TStringBuf value) override {
+         // Validate before touching state or output so that a failure
+         // leaves the writer unchanged.
+         const i32 length = checked_length(value);
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write_string_payload(value, length);
+         end_node();
+     }
+
+     //! Key: encoded like a string scalar, between begin_key()/end_key().
+     //! Fails with TBadOutput if the length does not fit into i32.
+     void OnKey(TStringBuf name) override {
+         const i32 length = checked_length(name);
+         update_state(EEventType::Key);
+
+         begin_key();
+         write_string_payload(name, length);
+         end_key();
+     }
+
+ private:
+     //! Returns the size of \p value as i32, failing instead of silently
+     //! truncating when it is not representable (which would corrupt the
+     //! length prefix and therefore the whole stream).
+     i32 checked_length(TStringBuf value) {
+         constexpr size_t max_length = 0x7FFFFFFF; // largest i32 value
+         if (value.size() > max_length) {
+             fail("String of ", value.size(), " bytes is too long for binary YSON");
+         }
+         return static_cast<i32>(value.size());
+     }
+
+     //! Emits the string marker, the length varint and the raw bytes.
+     void write_string_payload(TStringBuf value, i32 length) {
+         write(NSymbol::string_marker);
+         write_varint(length);
+         write_raw(value.data(), value.size());
+     }
+ };
+
+ //! Writer producing the text YSON encoding.
+ class TTextWriterImpl: public writer {
+ public:
+     TTextWriterImpl(NYsonPull::NOutput::IStream& stream, EStreamType mode)
+         : writer(stream, mode)
+     {
+     }
+
+     //! Boolean: written as one of the percent_scalar literals.
+     void OnScalarBoolean(bool value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write(value ? percent_scalar::true_literal : percent_scalar::false_literal);
+         end_node();
+     }
+
+     //! Signed integer: decimal digits.
+     void OnScalarInt64(i64 value) override {
+         update_state(EEventType::Scalar);
+
+         char digits[32];
+         const auto length = ::snprintf(digits, sizeof(digits), "%" PRIi64, value);
+
+         begin_node();
+         write_raw(digits, length);
+         end_node();
+     }
+
+     //! Unsigned integer: decimal digits followed by a 'u' suffix.
+     void OnScalarUInt64(ui64 value) override {
+         update_state(EEventType::Scalar);
+
+         char digits[32];
+         const auto length = ::snprintf(digits, sizeof(digits), "%" PRIu64, value);
+
+         begin_node();
+         write_raw(digits, length);
+         write('u');
+         end_node();
+     }
+
+     //! Double: finite values are printed with 17 significant digits;
+     //! NaN and the infinities use the percent_scalar literals.
+     void OnScalarFloat64(double value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+
+         if (std::isnan(value)) {
+             write(percent_scalar::nan_literal);
+         } else if (!std::isfinite(value)) {
+             // Not NaN and not finite: one of the two infinities.
+             if (value > 0) {
+                 write(percent_scalar::positive_inf_literal);
+             } else {
+                 write(percent_scalar::negative_inf_literal);
+             }
+         } else {
+             char text[32];
+             const auto length = ::snprintf(text, sizeof(text), "%#.17lg", value);
+             write_raw(text, length);
+         }
+
+         end_node();
+     }
+
+     //! String: quoted and escaped.
+     void OnScalarString(TStringBuf value) override {
+         update_state(EEventType::Scalar);
+
+         begin_node();
+         write_escaped_string(value);
+         end_node();
+     }
+
+     //! Key: quoted and escaped, between begin_key()/end_key().
+     void OnKey(TStringBuf name) override {
+         update_state(EEventType::Key);
+
+         begin_key();
+         write_escaped_string(name);
+         end_key();
+     }
+
+ protected:
+     //! Separates sibling nodes with "; ".
+     void begin_node() override {
+         if (!need_item_separator()) {
+             return;
+         }
+         write(NSymbol::item_separator);
+         write(' ');
+     }
+
+     //! In fragment modes every depth-0 node is terminated with ";\n";
+     //! otherwise the base-class behaviour applies.
+     void end_node() override {
+         if (mode() == EStreamType::Node || depth() != 0) {
+             writer::end_node();
+             return;
+         }
+         write(NSymbol::item_separator);
+         write('\n');
+         need_item_separator(false);
+     }
+
+     //! Surrounds the key/value separator with single spaces.
+     void end_key() override {
+         write(' ');
+         writer::end_key();
+         write(' ');
+     }
+ };
+
+ //! Text writer that puts every item on its own line, indented by
+ //! \p indent_size spaces per nesting level.
+ class TPrettyWriterImpl final: public TTextWriterImpl {
+     size_t indent_size_;
+
+ public:
+     TPrettyWriterImpl(
+         NYsonPull::NOutput::IStream& stream,
+         EStreamType mode,
+         size_t indent_size)
+         : TTextWriterImpl(stream, mode)
+         , indent_size_{indent_size} {
+     }
+
+ protected:
+     //! Separates sibling nodes with ';' followed by a fresh indented line.
+     void begin_node() override {
+         if (!need_item_separator()) {
+             return;
+         }
+         write(NSymbol::item_separator);
+         newline();
+     }
+
+     //! Starts a fresh indented line after a collection has been opened.
+     void begin_collection(collection_type type) override {
+         TTextWriterImpl::begin_collection(type);
+         newline();
+     }
+
+     //! Starts a fresh indented line before a collection is closed.
+     void end_collection(collection_type type) override {
+         TTextWriterImpl::end_collection(type);
+         newline();
+     }
+
+     //! Emits '\n' and then indentation for the current depth.
+     void newline() {
+         write('\n');
+         indent(depth());
+     }
+
+     //! Emits \p count levels of indentation.
+     void indent(size_t count) {
+         size_t remaining = count * indent_size_;
+         while (remaining > 0) {
+             write(' ');
+             --remaining;
+         }
+     }
+ };
+
+ template <typename T, typename... Args>
+ NYsonPull::TWriter make_writer( // Builds a TWriter from `stream` and a freshly created implementation of type T.
+ THolder<NYsonPull::NOutput::IStream> stream,
+ Args&&... args) {
+ auto impl = MakeHolder<T>(*stream, std::forward<Args>(args)...); // must happen before `stream` is moved from below
+ return NYsonPull::TWriter(std::move(stream), std::move(impl));
+ }
+ }
+}
diff --git a/library/cpp/yson_pull/detail/zigzag.h b/library/cpp/yson_pull/detail/zigzag.h
new file mode 100644
index 0000000000..98fcac0e9f
--- /dev/null
+++ b/library/cpp/yson_pull/detail/zigzag.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "traits.h"
+
+namespace NYsonPull {
+ namespace NDetail {
+ namespace NZigZag {
+ //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x|
+
+ //! ZigZag-encodes \p x: the value shifted left by one bit, XOR-ed with
+ //! the sign bit replicated across the whole word.
+ template <typename TSigned>
+ inline NTraits::to_unsigned<TSigned> encode(TSigned x) {
+     using TUnsigned = NTraits::to_unsigned<TSigned>;
+     constexpr auto sign_shift = sizeof(TSigned) * 8 - 1;
+     const TUnsigned doubled = static_cast<TUnsigned>(x) << 1;
+     const TUnsigned sign_mask = static_cast<TUnsigned>(x >> sign_shift);
+     return doubled ^ sign_mask;
+ }
+
+ //! Inverse of encode(): the value shifted right by one bit, XOR-ed with
+ //! the negated lowest bit.
+ template <typename TUnsigned>
+ inline NTraits::to_signed<TUnsigned> decode(TUnsigned x) {
+     using TSigned = NTraits::to_signed<TUnsigned>;
+     const TSigned magnitude = static_cast<TSigned>(x >> 1);
+     const TSigned sign_mask = -static_cast<TSigned>(x & 1);
+     return magnitude ^ sign_mask;
+ }
+ }
+ } // namespace NDetail
+}
diff --git a/library/cpp/yson_pull/event.cpp b/library/cpp/yson_pull/event.cpp
new file mode 100644
index 0000000000..b7ede494b6
--- /dev/null
+++ b/library/cpp/yson_pull/event.cpp
@@ -0,0 +1,18 @@
+#include "event.h"
+
+#include <library/cpp/yson_pull/detail/cescape.h>
+
+#include <util/stream/output.h>
+
+using namespace NYsonPull;
+
+//! Prints an event as "(<type>)"; Scalar events also print their value and
+//! Key events their quoted name.
+template <>
+void Out<TEvent>(IOutputStream& out, const TEvent& value) {
+    out << '(' << value.Type();
+    switch (value.Type()) {
+        case EEventType::Scalar:
+            out << ' ' << value.AsScalar();
+            break;
+
+        case EEventType::Key:
+            out << ' ' << NYsonPull::NDetail::NCEscape::quote(value.AsString());
+            break;
+
+        default:
+            // Tag-only events carry no payload.
+            break;
+    }
+    out << ')';
+}
diff --git a/library/cpp/yson_pull/event.h b/library/cpp/yson_pull/event.h
new file mode 100644
index 0000000000..b41d5ea3b5
--- /dev/null
+++ b/library/cpp/yson_pull/event.h
@@ -0,0 +1,85 @@
+#pragma once
+
+#include "cyson_enums.h"
+#include "scalar.h"
+
+#include <util/generic/strbuf.h>
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+namespace NYsonPull {
+ //! A well-formed decoded YSON stream can be described by the following grammar:
+ //!
+ //! STREAM[node] ::= begin_stream VALUE end_stream
+ //! STREAM[list_fragment] ::= begin_stream LIST_FRAGMENT end_stream
+ //! STREAM[map_fragment] ::= begin_stream MAP_FRAGMENT end_stream
+ //! LIST_FRAGMENT ::= { VALUE; }
+ //! MAP_FRAGMENT ::= { KEY VALUE; }
+ //! KEY ::= key(String)
+ //! VALUE ::= VALUE_NOATTR | ATTRIBUTES VALUE_NOATTR
+ //! ATTRIBUTES ::= begin_attributes MAP_FRAGMENT end_attributes
+ //! VALUE_NOATTR ::= scalar(Scalar) | LIST | MAP
+ //! LIST ::= begin_list LIST_FRAGMENT end_list
+ //! MAP ::= begin_map MAP_FRAGMENT end_map
+
+ //! \brief YSON event type tag. Corresponds to YSON grammar.
+ enum class EEventType { // enumerator values come from the YSON_EVENT_* constants (cyson_enums.h is included above)
+ BeginStream = YSON_EVENT_BEGIN_STREAM, // begin_stream in the grammar above
+ EndStream = YSON_EVENT_END_STREAM, // end_stream
+ BeginList = YSON_EVENT_BEGIN_LIST, // begin_list
+ EndList = YSON_EVENT_END_LIST, // end_list
+ BeginMap = YSON_EVENT_BEGIN_MAP, // begin_map
+ EndMap = YSON_EVENT_END_MAP, // end_map
+ BeginAttributes = YSON_EVENT_BEGIN_ATTRIBUTES, // begin_attributes
+ EndAttributes = YSON_EVENT_END_ATTRIBUTES, // end_attributes
+ Key = YSON_EVENT_KEY, // key(String)
+ Scalar = YSON_EVENT_SCALAR, // scalar(Scalar)
+ };
+
+ //! \brief YSON event variant type.
+ class TEvent {
+ EEventType Type_;
+ TScalar Value_; // payload; the accessors below assert it is read only for Scalar and Key events
+
+ public:
+ //! \brief Construct a tag-only event.
+ explicit constexpr TEvent(EEventType type = EEventType::BeginStream)
+ : Type_{type} {
+ }
+
+ //! \brief Construct a tag+value event.
+ //!
+ //! Only \p EEventType::Key is meaningful.
+ constexpr TEvent(EEventType type, const TScalar& value)
+ : Type_{type}
+ , Value_{value} {
+ }
+
+ //! \brief Construct a \p EEventType::Scalar event.
+ explicit constexpr TEvent(const TScalar& value)
+ : Type_{EEventType::Scalar}
+ , Value_{value} {
+ }
+
+ EEventType Type() const {
+ return Type_;
+ }
+
+ //! \brief Get TScalar value.
+ //!
+ //! Y_ASSERT requires the event type to be \p EEventType::Scalar or \p EEventType::Key.
+ const TScalar& AsScalar() const {
+ Y_ASSERT(Type_ == EEventType::Scalar || Type_ == EEventType::Key);
+ return Value_;
+ }
+
+ //! \brief Get string value.
+ //!
+ //! Y_ASSERT requires a \p EEventType::Key event or a \p EEventType::Scalar event holding a string.
+ TStringBuf AsString() const {
+ Y_ASSERT(Type_ == EEventType::Key || (Type_ == EEventType::Scalar && Value_.Type() == EScalarType::String));
+ return Value_.AsString();
+ }
+ };
+
+}
diff --git a/library/cpp/yson_pull/exceptions.cpp b/library/cpp/yson_pull/exceptions.cpp
new file mode 100644
index 0000000000..e1d68493e7
--- /dev/null
+++ b/library/cpp/yson_pull/exceptions.cpp
@@ -0,0 +1,45 @@
+#include "exceptions.h"
+
+#include <util/string/builder.h>
+
+#include <cerrno>
+#include <cstring>
+
+using namespace NYsonPull::NException;
+
+//! Builds "Invalid YSON[ at <position parts>]: <message>", caches it in
+//! FormattedMessage_ and returns a pointer into that cache.
+const char* TBadStream::what() const noexcept {
+    TStringBuilder text;
+    text << "Invalid YSON";
+
+    // The first available position part is introduced by " at ", every
+    // later one by ", ".
+    const char* lead = " at ";
+    if (Position_.Offset) {
+        text << lead << "offset " << *Position_.Offset;
+        lead = ", ";
+    }
+    if (Position_.Line) {
+        text << lead << "line " << *Position_.Line;
+        lead = ", ";
+    }
+    if (Position_.Column) {
+        text << lead << "column " << *Position_.Column;
+    }
+
+    text << ": " << Message_;
+    FormattedMessage_ = text;
+    return FormattedMessage_.c_str();
+}
+
+NYsonPull::NException::TSystemError::TSystemError() // Default constructor: captures the value of errno at construction time.
+ : SavedErrno_{errno} {
+}
+
+const char* NYsonPull::NException::TSystemError::what() const noexcept { // Returns the strerror() text for the saved errno value.
+ return ::strerror(SavedErrno_);
+}
diff --git a/library/cpp/yson_pull/exceptions.h b/library/cpp/yson_pull/exceptions.h
new file mode 100644
index 0000000000..ebfed950a5
--- /dev/null
+++ b/library/cpp/yson_pull/exceptions.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include "position_info.h"
+
+#include <util/generic/string.h>
+
+#include <stdexcept>
+#include <string>
+
+namespace NYsonPull {
+ namespace NException {
+ class TBadStream: public std::exception { // Base exception for stream errors; carries a message and the position at which it was raised.
+ TString Message_;
+ TPositionInfo Position_;
+ mutable TString FormattedMessage_; // mutable: assigned by the const what() in exceptions.cpp
+
+ public:
+ TBadStream(
+ TString message,
+ const TPositionInfo& position)
+ : Message_(std::move(message))
+ , Position_(position)
+ {
+ }
+
+ const TPositionInfo& Position() const { // position supplied at construction
+ return Position_;
+ }
+
+ const char* what() const noexcept override; // formats "Invalid YSON ... : <message>"; defined in exceptions.cpp
+ };
+
+ class TBadInput: public TBadStream { // TBadStream subtype with inherited constructors; presumably raised on the reading side -- confirm against the reader code.
+ public:
+ using TBadStream::TBadStream;
+ };
+
+ class TBadOutput: public TBadStream { // TBadStream subtype with inherited constructors; thrown by the writer's fail() in detail/writer.h.
+ public:
+ using TBadStream::TBadStream;
+ };
+
+ class TSystemError: public std::exception { // Exception wrapping an errno value.
+ int SavedErrno_;
+
+ public:
+ TSystemError(); // captures the current errno (defined in exceptions.cpp)
+ TSystemError(int saved_errno) // wraps an explicitly supplied errno value
+ : SavedErrno_{saved_errno} {
+ }
+
+ int saved_errno() const noexcept {
+ return SavedErrno_;
+ }
+
+ const char* what() const noexcept override; // strerror() text for the saved errno (defined in exceptions.cpp)
+ };
+ }
+}
diff --git a/library/cpp/yson_pull/input.cpp b/library/cpp/yson_pull/input.cpp
new file mode 100644
index 0000000000..1373c89868
--- /dev/null
+++ b/library/cpp/yson_pull/input.cpp
@@ -0,0 +1,33 @@
+#include "input.h"
+
+#include <library/cpp/yson_pull/detail/input/stdio_file.h>
+#include <library/cpp/yson_pull/detail/input/stream.h>
+
+#include <util/generic/ptr.h>
+#include <util/stream/file.h>
+#include <util/stream/mem.h>
+
+using namespace NYsonPull::NInput;
+using namespace NYsonPull::NDetail::NInput;
+
+namespace NInput = NYsonPull::NInput;
+
+THolder<IStream> NInput::FromStdioFile(FILE* file, size_t buffer_size) { // Creates a TStdioFile input adaptor from `file` and `buffer_size`.
+ return MakeHolder<TStdioFile>(file, buffer_size);
+}
+
+THolder<IStream> NInput::FromPosixFd(int fd, size_t buffer_size) { // Creates a TFHandle input adaptor from `fd` and `buffer_size`.
+ return MakeHolder<TFHandle>(fd, buffer_size);
+}
+
+THolder<IStream> NInput::FromMemory(TStringBuf data) { // Creates a TOwned<TMemoryInput> adaptor constructed from `data`.
+ return MakeHolder<TOwned<TMemoryInput>>(data);
+}
+
+THolder<IStream> NInput::FromInputStream(IInputStream* input, size_t buffer_size) { // Creates a TOwned<TBufferedInput> adaptor constructed from `input` and `buffer_size`.
+ return MakeHolder<TOwned<TBufferedInput>>(input, buffer_size);
+}
+
+THolder<IStream> NInput::FromZeroCopyInput(IZeroCopyInput* input) { // Creates a TZeroCopy adaptor constructed from `input`.
+ return MakeHolder<TZeroCopy>(input);
+}
diff --git a/library/cpp/yson_pull/input.h b/library/cpp/yson_pull/input.h
new file mode 100644
index 0000000000..2cdfae857e
--- /dev/null
+++ b/library/cpp/yson_pull/input.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include "buffer.h"
+
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+#include <cstddef>
+#include <memory>
+
+class IInputStream;
+class IZeroCopyInput;
+
+namespace NYsonPull {
+ namespace NInput {
+ //! \brief Input stream adaptor interface.
+ //!
+ //! Represents a model of a chunked input data stream.
+ //! Chunked input source: exposes the current chunk through buffer() and
+ //! obtains the next one from the subclass via do_fill_buffer().
+ class IStream {
+     input_buffer buffer_;
+     bool at_end_ = false;
+
+ public:
+     virtual ~IStream() = default;
+
+     //! True once do_fill_buffer() has reported the end of the stream.
+     bool at_end() const {
+         return at_end_;
+     }
+
+     input_buffer& buffer() noexcept {
+         return buffer_;
+     }
+     const input_buffer& buffer() const noexcept {
+         return buffer_;
+     }
+
+     //! Requests chunks until the buffer is non-empty or the end of the
+     //! stream has been reported. Does nothing if data is already buffered.
+     void fill_buffer() {
+         for (;;) {
+             if (!buffer_.is_empty() || at_end()) {
+                 return;
+             }
+             at_end_ = (do_fill_buffer() == result::at_end);
+         }
+     }
+
+ protected:
+     enum class result {
+         have_more_data, //! May continue reading
+         at_end, //! Reached end of stream
+     };
+
+     //! \brief Read next chunk of data.
+     //!
+     //! The implementation is to discard the buffer contents
+     //! and reset the buffer to a next chunk of data.
+     //! End-of-stream condition is to be reported via return value.
+     //!
+     //! Read is assumed to always succeed unless it throws an exception.
+     virtual result do_fill_buffer() = 0;
+ };
+
+ //! \brief Read data from a contiguous memory block (i.e. a string)
+ //!
+ //! Does not take ownership on memory.
+ THolder<IStream> FromMemory(TStringBuf data);
+
+ //! \brief Read data from C FILE* object.
+ //!
+ //! Does not take ownership on file object.
+ //! Data is buffered internally regardless of file buffering.
+ THolder<IStream> FromStdioFile(FILE* file, size_t buffer_size = 65536);
+
+ //! \brief Read data from POSIX file descriptor.
+ //!
+ //! Does not take ownership of the file descriptor.
+ THolder<IStream> FromPosixFd(int fd, size_t buffer_size = 65536);
+
+ THolder<IStream> FromZeroCopyInput(IZeroCopyInput* input);
+
+ THolder<IStream> FromInputStream(IInputStream* input, size_t buffer_size = 65536);
+ }
+}
diff --git a/library/cpp/yson_pull/output.cpp b/library/cpp/yson_pull/output.cpp
new file mode 100644
index 0000000000..27c9ef9e69
--- /dev/null
+++ b/library/cpp/yson_pull/output.cpp
@@ -0,0 +1,29 @@
+#include "output.h"
+
+#include <library/cpp/yson_pull/detail/output/stdio_file.h>
+#include <library/cpp/yson_pull/detail/output/stream.h>
+
+#include <util/generic/ptr.h>
+#include <util/stream/file.h>
+#include <util/stream/str.h>
+
+using namespace NYsonPull::NOutput;
+using namespace NYsonPull::NDetail::NOutput;
+
+namespace NOutput = NYsonPull::NOutput;
+
+THolder<IStream> NOutput::FromStdioFile(FILE* file, size_t buffer_size) { // Creates a TStdioFile output adaptor from `file` and `buffer_size`.
+ return MakeHolder<TStdioFile>(file, buffer_size);
+}
+
+THolder<IStream> NOutput::FromPosixFd(int fd, size_t buffer_size) { // Creates a TFHandle output adaptor from `fd` and `buffer_size`.
+ return MakeHolder<TFHandle>(fd, buffer_size);
+}
+
+THolder<IStream> NOutput::FromString(TString* output, size_t buffer_size) { // Creates a TOwned<TStringOutput> adaptor; `output` is dereferenced here, so it must not be null.
+ return MakeHolder<TOwned<TStringOutput>>(buffer_size, *output);
+}
+
+THolder<IStream> NOutput::FromOutputStream(IOutputStream* output, size_t buffer_size) { // Creates a TStream adaptor constructed from `output` and `buffer_size`.
+ return MakeHolder<TStream>(output, buffer_size);
+}
diff --git a/library/cpp/yson_pull/output.h b/library/cpp/yson_pull/output.h
new file mode 100644
index 0000000000..2d78107a93
--- /dev/null
+++ b/library/cpp/yson_pull/output.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "buffer.h"
+
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+
+//! \brief Output stream adaptor interface.
+//!
+//! Represents a model of an optionally-buffered writer.
+namespace NYsonPull {
+ namespace NOutput {
+        //! \brief Base class of output stream adaptors.
+        //!
+        //! Holds the output_buffer exposed through buffer() and passes its
+        //! contents to the concrete implementation via do_flush_buffer().
+        class IStream {
+            output_buffer buffer_;
+
+        public:
+            virtual ~IStream() = default;
+
+            //! \brief Access the buffer held by this stream.
+            output_buffer& buffer() noexcept {
+                return buffer_;
+            }
+            const output_buffer& buffer() const noexcept {
+                return buffer_;
+            }
+
+            //! \brief Hand buffered data and \p extra to the implementation.
+            //!
+            //! Nothing happens when both the buffer and \p extra are empty.
+            //! Otherwise do_flush_buffer() is invoked once with \p extra and
+            //! then again with an empty argument for as long as the buffer
+            //! still reports data.
+            void flush_buffer(TStringBuf extra = {}) {
+                if (extra.empty() && buffer_.is_empty()) {
+                    return;
+                }
+                do_flush_buffer(extra);
+                while (!buffer_.is_empty()) {
+                    do_flush_buffer({});
+                }
+            }
+
+        protected:
+            //! \brief Flush data to underlying stream.
+            //!
+            //! The implementation is to flush the buffer contents AND
+            //! extra argument to underlying stream.
+            //!
+            //! This way, at zero buffer size this interface implements an unbuffered
+            //! stream (with an added cost of a virtual call per each write).
+            //!
+            //! Write is assumed to always succeed unless it throws an exception.
+            virtual void do_flush_buffer(TStringBuf extra) = 0;
+        };
+
+ //! \brief Write data to C FILE* object.
+ THolder<IStream> FromStdioFile(FILE* file, size_t buffer_size = 0);
+
+ //! \brief Write data to POSIX file descriptor
+ THolder<IStream> FromPosixFd(int fd, size_t buffer_size = 65536);
+
+ THolder<IStream> FromOutputStream(IOutputStream* output, size_t buffer_size = 65536);
+
+ THolder<IStream> FromString(TString* output, size_t buffer_size = 1024);
+ }
+}
diff --git a/library/cpp/yson_pull/position_info.h b/library/cpp/yson_pull/position_info.h
new file mode 100644
index 0000000000..a65c4663a9
--- /dev/null
+++ b/library/cpp/yson_pull/position_info.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <util/generic/maybe.h>
+#include <util/system/types.h>
+
+namespace NYsonPull {
+ struct TPositionInfo {
+ TMaybe<ui64> Offset;
+ TMaybe<ui64> Line;
+ TMaybe<ui64> Column;
+
+ TPositionInfo() = default;
+ TPositionInfo(
+ TMaybe<ui64> offset_,
+ TMaybe<ui64> line_ = Nothing(),
+ TMaybe<ui64> column_ = Nothing())
+ : Offset{offset_}
+ , Line{line_}
+ , Column{column_} {
+ }
+ };
+
+}
diff --git a/library/cpp/yson_pull/range.h b/library/cpp/yson_pull/range.h
new file mode 100644
index 0000000000..f4fcf3f206
--- /dev/null
+++ b/library/cpp/yson_pull/range.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "reader.h"
+
+#include <util/generic/iterator.h>
+
+namespace NYsonPull {
+    //! \brief Input range over the events produced by a TReader.
+    //!
+    //! Next() keeps returning events until EndStream has been handed out once;
+    //! every call after that returns nullptr.
+    class TStreamEventsRange: public TInputRangeAdaptor<TStreamEventsRange> {
+        TReader Reader_;
+        bool AtEnd_ = false;
+
+    public:
+        TStreamEventsRange(THolder<NInput::IStream> stream, EStreamType mode)
+            : Reader_{std::move(stream), mode}
+        {
+        }
+
+        //! \brief Pointer to the event most recently returned by the reader.
+        const TEvent* Last() const noexcept {
+            return &Reader_.LastEvent();
+        }
+
+        //! \brief Advance the reader; nullptr once EndStream was already returned.
+        const TEvent* Next() {
+            if (Y_UNLIKELY(AtEnd_)) {
+                return nullptr;
+            }
+
+            const auto* current = &Reader_.NextEvent();
+            AtEnd_ = current->Type() == EEventType::EndStream;
+            return current;
+        }
+    };
+}
diff --git a/library/cpp/yson_pull/read_ops.cpp b/library/cpp/yson_pull/read_ops.cpp
new file mode 100644
index 0000000000..9d7e6a4a2d
--- /dev/null
+++ b/library/cpp/yson_pull/read_ops.cpp
@@ -0,0 +1,66 @@
+#include "read_ops.h"
+
+using namespace NYsonPull;
+using namespace NYsonPull::NReadOps;
+
+namespace {
+    // Read the next event. When it is `end`, report false; otherwise skip the
+    // value that event starts and report true.
+    bool TrySkipValueUntil(EEventType end, TReader& reader) {
+        const auto& next = reader.NextEvent();
+        const bool hasValue = next.Type() != end;
+        if (hasValue) {
+            SkipCurrentValue(next, reader);
+        }
+        return hasValue;
+    }
+
+    // Read the next event. When it is `end`, report false; otherwise require
+    // it to be a Key, skip the value that follows and report true.
+    bool TrySkipKeyValueUntil(EEventType end, TReader& reader) {
+        const auto& next = reader.NextEvent();
+        if (next.Type() != end) {
+            Expect(next, EEventType::Key);
+            SkipValue(reader);
+            return true;
+        }
+        return false;
+    }
+}
+
+// Skip the rest of the value whose opening event `event` has already been read.
+//
+// Lists and maps are consumed up to their closing event; attributes are
+// consumed together with the value that follows them; a scalar needs no
+// further reads. Any other event type raises yexception.
+void NYsonPull::NReadOps::SkipCurrentValue(const TEvent& event, TReader& reader) {
+    // Read the type once, before any NextEvent() call: event data is
+    // invalidated by advancing the reader.
+    const auto type = event.Type();
+
+    if (type == EEventType::Scalar) {
+        return;
+    }
+
+    if (type == EEventType::BeginList) {
+        bool more = true;
+        while (more) {
+            more = TrySkipValueUntil(EEventType::EndList, reader);
+        }
+        return;
+    }
+
+    if (type == EEventType::BeginMap) {
+        bool more = true;
+        while (more) {
+            more = TrySkipKeyValueUntil(EEventType::EndMap, reader);
+        }
+        return;
+    }
+
+    if (type == EEventType::BeginAttributes) {
+        bool more = true;
+        while (more) {
+            more = TrySkipKeyValueUntil(EEventType::EndAttributes, reader);
+        }
+        // attributes after attributes are disallowed in TReader
+        SkipValue(reader);
+        return;
+    }
+
+    throw yexception() << "Unexpected event: " << event;
+}
+
+// Read the next event from `reader` and skip the whole value it starts.
+void NYsonPull::NReadOps::SkipValue(TReader& reader) {
+    SkipCurrentValue(reader.NextEvent(), reader);
+}
+
+// Starting from the reader's last event, skip every value that begins with
+// attributes, then require the event reached to be BeginMap.
+void NYsonPull::NReadOps::SkipControlRecords(TReader& reader) {
+    const TEvent* current = &reader.LastEvent();
+    for (;;) {
+        if (current->Type() != EEventType::BeginAttributes) {
+            break;
+        }
+        SkipCurrentValue(*current, reader);
+        current = &reader.NextEvent();
+    }
+    Expect(*current, EEventType::BeginMap);
+}
diff --git a/library/cpp/yson_pull/read_ops.h b/library/cpp/yson_pull/read_ops.h
new file mode 100644
index 0000000000..5c084983ea
--- /dev/null
+++ b/library/cpp/yson_pull/read_ops.h
@@ -0,0 +1,142 @@
+#pragma once
+
+#include "reader.h"
+
+#include <util/generic/maybe.h>
+#include <util/generic/bt_exception.h>
+#include <util/generic/yexception.h>
+#include <util/system/yassert.h>
+
+/** Imperative recursive-descent parsing helpers.
+ *
+ * These functions help verify conditions and advance parser state.
+ * For aggregate parsing functions, common precondition is to require Begin{X}
+ * event prior to function invocation. Thus, parsers are composable by calling
+ * sub-parser after dispatching on opening event, e.g.:
+ *
+ * if (reader.LastEvent().Type() == EEventType::BeginMap) {
+ * ReadSomeMap(reader);
+ * }
+ *
+ */
+
+namespace NYsonPull {
+ namespace NReadOps {
+ class TExpectationFailure: public TWithBackTrace<yexception> {
+ };
+
+ inline void Expect(const TEvent& got, EEventType expected) {
+ Y_ENSURE_EX(
+ got.Type() == expected,
+ TExpectationFailure() << "expected " << expected << ", got " << got);
+ }
+
+ inline void Expect(const TScalar& got, EScalarType expected) {
+ Y_ENSURE_EX(
+ got.Type() == expected,
+ TExpectationFailure() << "expected scalar " << expected << ", got " << got);
+ }
+
+ // ExpectBegin{X} functions verify that last event WAS X
+ // SkipBegin{X} functions verify that next event WILL BE X and CONSUME it
+
+ inline void ExpectBeginStream(TReader& reader) {
+ Expect(reader.LastEvent(), EEventType::BeginStream);
+ }
+
+ inline void SkipBeginStream(TReader& reader) {
+ Expect(reader.NextEvent(), EEventType::BeginStream);
+ }
+
+ inline void ExpectBeginMap(TReader& reader) {
+ Expect(reader.LastEvent(), EEventType::BeginMap);
+ }
+
+ inline void SkipBeginMap(TReader& reader) {
+ Expect(reader.NextEvent(), EEventType::BeginMap);
+ }
+
+ inline void ExpectBeginList(TReader& reader) {
+ Expect(reader.LastEvent(), EEventType::BeginList);
+ }
+
+ inline void SkipBeginList(TReader& reader) {
+ Expect(reader.NextEvent(), EEventType::BeginList);
+ }
+
+ inline bool ReadListItem(TReader& reader) {
+ return reader.NextEvent().Type() != EEventType::EndList;
+ }
+
+        // Read the next event inside a map: the key text for a Key event,
+        // Nothing() for EndMap, yexception for anything else.
+        inline TMaybe<TStringBuf> ReadKey(TReader& reader) {
+            const auto& next = reader.NextEvent();
+            const auto type = next.Type();
+            if (type == EEventType::Key) {
+                return next.AsString();
+            }
+            if (type == EEventType::EndMap) {
+                return Nothing();
+            }
+            ythrow yexception() << "Unexpected event: " << next;
+        }
+
+ template <typename T = const TScalar&>
+ inline T ReadScalar(TReader& reader);
+
+ template <>
+ inline const TScalar& ReadScalar<const TScalar&>(TReader& reader) {
+ const auto& event = reader.NextEvent();
+ Expect(event, EEventType::Scalar);
+ return event.AsScalar();
+ }
+
+ template <>
+ inline i64 ReadScalar<i64>(TReader& reader) {
+ const auto& scalar = ReadScalar(reader);
+ Expect(scalar, EScalarType::Int64);
+ return scalar.AsInt64();
+ }
+
+ template <>
+ inline ui64 ReadScalar<ui64>(TReader& reader) {
+ const auto& scalar = ReadScalar(reader);
+ Expect(scalar, EScalarType::UInt64);
+ return scalar.AsUInt64();
+ }
+
+ template <>
+ inline double ReadScalar<double>(TReader& reader) {
+ const auto& scalar = ReadScalar(reader);
+ Expect(scalar, EScalarType::Float64);
+ return scalar.AsFloat64();
+ }
+
+ template <>
+ inline TStringBuf ReadScalar<TStringBuf>(TReader& reader) {
+ const auto& scalar = ReadScalar(reader);
+ Expect(scalar, EScalarType::String);
+ return scalar.AsString();
+ }
+
+ template <>
+ inline TString ReadScalar<TString>(TReader& reader) {
+ return TString(ReadScalar<TStringBuf>(reader));
+ }
+
+ template <>
+ inline bool ReadScalar<bool>(TReader& reader) {
+ const auto& scalar = ReadScalar(reader);
+ Expect(scalar, EScalarType::Boolean);
+ return scalar.AsBoolean();
+ }
+
+ // Skip value that was already started with `event`
+ void SkipCurrentValue(const TEvent& event, TReader& reader);
+
+ // Skip value that starts at `reader.NextEvent()`
+ void SkipValue(TReader& reader);
+
+ // Skip values with attributes, wait for map value
+ void SkipControlRecords(TReader& reader);
+ }
+}
diff --git a/library/cpp/yson_pull/reader.cpp b/library/cpp/yson_pull/reader.cpp
new file mode 100644
index 0000000000..ea26852756
--- /dev/null
+++ b/library/cpp/yson_pull/reader.cpp
@@ -0,0 +1,27 @@
+#include "reader.h"
+#include <library/cpp/yson_pull/detail/reader.h>
+
+using namespace NYsonPull;
+
+TReader::TReader(
+ THolder<NInput::IStream> stream,
+ EStreamType mode)
+ : Stream_{std::move(stream)}
+ , Impl_{MakeHolder<NDetail::reader_impl>(*Stream_, mode)} {
+}
+
+// Memberwise move of the stream and the implementation holder. Defined in this
+// file, where NDetail::reader_impl is a complete type.
+TReader::TReader(TReader&&) noexcept = default;
+
+// Out-of-line so that THolder<NDetail::reader_impl> is destroyed where
+// reader_impl is a complete type.
+TReader::~TReader() = default;
+
+const TEvent& TReader::NextEvent() {
+ return Impl_->next_event();
+}
+
+const TEvent& TReader::LastEvent() const noexcept {
+ return Impl_->last_event();
+}
diff --git a/library/cpp/yson_pull/reader.h b/library/cpp/yson_pull/reader.h
new file mode 100644
index 0000000000..f839b19071
--- /dev/null
+++ b/library/cpp/yson_pull/reader.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "event.h"
+#include "input.h"
+#include "stream_type.h"
+
+#include <util/system/yassert.h>
+
+#include <memory>
+
+namespace NYsonPull {
+ namespace NDetail {
+ class reader_impl;
+ }
+
+ //! \brief YSON reader facade class.
+ //!
+ //! Owns an input stream.
+ class TReader {
+ THolder<NInput::IStream> Stream_;
+ THolder<NDetail::reader_impl> Impl_;
+
+ public:
+ TReader(THolder<NInput::IStream> stream, EStreamType mode);
+ TReader(TReader&&) noexcept;
+ ~TReader();
+
+ //! \brief Advance stream to next event and return it.
+ //!
+ //! Any event data is invalidated by a call to NextEvent();
+ const TEvent& NextEvent();
+
+ //! \brief Get last returned event.
+ const TEvent& LastEvent() const noexcept;
+ };
+
+}
diff --git a/library/cpp/yson_pull/scalar.cpp b/library/cpp/yson_pull/scalar.cpp
new file mode 100644
index 0000000000..4325542e7a
--- /dev/null
+++ b/library/cpp/yson_pull/scalar.cpp
@@ -0,0 +1,57 @@
+#include "scalar.h"
+
+#include <library/cpp/yson_pull/detail/cescape.h>
+
+#include <util/stream/output.h>
+
+using namespace NYsonPull;
+
+// Print a scalar as "(<type> <value>)"; an entity is printed as "(<type>)"
+// with no value part. Strings are printed quoted via NCEscape::quote.
+template <>
+void Out<TScalar>(IOutputStream& out, const TScalar& value) {
+    const auto type = value.Type();
+    out << '(' << type;
+    if (type != EScalarType::Entity) {
+        out << ' ';
+        switch (type) {
+            case EScalarType::Boolean:
+                if (value.AsBoolean()) {
+                    out << "true";
+                } else {
+                    out << "false";
+                }
+                break;
+            case EScalarType::Int64:
+                out << value.AsInt64();
+                break;
+            case EScalarType::UInt64:
+                out << value.AsUInt64();
+                break;
+            case EScalarType::Float64:
+                out << value.AsFloat64();
+                break;
+            case EScalarType::String:
+                out << NYsonPull::NDetail::NCEscape::quote(value.AsString());
+                break;
+            default:
+                break;
+        }
+    }
+    out << ')';
+}
+
+// Two scalars are equal when their type tags match and the payload selected by
+// that tag compares equal; entities carry no payload and always compare equal.
+bool NYsonPull::operator==(const TScalar& left, const TScalar& right) noexcept {
+    const auto type = left.Type();
+    if (type != right.Type()) {
+        return false;
+    }
+    switch (type) {
+        case EScalarType::Entity:
+            return true;
+        case EScalarType::Boolean:
+            return left.AsBoolean() == right.AsBoolean();
+        case EScalarType::Int64:
+            return left.AsInt64() == right.AsInt64();
+        case EScalarType::UInt64:
+            return left.AsUInt64() == right.AsUInt64();
+        case EScalarType::Float64:
+            return left.AsFloat64() == right.AsFloat64();
+        case EScalarType::String:
+            return left.AsString() == right.AsString();
+        default:
+            Y_UNREACHABLE();
+    }
+}
diff --git a/library/cpp/yson_pull/scalar.h b/library/cpp/yson_pull/scalar.h
new file mode 100644
index 0000000000..509fce8b5e
--- /dev/null
+++ b/library/cpp/yson_pull/scalar.h
@@ -0,0 +1,146 @@
+#pragma once
+
+#include "cyson_enums.h"
+
+#include <util/generic/strbuf.h>
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+namespace NYsonPull {
+ //! \brief YSON TScalar value type tag
+ enum class EScalarType {
+ Entity = YSON_SCALAR_ENTITY,
+ Boolean = YSON_SCALAR_BOOLEAN,
+ Int64 = YSON_SCALAR_INT64,
+ UInt64 = YSON_SCALAR_UINT64,
+ Float64 = YSON_SCALAR_FLOAT64,
+ String = YSON_SCALAR_STRING,
+ };
+
+ //! \brief YSON TScalar value variant
+ class TScalar {
+ //! \internal \brief YSON TScalar value underlying representation
+ union TScalarValue {
+ struct TScalarStringRef {
+ const char* Data;
+ size_t Size;
+ };
+
+ ui8 AsNothing[1];
+ bool AsBoolean;
+ i64 AsInt64;
+ ui64 AsUInt64;
+ double AsFloat64;
+ TScalarStringRef AsString;
+
+ constexpr TScalarValue()
+ : AsNothing{} {
+ }
+
+ explicit constexpr TScalarValue(bool value)
+ : AsBoolean{value} {
+ }
+
+ explicit constexpr TScalarValue(i64 value)
+ : AsInt64{value} {
+ }
+
+ explicit constexpr TScalarValue(ui64 value)
+ : AsUInt64{value} {
+ }
+
+ explicit constexpr TScalarValue(double value)
+ : AsFloat64{value} {
+ }
+
+ explicit constexpr TScalarValue(TStringBuf value)
+ : AsString{value.data(), value.size()} {
+ }
+ };
+ static_assert(
+ sizeof(TScalarValue) == sizeof(TStringBuf),
+ "bad scalar_value size");
+
+ EScalarType Type_;
+ TScalarValue Value_;
+
+ public:
+ constexpr TScalar()
+ : Type_{EScalarType::Entity} {
+ }
+
+ explicit constexpr TScalar(bool value)
+ : Type_{EScalarType::Boolean}
+ , Value_{value} {
+ }
+
+ explicit constexpr TScalar(i64 value)
+ : Type_{EScalarType::Int64}
+ , Value_{value} {
+ }
+
+ explicit constexpr TScalar(ui64 value)
+ : Type_{EScalarType::UInt64}
+ , Value_{value} {
+ }
+
+ explicit constexpr TScalar(double value)
+ : Type_{EScalarType::Float64}
+ , Value_{value} {
+ }
+
+ explicit constexpr TScalar(TStringBuf value)
+ : Type_{EScalarType::String}
+ , Value_{value} {
+ }
+
+ // Disambiguation for literal constants
+ // In the absence of this constructor,
+ // they get implicitly converted to bool (yikes!)
+ explicit TScalar(const char* value)
+ : Type_{EScalarType::String}
+ , Value_{TStringBuf{value}} {
+ }
+
+ EScalarType Type() const {
+ return Type_;
+ }
+
+#define CAST_TO(Type) \
+ Y_ASSERT(Type_ == EScalarType::Type); \
+ return Value_.As##Type
+
+ bool AsBoolean() const {
+ CAST_TO(Boolean);
+ }
+ i64 AsInt64() const {
+ CAST_TO(Int64);
+ }
+ ui64 AsUInt64() const {
+ CAST_TO(UInt64);
+ }
+ double AsFloat64() const {
+ CAST_TO(Float64);
+ }
+#undef CAST_TO
+
+ TStringBuf AsString() const {
+ Y_ASSERT(Type_ == EScalarType::String);
+ return TStringBuf{
+ Value_.AsString.Data,
+ Value_.AsString.Size,
+ };
+ }
+
+ const TScalarValue& AsUnsafeValue() const {
+ return Value_;
+ }
+ };
+
+ bool operator==(const TScalar& left, const TScalar& right) noexcept;
+
+ inline bool operator!=(const TScalar& left, const TScalar& right) noexcept {
+ return !(left == right);
+ }
+
+}
diff --git a/library/cpp/yson_pull/stream_type.h b/library/cpp/yson_pull/stream_type.h
new file mode 100644
index 0000000000..beac87fe1b
--- /dev/null
+++ b/library/cpp/yson_pull/stream_type.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "cyson_enums.h"
+
+namespace NYsonPull {
+ enum class EStreamType {
+ Node = YSON_STREAM_TYPE_NODE,
+ ListFragment = YSON_STREAM_TYPE_LIST_FRAGMENT,
+ MapFragment = YSON_STREAM_TYPE_MAP_FRAGMENT,
+ };
+}
diff --git a/library/cpp/yson_pull/ut/cescape_ut.cpp b/library/cpp/yson_pull/ut/cescape_ut.cpp
new file mode 100644
index 0000000000..6628ba1d15
--- /dev/null
+++ b/library/cpp/yson_pull/ut/cescape_ut.cpp
@@ -0,0 +1,71 @@
+#include <library/cpp/yson_pull/detail/cescape.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NYsonPull::NDetail;
+
+namespace {
+ void test_roundtrip(const TVector<ui8>& str) {
+ TStringBuf str_buf(
+ reinterpret_cast<const char*>(str.data()),
+ str.size());
+ auto tmp = NCEscape::encode(str_buf);
+ auto dest = NCEscape::decode(tmp);
+ UNIT_ASSERT_VALUES_EQUAL_C(
+ str_buf, TStringBuf(dest),
+ "A[" << str.size() << "]: " << str_buf << '\n'
+ << "B[" << tmp.size() << "]: " << tmp << '\n'
+ << "C[" << dest.size() << "]: " << dest);
+ }
+
+    // Enumerate all 256 byte values at position `size - N` of `str` and recurse
+    // into the remaining N - 1 trailing positions; the N == 0 specialization
+    // runs the actual round-trip check.
+    template <size_t N>
+    void test_exhaustive(TVector<ui8>& str) {
+        // The vector is never resized during the recursion, so the reference stays valid.
+        auto& slot = str[str.size() - N];
+        for (int value = 0; value < 256; ++value) {
+            // Cast straight to the element type instead of going through `char`.
+            slot = static_cast<ui8>(value);
+            test_exhaustive<N - 1>(str);
+        }
+    }
+
+ template <>
+ void test_exhaustive<0>(TVector<ui8>& str) {
+ test_roundtrip(str);
+ }
+
+ template <size_t N>
+ void test_exhaustive() {
+ TVector<ui8> str(N, ' ');
+ test_exhaustive<N>(str);
+ }
+
+} // anonymous namespace
+
+Y_UNIT_TEST_SUITE(CEscape) {
+ Y_UNIT_TEST(ExhaustiveOneChar) {
+ test_exhaustive<1>();
+ }
+
+ Y_UNIT_TEST(ExhaustiveTwoChars) {
+ test_exhaustive<2>();
+ }
+
+ Y_UNIT_TEST(ExhaustiveThreeChars) {
+ test_exhaustive<3>();
+ }
+
+ Y_UNIT_TEST(SpecialEscapeEncode) {
+ //UNIT_ASSERT_VALUES_EQUAL(R"(\b)", NCEscape::encode("\b"));
+ //UNIT_ASSERT_VALUES_EQUAL(R"(\f)", NCEscape::encode("\f"));
+ UNIT_ASSERT_VALUES_EQUAL(R"(\n)", NCEscape::encode("\n"));
+ UNIT_ASSERT_VALUES_EQUAL(R"(\r)", NCEscape::encode("\r"));
+ UNIT_ASSERT_VALUES_EQUAL(R"(\t)", NCEscape::encode("\t"));
+ }
+
+ Y_UNIT_TEST(SpecialEscapeDecode) {
+ UNIT_ASSERT_VALUES_EQUAL("\b", NCEscape::decode(R"(\b)"));
+ UNIT_ASSERT_VALUES_EQUAL("\f", NCEscape::decode(R"(\f)"));
+ UNIT_ASSERT_VALUES_EQUAL("\n", NCEscape::decode(R"(\n)"));
+ UNIT_ASSERT_VALUES_EQUAL("\r", NCEscape::decode(R"(\r)"));
+ UNIT_ASSERT_VALUES_EQUAL("\t", NCEscape::decode(R"(\t)"));
+ }
+
+} // Y_UNIT_TEST_SUITE(CEscape)
diff --git a/library/cpp/yson_pull/ut/loop_ut.cpp b/library/cpp/yson_pull/ut/loop_ut.cpp
new file mode 100644
index 0000000000..8c7b11dd1c
--- /dev/null
+++ b/library/cpp/yson_pull/ut/loop_ut.cpp
@@ -0,0 +1,382 @@
+#include <library/cpp/yson_pull/input.h>
+#include <library/cpp/yson_pull/output.h>
+#include <library/cpp/yson_pull/reader.h>
+#include <library/cpp/yson_pull/writer.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <cerrno>
+#include <cmath>
+
+#ifdef _unix_
+#include <unistd.h>
+#include <sys/wait.h>
+#endif
+
+namespace {
+ constexpr const char* alphabet =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+ void generate(NYsonPull::TWriter& writer, size_t count) {
+ writer.BeginStream();
+ for (size_t i = 0; i < count; ++i) {
+ writer.BeginMap()
+ .Key("ints")
+ .BeginList()
+ .Int64(0)
+ .Int64(-1)
+ .Int64(1000)
+ .Int64(-1000)
+ .EndList()
+ .Key("uints")
+ .BeginList()
+ .UInt64(0)
+ .UInt64(1000)
+ .UInt64(10000000)
+ .EndList()
+ .Key("entities")
+ .BeginList()
+ .Entity()
+ .BeginAttributes()
+ .Key("color")
+ .String("blue")
+ .Key("size")
+ .Int64(100)
+ .EndAttributes()
+ .Entity()
+ .Entity()
+ .EndList()
+ .Key("booleans")
+ .BeginList()
+ .Boolean(true)
+ .Boolean(false)
+ .Boolean(true)
+ .EndList()
+ .Key("floats")
+ .BeginList()
+ .Float64(0.0)
+ .Float64(13.0e30)
+ .Float64(M_PI)
+ .EndList()
+ .Key("strings")
+ .BeginList()
+ .String("hello")
+ .String("")
+ .String("foo \"-bar-\" baz")
+ .String("oh\nwow")
+ .String(alphabet)
+ .EndList()
+ .EndMap();
+ }
+ writer.EndStream();
+ }
+
+#ifdef __clang__
+ // XXX: With all the macros below (esp. UNIT_ASSERT_VALUES_EQUAL) unfolded,
+ // the time it takes clang to optimize generated code becomes abysmal.
+ // Locally disabling optimization brings it back to normal.
+ __attribute__((optnone))
+#endif // __clang__
+ void
+ verify(NYsonPull::TReader& reader, size_t count) {
+#define NEXT(name__) \
+ { \
+ auto& name__ = reader.NextEvent(); // SCOPED_TRACE(e);
+#define END_NEXT }
+#define NEXT_TYPE(type__) \
+ NEXT(e) { \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::type__, e.Type()); \
+ } \
+ END_NEXT
+#define NEXT_KEY(key__) \
+ NEXT(e) { \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); \
+ UNIT_ASSERT_VALUES_EQUAL(key__, e.AsString()); \
+ } \
+ END_NEXT
+#define NEXT_SCALAR(type__, value__) \
+ NEXT(e) { \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::type__, e.AsScalar().Type()); \
+ UNIT_ASSERT_VALUES_EQUAL(value__, e.AsScalar().As##type__()); \
+ } \
+ END_NEXT
+#define NEXT_ENTITY() \
+ NEXT(e) { \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::Entity, e.AsScalar().Type()); \
+ } \
+ END_NEXT
+#define NEXT_FLOAT64(value__) \
+ NEXT(e) { \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); \
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::Float64, e.AsScalar().Type()); \
+ UNIT_ASSERT_DOUBLES_EQUAL(value__, e.AsScalar().AsFloat64(), 1e-5); \
+ } \
+ END_NEXT
+
+ constexpr auto true_ = true;
+ constexpr auto false_ = false;
+
+ NEXT_TYPE(BeginStream);
+ for (size_t i = 0; i < count; ++i) {
+ NEXT_TYPE(BeginMap);
+ NEXT_KEY("ints") {
+ NEXT_TYPE(BeginList);
+ NEXT_SCALAR(Int64, 0);
+ NEXT_SCALAR(Int64, -1);
+ NEXT_SCALAR(Int64, 1000);
+ NEXT_SCALAR(Int64, -1000);
+ NEXT_TYPE(EndList);
+ }
+ NEXT_KEY("uints") {
+ NEXT_TYPE(BeginList);
+ NEXT_SCALAR(UInt64, 0U);
+ NEXT_SCALAR(UInt64, 1000U);
+ NEXT_SCALAR(UInt64, 10000000U);
+ NEXT_TYPE(EndList);
+ }
+ NEXT_KEY("entities") {
+ NEXT_TYPE(BeginList);
+ NEXT_ENTITY();
+ NEXT_TYPE(BeginAttributes) {
+ NEXT_KEY("color") {
+ NEXT_SCALAR(String, "blue");
+ }
+ NEXT_KEY("size") {
+ NEXT_SCALAR(Int64, 100);
+ }
+ }
+ NEXT_TYPE(EndAttributes);
+ NEXT_ENTITY();
+ NEXT_ENTITY();
+ NEXT_TYPE(EndList);
+ }
+ NEXT_KEY("booleans") {
+ NEXT_TYPE(BeginList);
+ NEXT_SCALAR(Boolean, true_);
+ NEXT_SCALAR(Boolean, false_);
+ NEXT_SCALAR(Boolean, true_);
+ NEXT_TYPE(EndList);
+ }
+ NEXT_KEY("floats") {
+ NEXT_TYPE(BeginList);
+ NEXT_FLOAT64(0.0);
+ NEXT_FLOAT64(13.0e30);
+ NEXT_FLOAT64(M_PI);
+ NEXT_TYPE(EndList);
+ }
+ NEXT_KEY("strings") {
+ NEXT_TYPE(BeginList);
+ NEXT_SCALAR(String, "hello");
+ NEXT_SCALAR(String, "");
+ NEXT_SCALAR(String, "foo \"-bar-\" baz");
+ NEXT_SCALAR(String, "oh\nwow");
+ NEXT_SCALAR(String, alphabet);
+ NEXT_TYPE(EndList);
+ }
+ NEXT_TYPE(EndMap);
+ }
+ NEXT_TYPE(EndStream);
+
+// Drop every helper macro defined at the top of verify() so that none of them
+// leaks into the rest of the translation unit.
+#undef NEXT
+#undef END_NEXT
+#undef NEXT_TYPE
+#undef NEXT_KEY
+#undef NEXT_SCALAR
+#undef NEXT_ENTITY
+#undef NEXT_FLOAT64
+ }
+
+ class sys_error {};
+
+ IOutputStream& operator<<(IOutputStream& stream, const sys_error&) {
+ stream << strerror(errno);
+ return stream;
+ }
+
+ NYsonPull::TReader make_reader(THolder<NYsonPull::NInput::IStream> stream) {
+ return NYsonPull::TReader(
+ std::move(stream),
+ NYsonPull::EStreamType::ListFragment);
+ }
+
+ template <typename Function>
+ void test_memory(Function make_writer, size_t nrepeat) {
+ TString text;
+ {
+ auto writer = make_writer(NYsonPull::NOutput::FromString(&text));
+ generate(writer, nrepeat);
+ }
+ {
+ auto reader = make_reader(NYsonPull::NInput::FromMemory(text));
+ verify(reader, nrepeat);
+ }
+ {
+ TStringInput input(text);
+ auto reader = make_reader(NYsonPull::NInput::FromInputStream(&input, /* buffer_size = */ 1));
+ verify(reader, nrepeat);
+ }
+ }
+
+#ifdef _unix_
+    // Connect `writer` and `reader` through an anonymous pipe.
+    //
+    // A child process is forked to run `writer(write_fd)`; the parent runs
+    // `reader(read_fd)`. Each side closes the descriptor it does not use before
+    // doing its work and closes its own descriptor afterwards. The parent then
+    // reaps the child and fails the test unless the child exited with code 0.
+    template <typename Here, typename There>
+    void pipe(Here&& reader, There&& writer) {
+        int fildes[2];
+        UNIT_ASSERT_VALUES_EQUAL_C(0, ::pipe(fildes), sys_error());
+        const auto read_fd = fildes[0];
+        const auto write_fd = fildes[1];
+
+        const auto pid = ::fork();
+        UNIT_ASSERT_C(pid >= 0, sys_error());
+        if (pid == 0) {
+            // child: a failed assertion must not unwind back into the test
+            // runner of the forked process, so turn it into an exit code.
+            int exit_code = 0;
+            try {
+                UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(read_fd), sys_error());
+                UNIT_ASSERT_NO_EXCEPTION(writer(write_fd));
+                UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(write_fd), sys_error());
+            } catch (...) {
+                exit_code = 1;
+            }
+            ::exit(exit_code);
+        }
+
+        // parent
+        UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(write_fd), sys_error());
+        reader(read_fd);
+        UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(read_fd), sys_error());
+
+        int stat_loc = 0;
+        UNIT_ASSERT_VALUES_EQUAL_C(pid, ::waitpid(pid, &stat_loc, 0), sys_error());
+        // Surface a failure on the writer side instead of silently ignoring it.
+        UNIT_ASSERT_C(
+            WIFEXITED(stat_loc) && WEXITSTATUS(stat_loc) == 0,
+            "writer process failed, wait status " << stat_loc);
+    }
+
+ template <typename Function>
+ void test_posix_fd(
+ Function make_writer,
+ size_t nrepeat,
+ size_t read_buffer_size,
+ size_t write_buffer_size) {
+ pipe(
+ [&](int fd) {
+ auto reader = make_reader(NYsonPull::NInput::FromPosixFd(fd, read_buffer_size));
+ verify(reader, nrepeat);
+ },
+ [&](int fd) {
+ auto writer = make_writer(NYsonPull::NOutput::FromPosixFd(fd, write_buffer_size));
+ generate(writer, nrepeat);
+ });
+ }
+
+    // Round-trip `nrepeat` generated records through a pipe using stdio FILE*
+    // adaptors on both ends: one side of pipe() reads and verifies, the other
+    // side writes with the writer produced by `make_writer`.
+    //
+    // The FILE objects are deliberately not fclose()d: pipe() closes the
+    // underlying descriptors itself and asserts that the close succeeds.
+    template <typename Function>
+    void test_stdio_file(
+        Function make_writer,
+        size_t nrepeat,
+        size_t read_buffer_size,
+        size_t write_buffer_size) {
+        pipe(
+            [&](int fd) {
+                auto file = ::fdopen(fd, "rb");
+                UNIT_ASSERT_C(file != nullptr, sys_error());
+                auto reader = make_reader(NYsonPull::NInput::FromStdioFile(file, read_buffer_size));
+                verify(reader, nrepeat);
+            },
+            [&](int fd) {
+                auto file = ::fdopen(fd, "wb");
+                UNIT_ASSERT_C(file != nullptr, sys_error());
+                {
+                    // Scope the writer so that anything it may still emit on
+                    // destruction reaches `file` before the explicit flush.
+                    auto writer = make_writer(NYsonPull::NOutput::FromStdioFile(file, write_buffer_size));
+                    generate(writer, nrepeat);
+                }
+                UNIT_ASSERT_VALUES_EQUAL_C(0, fflush(file), sys_error());
+            });
+    }
+#endif
+
+ NYsonPull::TWriter text(THolder<NYsonPull::NOutput::IStream> stream) {
+ return NYsonPull::MakeTextWriter(
+ std::move(stream),
+ NYsonPull::EStreamType::ListFragment);
+ }
+
+ NYsonPull::TWriter pretty_text(THolder<NYsonPull::NOutput::IStream> stream) {
+ return NYsonPull::MakePrettyTextWriter(
+ std::move(stream),
+ NYsonPull::EStreamType::ListFragment);
+ }
+
+ NYsonPull::TWriter binary(THolder<NYsonPull::NOutput::IStream> stream) {
+ return NYsonPull::MakeBinaryWriter(
+ std::move(stream),
+ NYsonPull::EStreamType::ListFragment);
+ }
+
+} // anonymous namespace
+
+Y_UNIT_TEST_SUITE(Loop) {
+ Y_UNIT_TEST(memory_pretty_text) {
+ test_memory(pretty_text, 100);
+ }
+
+ Y_UNIT_TEST(memory_text) {
+ test_memory(text, 100);
+ }
+
+ Y_UNIT_TEST(memory_binary) {
+ test_memory(binary, 100);
+ }
+
+#ifdef _unix_
+ Y_UNIT_TEST(posix_fd_pretty_text_buffered) {
+ test_posix_fd(pretty_text, 100, 1024, 1024);
+ }
+
+ Y_UNIT_TEST(posix_fd_pretty_text_unbuffered) {
+ test_posix_fd(pretty_text, 100, 1, 0);
+ }
+
+ Y_UNIT_TEST(posix_fd_text_buffered) {
+ test_posix_fd(text, 100, 1024, 1024);
+ }
+
+ Y_UNIT_TEST(posix_fd_text_unbuffered) {
+ test_posix_fd(text, 100, 1, 0);
+ }
+
+ Y_UNIT_TEST(posix_fd_binary_buffered) {
+ test_posix_fd(binary, 100, 1024, 1024);
+ }
+
+ Y_UNIT_TEST(posix_fd_binary_unbuffered) {
+ test_posix_fd(binary, 100, 1, 0);
+ }
+
+ Y_UNIT_TEST(stdio_file_pretty_text_buffered) {
+ test_stdio_file(pretty_text, 100, 1024, 1024);
+ }
+
+ Y_UNIT_TEST(stdio_file_pretty_text_unbuffered) {
+ test_stdio_file(pretty_text, 100, 1, 0);
+ }
+
+ Y_UNIT_TEST(stdio_file_text_buffered) {
+ test_stdio_file(text, 100, 1024, 1024);
+ }
+
+ Y_UNIT_TEST(stdio_file_text_unbuffered) {
+ test_stdio_file(text, 100, 1, 0);
+ }
+
+ Y_UNIT_TEST(stdio_file_binary_buffered) {
+ test_stdio_file(binary, 100, 1024, 1024);
+ }
+
+ Y_UNIT_TEST(stdio_file_binary_unbuffered) {
+ test_stdio_file(binary, 100, 1, 0);
+ }
+#endif
+} // Y_UNIT_TEST_SUITE(Loop)
diff --git a/library/cpp/yson_pull/ut/reader_ut.cpp b/library/cpp/yson_pull/ut/reader_ut.cpp
new file mode 100644
index 0000000000..1184265ddb
--- /dev/null
+++ b/library/cpp/yson_pull/ut/reader_ut.cpp
@@ -0,0 +1,410 @@
+#include <library/cpp/yson_pull/exceptions.h>
+#include <library/cpp/yson_pull/range.h>
+#include <library/cpp/yson_pull/reader.h>
+#include <library/cpp/yson_pull/detail/cescape.h>
+#include <library/cpp/yson_pull/detail/macros.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <cmath>
+#include <limits>
+
+namespace {
+    // Builds a reader that parses `data` straight from memory, treating it as
+    // a stream of kind `mode`.
+    NYsonPull::TReader memory_reader(TStringBuf data, NYsonPull::EStreamType mode) {
+        using namespace NYsonPull;
+        return TReader(NInput::FromMemory(data), mode);
+    }
+
+ template <typename T> // Generic case: wraps `value` in a TScalar and requires it to compare equal to the parsed `scalar`.
+ void expect_scalar(const NYsonPull::TScalar& scalar, T value) {
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{value}, scalar); // expected value first, actual second
+ }
+
+    // Float64 specialization: checks the scalar's type tag, then compares the
+    // values by class, because NaN never compares equal to itself and
+    // infinities cannot be compared with a tolerance.
+    //
+    // Finite values use a tolerance proportional to the expected magnitude.
+    // A fixed absolute epsilon such as 1e-5 would make the 1e-12 / -1e-12
+    // cases vacuous: 0.0, or a value with the wrong sign, would still pass.
+    template <>
+    void expect_scalar(const NYsonPull::TScalar& scalar, double value) {
+        UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::Float64, scalar.Type());
+
+        auto scalarValue = scalar.AsFloat64();
+        auto message = TStringBuilder() << "expected " << value << ", got " << scalarValue;
+
+        if (std::isfinite(value)) {
+            UNIT_ASSERT_C(std::isfinite(scalarValue), message);
+            // Relative tolerance; the smallest normal double keeps it strictly
+            // positive when the expected value is 0.0.
+            const double tolerance = 1e-9 * std::fabs(value) + std::numeric_limits<double>::min();
+            UNIT_ASSERT_DOUBLES_EQUAL(value, scalarValue, tolerance);
+        } else if (std::isnan(value)) {
+            UNIT_ASSERT_C(std::isnan(scalarValue), message);
+        } else if (value > 0) {
+            // Expected +inf.
+            UNIT_ASSERT_C(std::isinf(scalarValue) && (scalarValue > 0), message);
+        } else {
+            // Expected -inf.
+            UNIT_ASSERT_C(std::isinf(scalarValue) && (scalarValue < 0), message);
+        }
+    }
+
+ template <typename T> // Parses `data` as a single Node and requires the event sequence BeginStream, one Scalar equal to `value`, EndStream.
+ void test_scalar(TStringBuf data, T value) {
+ // SCOPED_TRACE(NYsonPull::detail::cescape::quote(data));
+ auto reader = memory_reader(data, NYsonPull::EStreamType::Node);
+
+ try {
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type());
+ {
+ auto& event = reader.NextEvent(); // the reference is used only inside this scope, before NextEvent() is called again
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, event.Type());
+ expect_scalar(event.AsScalar(), value);
+ }
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type());
+ } catch (const std::exception& err) { // report any std::exception as a test failure carrying its what() text
+ UNIT_FAIL(err.what());
+ }
+ }
+
+    // Iterates over every event produced by parsing `data` as a `mode` stream
+    // and discards them; callers only observe whether iteration throws.
+    void consume(TStringBuf data, NYsonPull::EStreamType mode = NYsonPull::EStreamType::Node) {
+        NYsonPull::TStreamEventsRange events(NYsonPull::NInput::FromMemory(data), mode);
+        for (auto& parsed : events) {
+            Y_UNUSED(parsed);
+        }
+    }
+
+#define ACCEPT(data) UNIT_ASSERT_NO_EXCEPTION(consume(data)) // `data` must parse as a Node (consume's default mode) without throwing
+#define REJECT(data) UNIT_ASSERT_EXCEPTION(consume(data), NYsonPull::NException::TBadInput) // parsing `data` as a Node must throw TBadInput
+
+#define ACCEPT2(data, mode) UNIT_ASSERT_NO_EXCEPTION(consume(data, mode)) // same as ACCEPT, with an explicit stream type
+#define REJECT2(data, mode) UNIT_ASSERT_EXCEPTION(consume(data, mode), NYsonPull::NException::TBadInput) // same as REJECT, with an explicit stream type
+
+} // anonymous namespace
+
+Y_UNIT_TEST_SUITE(Reader) {
+ Y_UNIT_TEST(ScalarEntity) { // "#" must parse to a scalar equal to a default-constructed TScalar
+ test_scalar(TStringBuf("#"), NYsonPull::TScalar{});
+ }
+
+ Y_UNIT_TEST(ScalarBoolean) { // Booleans in text form (%true / %false) and single-byte binary form
+ test_scalar(TStringBuf("%true"), true);
+ test_scalar(TStringBuf("%false"), false);
+
+ test_scalar(TStringBuf("\x05"sv), true); // binary form: byte 0x05 is true
+ test_scalar(TStringBuf("\x04"sv), false); // binary form: byte 0x04 is false
+
+ REJECT("%"); // a bare percent sign, truncated keywords and keywords with trailing characters are all bad input
+ REJECT("%trueth");
+ REJECT("%tru");
+ REJECT("%falseth");
+ REJECT("%fals");
+ REJECT("%hithere");
+ }
+
+ Y_UNIT_TEST(ScalarInt64) { // Signed integers in text form (optional sign) and binary form
+ test_scalar(TStringBuf("1"), i64{1});
+ test_scalar(TStringBuf("+1"), i64{1});
+ test_scalar(TStringBuf("100000"), i64{100000});
+ test_scalar(TStringBuf("+100000"), i64{100000});
+ test_scalar(TStringBuf("-100000"), i64{-100000});
+ test_scalar(TStringBuf("9223372036854775807"), i64{9223372036854775807});
+ test_scalar(TStringBuf("+9223372036854775807"), i64{9223372036854775807});
+
+ test_scalar(TStringBuf("\x02\x02"sv), i64{1}); // binary form: byte 0x02, then a varint; the payloads here match zigzag encoding (1 -> 2)
+ test_scalar(TStringBuf("\x02\xc0\x9a\x0c"sv), i64{100000});
+ test_scalar(TStringBuf("\x02\xbf\x9a\x0c"sv), i64{-100000}); // varint 199999 is the zigzag image of -100000
+ test_scalar(TStringBuf("\x02\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01"sv), i64{9223372036854775807});
+
+ REJECT("1a2"); // digits mixed with letters or embedded signs are bad input
+ REJECT("1-1-1-1");
+ REJECT("1+0");
+ }
+
+    // Unsigned 64-bit scalars: text form (digits with a `u` suffix and an
+    // optional leading `+`) and binary form.
+    Y_UNIT_TEST(ScalarUInt64) {
+        test_scalar(TStringBuf("1u"), ui64{1});
+        test_scalar(TStringBuf("+1u"), ui64{1});
+        test_scalar(TStringBuf("100000u"), ui64{100000});
+        test_scalar(TStringBuf("+100000u"), ui64{100000});
+        test_scalar(TStringBuf("9223372036854775807u"), ui64{9223372036854775807u});
+        test_scalar(TStringBuf("+9223372036854775807u"), ui64{9223372036854775807u});
+        test_scalar(TStringBuf("18446744073709551615u"), ui64{18446744073709551615u});
+        test_scalar(TStringBuf("+18446744073709551615u"), ui64{18446744073709551615u});
+
+        // Digits mixed with letters or embedded signs are bad input.
+        REJECT("1a2u");
+        REJECT("1-1-1-1u");
+        REJECT("1+0u");
+
+        // Binary form: byte 0x06 followed by a plain (non-zigzag) varint.
+        // The byte sequences mirror the BinaryUInt64 expectations in writer_ut.cpp.
+        test_scalar(TStringBuf("\x06\x01"sv), ui64{1});
+        test_scalar(TStringBuf("\x06\xc8\x01"sv), ui64{200});
+        test_scalar(TStringBuf("\x06\xa0\x9c\x01"sv), ui64{20000});
+        test_scalar(TStringBuf("\x06\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"sv), ui64{18446744073709551615u});
+    }
+
+ Y_UNIT_TEST(ScalarFloat64) { // Doubles in text form, in binary form, and as the %nan / %inf / %-inf keywords
+ test_scalar(TStringBuf("0.0"), double{0.0});
+ test_scalar(TStringBuf("+0.0"), double{0.0});
+ test_scalar(TStringBuf("+.0"), double{0.0});
+ test_scalar(TStringBuf("+.5"), double{0.5});
+ test_scalar(TStringBuf("-.5"), double{-0.5});
+ test_scalar(TStringBuf("1.0"), double{1.0});
+ test_scalar(TStringBuf("+1.0"), double{1.0});
+ test_scalar(TStringBuf("-1.0"), double{-1.0});
+ test_scalar(TStringBuf("1000.0"), double{1000.0});
+ test_scalar(TStringBuf("+1000.0"), double{1000.0});
+ test_scalar(TStringBuf("-1000.0"), double{-1000.0});
+ test_scalar(TStringBuf("1e12"), double{1e12});
+ test_scalar(TStringBuf("1e+12"), double{1e12});
+ test_scalar(TStringBuf("+1e+12"), double{1e12});
+ test_scalar(TStringBuf("-1e+12"), double{-1e12});
+ test_scalar(TStringBuf("1e-12"), double{1e-12});
+ test_scalar(TStringBuf("+1e-12"), double{1e-12});
+ test_scalar(TStringBuf("-1e-12"), double{-1e-12});
+
+ test_scalar(TStringBuf("\x03\x00\x00\x00\x00\x00\x00\x00\x00"sv), double{0.0}); // binary form: byte 0x03, then 8 payload bytes (little-endian IEEE-754 in the cases below)
+
+ test_scalar(
+ TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf8\x7f"sv),
+ double{std::numeric_limits<double>::quiet_NaN()});
+ test_scalar(
+ TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\x7f"sv),
+ double{std::numeric_limits<double>::infinity()});
+ test_scalar(
+ TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\xff"sv),
+ double{-std::numeric_limits<double>::infinity()});
+
+ test_scalar(
+ TStringBuf("%nan"),
+ double{std::numeric_limits<double>::quiet_NaN()});
+ test_scalar(
+ TStringBuf("%inf"),
+ double{std::numeric_limits<double>::infinity()});
+ test_scalar(
+ TStringBuf("%-inf"),
+ double{-std::numeric_limits<double>::infinity()});
+
+ REJECT("++0.0");
+ REJECT("++1.0");
+ REJECT("++.1");
+ REJECT("1.0.0");
+ //REJECT("1e+10000");
+ REJECT(TStringBuf("\x03\x00\x00\x00\x00\x00\x00\x00"sv)); // only 7 payload bytes after the 0x03 byte: truncated binary double
+
+ // XXX: Questionable behaviour?
+ ACCEPT("+.0");
+ ACCEPT("-.0");
+ // XXX: Rejected on Mac OS, accepted on Linux (?!)
+ //REJECT(".0");
+ //REJECT(".5");
+
+ REJECT("%NaN"); // the percent keywords are case-sensitive and must match exactly
+ REJECT("%+inf");
+ REJECT("%infinity");
+ REJECT("%na");
+ REJECT("%in");
+ REJECT("%-in");
+ }
+
+ Y_UNIT_TEST(ScalarString) { // Strings as bare identifiers, quoted literals, and length-prefixed binary form
+ test_scalar(TStringBuf(R"(foobar)"), TStringBuf("foobar"));
+ test_scalar(TStringBuf(R"(foobar11)"), TStringBuf("foobar11"));
+ test_scalar(TStringBuf(R"("foobar")"), TStringBuf("foobar"));
+ // The literal is split in two because "\x0cf" would otherwise be lexed
+ // as one hex escape (0xcf) instead of the byte 0x0c followed by 'f'.
+ test_scalar("\x01\x0c" "foobar"sv,
+ TStringBuf("foobar"));
+
+ REJECT(R"("foobar)"); // unterminated quoted string
+ REJECT("\x01\x0c" "fooba"sv); // same length byte as above but only 5 payload bytes
+ REJECT("\x01\x0d" "foobar"sv); // negative length
+ }
+
+ Y_UNIT_TEST(EmptyList) { // "[]" must yield BeginStream, BeginList, EndList, EndStream, in that order
+ auto reader = memory_reader("[]", NYsonPull::EStreamType::Node);
+
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginList, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndList, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type());
+
+ REJECT("["); // an unbalanced bracket on either side is bad input
+ REJECT("]");
+ }
+
+ Y_UNIT_TEST(EmptyMap) { // "{}" must yield BeginStream, BeginMap, EndMap, EndStream, in that order
+ auto reader = memory_reader("{}", NYsonPull::EStreamType::Node);
+
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginMap, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndMap, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type());
+
+ REJECT("{"); // an unbalanced brace on either side is bad input
+ REJECT("}");
+ }
+
+ Y_UNIT_TEST(Sample) { // Walks a small map event by event and checks every key and value in document order
+ auto reader = memory_reader(
+ R"({"11"=11;"nothing"=#;"zero"=0.;"foo"="bar";"list"=[1;2;3]})",
+ NYsonPull::EStreamType::Node);
+
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginMap, reader.NextEvent().Type());
+
+ { // "11" = 11 (a Key event followed by an Int64 scalar)
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(TStringBuf("11"), e.AsString());
+ }
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{11}}, e.AsScalar());
+ }
+
+ { // "nothing" = # (entity, compared with a default-constructed TScalar)
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(TStringBuf("nothing"), e.AsString());
+ }
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{}, e.AsScalar());
+ }
+
+ { // "zero" = 0. (the trailing dot makes it a Float64 scalar)
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(TStringBuf("zero"), e.AsString());
+ }
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{0.0}, e.AsScalar());
+ }
+
+ { // "foo" = "bar" (string scalar)
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(TStringBuf("foo"), e.AsString());
+ }
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{TStringBuf("bar")}, e.AsScalar());
+ }
+
+ { // "list" = [1;2;3] (a Key, then a nested list of three Int64 scalars)
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(TStringBuf("list"), e.AsString());
+ }
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginList, reader.NextEvent().Type());
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{1}}, e.AsScalar());
+ }
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{2}}, e.AsScalar());
+ }
+ {
+ auto& e = reader.NextEvent();
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{3}}, e.AsScalar());
+ }
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndList, reader.NextEvent().Type());
+
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndMap, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type());
+ }
+
+ Y_UNIT_TEST(Accept) { // Well-formed documents that must parse as a Node without throwing
+ ACCEPT("[]"); // empty and nested containers, with and without <> attribute blocks and trailing ';'
+ ACCEPT("{}");
+ ACCEPT("<>[]");
+ ACCEPT("<>{}");
+ ACCEPT("[{};{};{}]");
+ ACCEPT("[{};{};{};]");
+ ACCEPT("[<>{};<>{};<>{}]");
+ ACCEPT("[<>{};<>{};<>{};]");
+
+ ACCEPT("foo"); // bare strings as values, list items, map keys/values and attribute keys/values
+ ACCEPT("[foo]");
+ ACCEPT("[foo;]");
+ ACCEPT("{foo=foo}");
+ ACCEPT("{foo=foo;}");
+ ACCEPT("<>{foo=foo}");
+ ACCEPT("{foo=<foo=foo>foo}");
+ ACCEPT("{foo=<foo=foo;>foo}");
+ ACCEPT("{foo=<foo=foo>[foo;foo]}");
+ }
+
+ Y_UNIT_TEST(Reject) { // Malformed documents that must raise TBadInput when parsed as a Node
+ REJECT("["); // unterminated containers / attribute block
+ REJECT("{");
+ REJECT("<");
+
+ REJECT("[[}]"); // mismatched closing delimiters
+ REJECT("<>{]");
+ REJECT("[>]");
+
+ REJECT("<><>[]"); // repeated attribute blocks, or attributes not followed by a value
+ REJECT("[<>;<>]");
+
+ REJECT("{<>foo=foo}"); // attributes on a key, attributes without a value, key without a value
+ REJECT("{foo=<>}");
+ REJECT("{foo}");
+
+ REJECT("<a=b>"); // attributes alone do not form a node
+ REJECT("<>");
+
+ REJECT("@");
+ }
+
+ Y_UNIT_TEST(ReadPastEnd) { // Calling NextEvent() again after EndStream must throw TBadInput
+ auto reader = memory_reader("#", NYsonPull::EStreamType::Node);
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, reader.NextEvent().Type());
+ UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type());
+ UNIT_ASSERT_EXCEPTION(reader.NextEvent(), NYsonPull::NException::TBadInput); // one read too many
+ }
+
+ Y_UNIT_TEST(BadInput) { // A binary string whose varint length prefix is oversized must be reported as TBadInput
+ // max_size<ui32> < varint size < max_size<ui64>
+ auto t = TString("\x01\xff\xff\xff\xff\xff\xff\xff\xff"); // byte 0x01 (binary string, as in ScalarString) followed by eight 0xff bytes
+ auto reader = memory_reader(t, NYsonPull::EStreamType::Node);
+
+ UNIT_ASSERT_EQUAL(reader.NextEvent().Type(), NYsonPull::EEventType::BeginStream);
+ UNIT_ASSERT_EXCEPTION(reader.NextEvent(), NYsonPull::NException::TBadInput);
+ }
+
+ Y_UNIT_TEST(StreamType) { // Checks each input against all three stream types: Node, ListFragment, MapFragment
+ REJECT2("", NYsonPull::EStreamType::Node); // empty input is accepted as a fragment but not as a Node
+ ACCEPT2("", NYsonPull::EStreamType::ListFragment);
+ ACCEPT2("", NYsonPull::EStreamType::MapFragment);
+
+ ACCEPT2("[1]", NYsonPull::EStreamType::Node); // a single value is a Node and also a one-item list fragment
+ ACCEPT2("[1]", NYsonPull::EStreamType::ListFragment);
+ REJECT2("[1]", NYsonPull::EStreamType::MapFragment);
+
+ ACCEPT2("<foo=bar>[1]", NYsonPull::EStreamType::Node);
+ ACCEPT2("<foo=bar>[1]", NYsonPull::EStreamType::ListFragment);
+ REJECT2("<foo=bar>[1]", NYsonPull::EStreamType::MapFragment);
+
+ ACCEPT2(" [1] \t \t ", NYsonPull::EStreamType::Node); // surrounding whitespace does not change the outcome
+ ACCEPT2(" [1] \t \t ", NYsonPull::EStreamType::ListFragment);
+ REJECT2(" [1] \t \t ", NYsonPull::EStreamType::MapFragment);
+
+ REJECT2("[1];", NYsonPull::EStreamType::Node); // a top-level ';' separator is accepted only in fragments
+ ACCEPT2("[1];", NYsonPull::EStreamType::ListFragment);
+ REJECT2("[1];", NYsonPull::EStreamType::MapFragment);
+
+ REJECT2("[1]; foobar", NYsonPull::EStreamType::Node);
+ ACCEPT2("[1]; foobar", NYsonPull::EStreamType::ListFragment);
+ REJECT2("[1]; foobar", NYsonPull::EStreamType::MapFragment);
+
+ REJECT2("a=[1]", NYsonPull::EStreamType::Node); // top-level key=value pairs are accepted only as a map fragment
+ REJECT2("a=[1]", NYsonPull::EStreamType::ListFragment);
+ ACCEPT2("a=[1]", NYsonPull::EStreamType::MapFragment);
+
+ REJECT2("a=[1]; ", NYsonPull::EStreamType::Node);
+ REJECT2("a=[1]; ", NYsonPull::EStreamType::ListFragment);
+ ACCEPT2("a=[1]; ", NYsonPull::EStreamType::MapFragment);
+
+ REJECT2("a=[1]; b=foobar", NYsonPull::EStreamType::Node);
+ REJECT2("a=[1]; b=foobar", NYsonPull::EStreamType::ListFragment);
+ ACCEPT2("a=[1]; b=foobar", NYsonPull::EStreamType::MapFragment);
+ }
+
+} // Y_UNIT_TEST_SUITE(Reader)
diff --git a/library/cpp/yson_pull/ut/writer_ut.cpp b/library/cpp/yson_pull/ut/writer_ut.cpp
new file mode 100644
index 0000000000..5c304bad0f
--- /dev/null
+++ b/library/cpp/yson_pull/ut/writer_ut.cpp
@@ -0,0 +1,256 @@
+#include <library/cpp/yson_pull/scalar.h>
+#include <library/cpp/yson_pull/detail/writer.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/string.h>
+
+#include <climits>
+#include <limits>
+
+using namespace std::string_view_literals;
+
+namespace {
+ template <typename Writer, typename Function> // Runs `function` on a writer of implementation `Writer` that outputs into a local string, then returns that string.
+ TString with_writer(Function&& function) {
+ TString result; // declared before `writer`, which is handed a pointer to it
+ auto writer = NYsonPull::NDetail::make_writer<Writer>(
+ NYsonPull::NOutput::FromString(&result),
+ NYsonPull::EStreamType::Node);
+
+ function(writer);
+
+ return result; // NOTE(review): assumes all output has reached `result` once `function` returns — confirm against the output stream's flushing
+ }
+
+    // Serializes a single scalar as a complete stream (BeginStream, Scalar,
+    // EndStream) using writer implementation `Writer`, and returns the output.
+    template <typename Writer>
+    TString to_yson_string(const NYsonPull::TScalar& value) {
+        auto emit = [&value](NYsonPull::TWriter& out) {
+            out.BeginStream();
+            out.Scalar(value);
+            out.EndStream();
+        };
+        return with_writer<Writer>(emit);
+    }
+
+ template <typename T> // Shorthand: serializes `value` with the binary writer implementation.
+ TString to_yson_binary_string(T&& value) {
+ return to_yson_string<NYsonPull::NDetail::TBinaryWriterImpl>(std::forward<T>(value));
+ }
+
+ template <typename T> // Shorthand: serializes `value` with the (non-pretty) text writer implementation.
+ TString to_yson_text_string(T&& value) {
+ return to_yson_string<NYsonPull::NDetail::TTextWriterImpl>(std::forward<T>(value));
+ }
+
+} // anonymous namespace
+
+// =================== Text format =====================
+
+Y_UNIT_TEST_SUITE(Writer) {
+ Y_UNIT_TEST(TextEntity) { // a default-constructed TScalar is written as "#"
+ UNIT_ASSERT_VALUES_EQUAL(
+ "#",
+ to_yson_text_string(NYsonPull::TScalar{}));
+ }
+
+ Y_UNIT_TEST(TextBoolean) { // booleans are written as the %false / %true keywords
+ UNIT_ASSERT_VALUES_EQUAL(
+ "%false",
+ to_yson_text_string(NYsonPull::TScalar{false}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "%true",
+ to_yson_text_string(NYsonPull::TScalar{true}));
+ }
+
+ Y_UNIT_TEST(TextInt64) { // signed integers are written as plain decimal, covering 0, both signs and the INT64 limits
+ UNIT_ASSERT_VALUES_EQUAL(
+ "0",
+ to_yson_text_string(NYsonPull::TScalar{i64{0}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "200",
+ to_yson_text_string(NYsonPull::TScalar{i64{200}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "20000",
+ to_yson_text_string(NYsonPull::TScalar{i64{20000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "200000000",
+ to_yson_text_string(NYsonPull::TScalar{i64{200000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "20000000000000000",
+ to_yson_text_string(NYsonPull::TScalar{i64{20000000000000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "9223372036854775807",
+ to_yson_text_string(NYsonPull::TScalar{i64{INT64_MAX}}));
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ "-200",
+ to_yson_text_string(NYsonPull::TScalar{i64{-200}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "-20000",
+ to_yson_text_string(NYsonPull::TScalar{i64{-20000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "-200000000",
+ to_yson_text_string(NYsonPull::TScalar{i64{-200000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "-20000000000000000",
+ to_yson_text_string(NYsonPull::TScalar{i64{-20000000000000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "-9223372036854775808",
+ to_yson_text_string(NYsonPull::TScalar{i64{INT64_MIN}}));
+ }
+
+ Y_UNIT_TEST(TextUInt64) { // unsigned integers are written as decimal digits with a `u` suffix, up to UINT64_MAX
+ UNIT_ASSERT_VALUES_EQUAL(
+ "0u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{0}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "200u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{200}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "20000u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{20000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "200000000u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{200000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "20000000000000000u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{20000000000000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "9223372036854775807u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{INT64_MAX}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "18446744073709551615u",
+ to_yson_text_string(NYsonPull::TScalar{ui64{UINT64_MAX}}));
+ }
+
+ Y_UNIT_TEST(TextFloat64) { // only the non-finite values are checked here: they are written as the %inf / %-inf / %nan keywords
+ UNIT_ASSERT_VALUES_EQUAL(
+ "%inf",
+ to_yson_text_string(NYsonPull::TScalar{std::numeric_limits<double>::infinity()}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "%-inf",
+ to_yson_text_string(NYsonPull::TScalar{-std::numeric_limits<double>::infinity()}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ "%nan",
+ to_yson_text_string(NYsonPull::TScalar{std::numeric_limits<double>::quiet_NaN()}));
+ }
+
+ Y_UNIT_TEST(TextString) { // strings are always written double-quoted; a newline is written as the two characters \n
+ UNIT_ASSERT_VALUES_EQUAL(
+ R"("")",
+ to_yson_text_string(NYsonPull::TScalar{""}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ R"("hello")",
+ to_yson_text_string(NYsonPull::TScalar{"hello"}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ R"("hello\nworld")",
+ to_yson_text_string(NYsonPull::TScalar{"hello\nworld"}));
+ }
+
+ // =================== Binary format =====================
+
+ Y_UNIT_TEST(BinaryEntity) { // the entity is written as "#" by the binary writer too
+ UNIT_ASSERT_VALUES_EQUAL(
+ "#",
+ to_yson_binary_string(NYsonPull::TScalar{}));
+ }
+
+ Y_UNIT_TEST(BinaryBoolean) { // booleans are written as a single byte: 0x04 for false, 0x05 for true
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x4"),
+ to_yson_binary_string(NYsonPull::TScalar{false}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x5"),
+ to_yson_binary_string(NYsonPull::TScalar{true}));
+ }
+
+ Y_UNIT_TEST(BinaryInt64) { // signed integers: byte 0x02 followed by a varint; the expected payloads match zigzag encoding (200 -> 400, -200 -> 399)
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\0"sv), // the sv literal keeps the embedded NUL byte in the expected value
+ to_yson_binary_string(NYsonPull::TScalar{i64{0}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\x90\x3"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{200}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\xC0\xB8\x2"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{20000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\x80\x88\xDE\xBE\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{200000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\x80\x80\x90\xF8\x9B\xF9\x86G"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{20000000000000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{INT64_MAX}}));
+
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\x8F\x3"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{-200}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\xBF\xB8\x2"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{-20000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\xFF\x87\xDE\xBE\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{-200000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\xFF\xFF\x8F\xF8\x9B\xF9\x86G"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{-20000000000000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x2\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{i64{INT64_MIN}}));
+ }
+
+ Y_UNIT_TEST(BinaryUInt64) { // unsigned integers: byte 0x06 followed by a plain varint of the value (200 -> C8 01)
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\0"sv), // the sv literal keeps the embedded NUL byte in the expected value
+ to_yson_binary_string(NYsonPull::TScalar{ui64{0}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\xC8\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{ui64{200}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\xA0\x9C\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{ui64{20000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\x80\x84\xAF_"),
+ to_yson_binary_string(NYsonPull::TScalar{ui64{200000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\x80\x80\x88\xFC\xCD\xBC\xC3#"),
+ to_yson_binary_string(NYsonPull::TScalar{ui64{20000000000000000}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"),
+ to_yson_binary_string(NYsonPull::TScalar{ui64{INT64_MAX}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x6\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1"),
+ to_yson_binary_string(NYsonPull::TScalar{ui64{UINT64_MAX}}));
+ }
+
+ Y_UNIT_TEST(BinaryFloat64) { // doubles: byte 0x03 followed by 8 payload bytes; the expected bytes are the little-endian IEEE-754 image of the value
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\x7f"sv), // sv literal: the payload contains NUL bytes
+ to_yson_binary_string(NYsonPull::TScalar{std::numeric_limits<double>::infinity()}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\xff"sv),
+ to_yson_binary_string(NYsonPull::TScalar{-std::numeric_limits<double>::infinity()}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf8\x7f"sv),
+ to_yson_binary_string(NYsonPull::TScalar{std::numeric_limits<double>::quiet_NaN()}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x03\x9a\x99\x99\x99\x99\x99\xf1\x3f"),
+ to_yson_binary_string(NYsonPull::TScalar{double{1.1}}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x03\x9a\x99\x99\x99\x99\x99\xf1\xbf"),
+ to_yson_binary_string(NYsonPull::TScalar{double{-1.1}}));
+ }
+
+ Y_UNIT_TEST(BinaryString) { // strings: byte 0x01, a length prefix, then the raw bytes with no escaping
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x1\0"sv), // the sv literal keeps the embedded NUL length byte
+ to_yson_binary_string(NYsonPull::TScalar{""}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x1\nhello"), // the '\n' here is the length byte 0x0a (twice the 5-byte length), not part of the payload
+ to_yson_binary_string(NYsonPull::TScalar{"hello"}));
+ UNIT_ASSERT_VALUES_EQUAL(
+ TStringBuf("\x1\x16hello\nworld"), // length byte 0x16 (twice the 11-byte length); the newline is stored verbatim
+ to_yson_binary_string(NYsonPull::TScalar{"hello\nworld"}));
+ }
+
+} // Y_UNIT_TEST_SUITE(Writer)
diff --git a/library/cpp/yson_pull/ut/ya.make b/library/cpp/yson_pull/ut/ya.make
new file mode 100644
index 0000000000..a269dfd2ad
--- /dev/null
+++ b/library/cpp/yson_pull/ut/ya.make
@@ -0,0 +1,12 @@
+UNITTEST_FOR(library/cpp/yson_pull)
+
+OWNER(borman)
+
+SRCS(
+ cescape_ut.cpp
+ reader_ut.cpp
+ writer_ut.cpp
+ loop_ut.cpp
+)
+
+END()
diff --git a/library/cpp/yson_pull/writer.cpp b/library/cpp/yson_pull/writer.cpp
new file mode 100644
index 0000000000..1df92bf40f
--- /dev/null
+++ b/library/cpp/yson_pull/writer.cpp
@@ -0,0 +1,30 @@
+#include "writer.h"
+#include <library/cpp/yson_pull/detail/writer.h>
+
+using namespace NYsonPull;
+
+// Builds a TWriter that emits binary YSON into `stream`, treating the output
+// as a stream of kind `mode`.
+TWriter NYsonPull::MakeBinaryWriter(
+    THolder<NOutput::IStream> stream,
+    EStreamType mode) {
+    using TImpl = NDetail::TBinaryWriterImpl;
+    return NDetail::make_writer<TImpl>(std::move(stream), mode);
+}
+
+// Builds a TWriter that emits text YSON into `stream`, treating the output
+// as a stream of kind `mode`.
+TWriter NYsonPull::MakeTextWriter(
+    THolder<NOutput::IStream> stream,
+    EStreamType mode) {
+    using TImpl = NDetail::TTextWriterImpl;
+    return NDetail::make_writer<TImpl>(std::move(stream), mode);
+}
+
+// Builds a TWriter that emits pretty-printed text YSON into `stream`;
+// `indent_size` is forwarded to the pretty writer implementation.
+TWriter NYsonPull::MakePrettyTextWriter(
+    THolder<NOutput::IStream> stream,
+    EStreamType mode,
+    size_t indent_size) {
+    using TImpl = NDetail::TPrettyWriterImpl;
+    return NDetail::make_writer<TImpl>(std::move(stream), mode, indent_size);
+}
diff --git a/library/cpp/yson_pull/writer.h b/library/cpp/yson_pull/writer.h
new file mode 100644
index 0000000000..dec63328be
--- /dev/null
+++ b/library/cpp/yson_pull/writer.h
@@ -0,0 +1,126 @@
+#pragma once
+
+#include "consumer.h"
+#include "output.h"
+#include "scalar.h"
+#include "stream_type.h"
+
+#include <memory>
+
+namespace NYsonPull {
+ //! \brief YSON writer facade class
+ //!
+ //! Owns a YSON consumer and a corresponding output stream.
+ //! Methods invoke corresponding \p NYsonPull::IConsumer methods and can be chained.
+ class TWriter {
+ THolder<NOutput::IStream> Stream_; // owned output stream; declared first, so it is destroyed after Impl_
+ THolder<IConsumer> Impl_; // owned consumer that every method below forwards to
+
+ public:
+ TWriter( // takes ownership of both the stream and the consumer
+ THolder<NOutput::IStream> stream,
+ THolder<IConsumer> impl)
+ : Stream_{std::move(stream)}
+ , Impl_{std::move(impl)} {
+ }
+
+ //! \brief Get a reference to underlying consumer.
+ //!
+ //! Useful with \p NYsonPull::bridge
+ IConsumer& GetConsumer() {
+ return *Impl_;
+ }
+
+ TWriter& BeginStream() { // each method below forwards to the matching IConsumer::On* call and returns *this for chaining
+ Impl_->OnBeginStream();
+ return *this;
+ }
+ TWriter& EndStream() {
+ Impl_->OnEndStream();
+ return *this;
+ }
+
+ TWriter& BeginList() {
+ Impl_->OnBeginList();
+ return *this;
+ }
+ TWriter& EndList() {
+ Impl_->OnEndList();
+ return *this;
+ }
+
+ TWriter& BeginMap() {
+ Impl_->OnBeginMap();
+ return *this;
+ }
+ TWriter& EndMap() {
+ Impl_->OnEndMap();
+ return *this;
+ }
+
+ TWriter& BeginAttributes() {
+ Impl_->OnBeginAttributes();
+ return *this;
+ }
+ TWriter& EndAttributes() {
+ Impl_->OnEndAttributes();
+ return *this;
+ }
+
+ TWriter& Key(TStringBuf name) {
+ Impl_->OnKey(name);
+ return *this;
+ }
+
+ TWriter& Entity() {
+ Impl_->OnEntity();
+ return *this;
+ }
+ TWriter& Boolean(bool value) {
+ Impl_->OnScalarBoolean(value);
+ return *this;
+ }
+ TWriter& Int64(i64 value) {
+ Impl_->OnScalarInt64(value);
+ return *this;
+ }
+ TWriter& UInt64(ui64 value) {
+ Impl_->OnScalarUInt64(value);
+ return *this;
+ }
+ TWriter& Float64(double value) {
+ Impl_->OnScalarFloat64(value);
+ return *this;
+ }
+ TWriter& String(TStringBuf value) {
+ Impl_->OnScalarString(value);
+ return *this;
+ }
+
+ TWriter& Scalar(const TScalar& value) { // passes an already-typed scalar to IConsumer::OnScalar
+ Impl_->OnScalar(value);
+ return *this;
+ }
+ TWriter& Event(const TEvent& value) { // passes a complete event to IConsumer::OnEvent
+ Impl_->OnEvent(value);
+ return *this;
+ }
+ };
+
+ //! \brief Construct a writer for binary YSON format.
+ TWriter MakeBinaryWriter(
+ THolder<NOutput::IStream> stream, // output stream; ownership passes to the returned writer
+ EStreamType mode); // stream kind, an EStreamType value (declared in stream_type.h)
+
+ //! \brief Construct a writer for text YSON format.
+ TWriter MakeTextWriter(
+ THolder<NOutput::IStream> stream,
+ EStreamType mode);
+
+ //! \brief Construct a writer for pretty text YSON format.
+ TWriter MakePrettyTextWriter(
+ THolder<NOutput::IStream> stream,
+ EStreamType mode,
+ size_t indent_size = 4); // forwarded to the pretty writer implementation; defaults to 4
+
+}
diff --git a/library/cpp/yson_pull/ya.make b/library/cpp/yson_pull/ya.make
new file mode 100644
index 0000000000..a373e0a6ba
--- /dev/null
+++ b/library/cpp/yson_pull/ya.make
@@ -0,0 +1,21 @@
+LIBRARY(yson_pull)
+
+OWNER(borman)
+
+SRCS(
+ consumer.cpp
+ event.cpp
+ exceptions.cpp
+ input.cpp
+ output.cpp
+ read_ops.cpp
+ reader.cpp
+ scalar.cpp
+ writer.cpp
+)
+
+GENERATE_ENUM_SERIALIZATION(event.h)
+
+GENERATE_ENUM_SERIALIZATION(scalar.h)
+
+END()
diff --git a/library/cpp/yson_pull/yson.h b/library/cpp/yson_pull/yson.h
new file mode 100644
index 0000000000..a77eaa5c94
--- /dev/null
+++ b/library/cpp/yson_pull/yson.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "bridge.h"
+#include "consumer.h"
+#include "event.h"
+#include "exceptions.h"
+#include "input.h"
+#include "output.h"
+#include "position_info.h"
+#include "range.h"
+#include "reader.h"
+#include "scalar.h"
+#include "stream_type.h"
+#include "writer.h"