diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yson_pull | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yson_pull')
55 files changed, 5557 insertions, 0 deletions
diff --git a/library/cpp/yson_pull/bridge.h b/library/cpp/yson_pull/bridge.h new file mode 100644 index 0000000000..ac767dcba0 --- /dev/null +++ b/library/cpp/yson_pull/bridge.h @@ -0,0 +1,34 @@ +#pragma once + +#include "consumer.h" +#include "event.h" +#include "writer.h" + +namespace NYsonPull { + //! \brief Connect YSON stream producer and consumer. + //! + //! Useful for writing YSON stream filters. Forwards every event, including the final EndStream, and then returns. + //! \p Producer must have a \p NextEvent() method (like \p NYsonPull::reader). + //! \p Consumer must have an \p OnEvent() method (like the \p NYsonPull::IConsumer interface). + template <typename Producer, typename Consumer> + inline void Bridge(Producer&& producer, Consumer&& consumer) { + for (;;) { + auto& event = producer.NextEvent(); + consumer.OnEvent(event); + if (event.Type() == EEventType::EndStream) { + break; + } + } + } + + template <typename Producer> + inline void Bridge(Producer&& producer, TWriter& writer_) { + Bridge(std::forward<Producer>(producer), writer_.GetConsumer()); + } + + template <typename Producer> + inline void Bridge(Producer&& producer, TWriter&& writer_) { + Bridge(std::forward<Producer>(producer), writer_.GetConsumer()); + } + +} diff --git a/library/cpp/yson_pull/buffer.h b/library/cpp/yson_pull/buffer.h new file mode 100644 index 0000000000..04c9220ef3 --- /dev/null +++ b/library/cpp/yson_pull/buffer.h @@ -0,0 +1,79 @@ +#pragma once + +#include <util/system/types.h> +#include <util/system/yassert.h> + +#include <cstddef> + +namespace NYsonPull { + //! \brief A non-owning buffer model. + //! + //! Represents a \p pos pointer moving between \p begin and \p end. + template <typename T> + class buffer { + T* begin_ = nullptr; + T* pos_ = nullptr; + T* end_ = nullptr; + + public: + T* begin() const noexcept { + return begin_; + } + T* pos() const noexcept { + return pos_; + } + T* end() const noexcept { + return end_; + } + + //! \brief Amount of data after current position. + size_t available() const noexcept { + return end_ - pos_; + } + + //! 
\brief Amount of data before current position. + size_t used() const noexcept { + return pos_ - begin_; + } + + //! \brief Move current position \p nbytes forward. + void advance(size_t nbytes) noexcept { + Y_ASSERT(pos_ + nbytes <= end_); + pos_ += nbytes; + } + + //! \brief Reset buffer pointers. + void reset(T* new_begin, T* new_end, T* new_pos) { + begin_ = new_begin; + pos_ = new_pos; + end_ = new_end; + } + + //! \brief Reset buffer to beginning + void reset(T* new_begin, T* new_end) { + reset(new_begin, new_end, new_begin); + } + }; + + class output_buffer: public buffer<ui8> { + public: + //! \brief An output buffer is empty when there is no data written to it. + bool is_empty() const noexcept { + return pos() == begin(); + } + + //! \brief An output buffer is full when there is no space to write more data to it. + bool is_full() const noexcept { + return pos() == end(); + } + }; + + class input_buffer: public buffer<const ui8> { + public: + //! An input stream is empty when there is no data to read in it. 
+ bool is_empty() const noexcept { + return pos() == end(); + } + }; + +} diff --git a/library/cpp/yson_pull/consumer.cpp b/library/cpp/yson_pull/consumer.cpp new file mode 100644 index 0000000000..c238e0a6fb --- /dev/null +++ b/library/cpp/yson_pull/consumer.cpp @@ -0,0 +1,83 @@ +#include "consumer.h" + +#include <library/cpp/yson_pull/detail/macros.h> + +using namespace NYsonPull; + +void IConsumer::OnScalar(const TScalar& value) { + switch (value.Type()) { + case EScalarType::Entity: + OnEntity(); + break; + + case EScalarType::Boolean: + OnScalarBoolean(value.AsBoolean()); + break; + + case EScalarType::Int64: + OnScalarInt64(value.AsInt64()); + break; + + case EScalarType::UInt64: + OnScalarUInt64(value.AsUInt64()); + break; + + case EScalarType::Float64: + OnScalarFloat64(value.AsFloat64()); + break; + + case EScalarType::String: + OnScalarString(value.AsString()); + break; + + default: + Y_UNREACHABLE(); + } +} + +void IConsumer::OnEvent(const TEvent& value) { + switch (value.Type()) { + case EEventType::BeginStream: + OnBeginStream(); + break; + + case EEventType::EndStream: + OnEndStream(); + break; + + case EEventType::BeginList: + OnBeginList(); + break; + + case EEventType::EndList: + OnEndList(); + break; + + case EEventType::BeginMap: + OnBeginMap(); + break; + + case EEventType::Key: + OnKey(value.AsString()); + break; + + case EEventType::EndMap: + OnEndMap(); + break; + + case EEventType::BeginAttributes: + OnBeginAttributes(); + break; + + case EEventType::EndAttributes: + OnEndAttributes(); + break; + + case EEventType::Scalar: + OnScalar(value.AsScalar()); + break; + + default: + Y_UNREACHABLE(); + } +} diff --git a/library/cpp/yson_pull/consumer.h b/library/cpp/yson_pull/consumer.h new file mode 100644 index 0000000000..f3b1398d4c --- /dev/null +++ b/library/cpp/yson_pull/consumer.h @@ -0,0 +1,37 @@ +#pragma once + +#include "event.h" + +#include <util/generic/strbuf.h> +#include <util/system/types.h> + +namespace NYsonPull { + class IConsumer 
{ + public: + virtual ~IConsumer() = default; + + virtual void OnBeginStream() = 0; + virtual void OnEndStream() = 0; + + virtual void OnBeginList() = 0; + virtual void OnEndList() = 0; + + virtual void OnBeginMap() = 0; + virtual void OnEndMap() = 0; + + virtual void OnBeginAttributes() = 0; + virtual void OnEndAttributes() = 0; + + virtual void OnKey(TStringBuf name) = 0; + + virtual void OnEntity() = 0; + virtual void OnScalarBoolean(bool value) = 0; + virtual void OnScalarInt64(i64 value) = 0; + virtual void OnScalarUInt64(ui64 value) = 0; + virtual void OnScalarFloat64(double value) = 0; + virtual void OnScalarString(TStringBuf value) = 0; + + virtual void OnScalar(const TScalar& value); + virtual void OnEvent(const TEvent& value); + }; +} diff --git a/library/cpp/yson_pull/cyson_enums.h b/library/cpp/yson_pull/cyson_enums.h new file mode 100644 index 0000000000..315de97307 --- /dev/null +++ b/library/cpp/yson_pull/cyson_enums.h @@ -0,0 +1,47 @@ +#pragma once + +typedef enum yson_event_type { + YSON_EVENT_BEGIN_STREAM = 0, + YSON_EVENT_END_STREAM = 1, + YSON_EVENT_BEGIN_LIST = 2, + YSON_EVENT_END_LIST = 3, + YSON_EVENT_BEGIN_MAP = 4, + YSON_EVENT_END_MAP = 5, + YSON_EVENT_BEGIN_ATTRIBUTES = 6, + YSON_EVENT_END_ATTRIBUTES = 7, + YSON_EVENT_KEY = 8, + YSON_EVENT_SCALAR = 9, + YSON_EVENT_ERROR = 10 +} yson_event_type; + +typedef enum yson_scalar_type { + YSON_SCALAR_ENTITY = 0, + YSON_SCALAR_BOOLEAN = 1, + YSON_SCALAR_INT64 = 2, + YSON_SCALAR_UINT64 = 3, + YSON_SCALAR_FLOAT64 = 4, + YSON_SCALAR_STRING = 5 +} yson_scalar_type; + +typedef enum yson_input_stream_result { + YSON_INPUT_STREAM_RESULT_OK = 0, + YSON_INPUT_STREAM_RESULT_EOF = 1, + YSON_INPUT_STREAM_RESULT_ERROR = 2 +} yson_input_stream_result; + +typedef enum yson_output_stream_result { + YSON_OUTPUT_STREAM_RESULT_OK = 0, + YSON_OUTPUT_STREAM_RESULT_ERROR = 1 +} yson_output_stream_result; + +typedef enum yson_writer_result { + YSON_WRITER_RESULT_OK = 0, + YSON_WRITER_RESULT_BAD_STREAM = 1, + 
YSON_WRITER_RESULT_ERROR = 2 +} yson_writer_result; + +typedef enum yson_stream_type { + YSON_STREAM_TYPE_NODE = 0, + YSON_STREAM_TYPE_LIST_FRAGMENT = 1, + YSON_STREAM_TYPE_MAP_FRAGMENT = 2 +} yson_stream_type; diff --git a/library/cpp/yson_pull/detail/byte_reader.h b/library/cpp/yson_pull/detail/byte_reader.h new file mode 100644 index 0000000000..7cea50d323 --- /dev/null +++ b/library/cpp/yson_pull/detail/byte_reader.h @@ -0,0 +1,74 @@ +#pragma once + +#include "cescape.h" +#include "fail.h" +#include "stream_counter.h" + +#include <library/cpp/yson_pull/input.h> + +namespace NYsonPull { + namespace NDetail { + template <class StreamCounter> + class byte_reader { + NYsonPull::NInput::IStream& stream_; + StreamCounter stream_counter_; + + public: + byte_reader(NYsonPull::NInput::IStream& stream) + : stream_(stream) + { + } + + // const-ness added to prevent direct stream mutation + const NYsonPull::NInput::IStream& stream() { + return stream_; + } + + template <typename... Args> + ATTRIBUTE(noinline, cold) + void fail[[noreturn]](const char* msg, Args&&... args) { + NYsonPull::NDetail::fail( + stream_counter_.info(), + msg, + std::forward<Args>(args)...); + } + + template <bool AllowFinish> + void fill_buffer() { + stream_.fill_buffer(); + + if (!AllowFinish) { + auto& buf = stream_.buffer(); + if (Y_UNLIKELY(buf.is_empty() && stream_.at_end())) { + fail("Premature end of stream"); + } + } + } + + void fill_buffer() { + return fill_buffer<true>(); + } + + template <bool AllowFinish> + ui8 get_byte() { + fill_buffer<AllowFinish>(); + auto& buf = stream_.buffer(); + return !buf.is_empty() + ? 
*buf.pos() + : ui8{'\0'}; + } + + ui8 get_byte() { + return get_byte<true>(); + } + + void advance(size_t bytes) { + auto& buf = stream_.buffer(); + stream_counter_.update( + buf.pos(), + buf.pos() + bytes); + buf.advance(bytes); + } + }; + } +} diff --git a/library/cpp/yson_pull/detail/byte_writer.h b/library/cpp/yson_pull/detail/byte_writer.h new file mode 100644 index 0000000000..dc1d4b4b96 --- /dev/null +++ b/library/cpp/yson_pull/detail/byte_writer.h @@ -0,0 +1,77 @@ +#pragma once + +#include "macros.h" + +#include <library/cpp/yson_pull/output.h> + +#include <util/system/types.h> + +#include <cstddef> +#include <cstring> + +namespace NYsonPull { + namespace NDetail { + template <class StreamCounter> + class byte_writer { + NYsonPull::NOutput::IStream& stream_; + StreamCounter stream_counter_; + + public: + byte_writer(NYsonPull::NOutput::IStream& stream) + : stream_(stream) + { + } + + // const-ness added to prevent direct stream mutation + const NYsonPull::NOutput::IStream& stream() { + return stream_; + } + const StreamCounter& counter() { + return stream_counter_; + } + + void flush_buffer() { + stream_.flush_buffer(); + } + + void advance(size_t bytes) { + auto& buf = stream_.buffer(); + stream_counter_.update( + buf.pos(), + buf.pos() + bytes); + buf.advance(bytes); + } + + void write(ui8 c) { + auto& buf = stream_.buffer(); + if (Y_LIKELY(!buf.is_full())) { + *buf.pos() = c; + advance(1); + } else { + auto ptr = reinterpret_cast<char*>(&c); + stream_counter_.update(&c, &c + 1); + stream_.flush_buffer({ptr, 1}); + } + } + + void write(const ui8* data, size_t size) { + auto& buf = stream_.buffer(); + auto free_buf = buf.available(); + if (Y_LIKELY(size < free_buf)) { + ::memcpy(buf.pos(), data, size); + advance(size); + } else { + if (!buf.is_full()) { + ::memcpy(buf.pos(), data, free_buf); + advance(free_buf); + data += free_buf; + size -= free_buf; + } + stream_counter_.update(data, data + size); + stream_.flush_buffer({reinterpret_cast<const 
char*>(data), + size}); + } + } + }; + } +} diff --git a/library/cpp/yson_pull/detail/cescape.h b/library/cpp/yson_pull/detail/cescape.h new file mode 100644 index 0000000000..1ea150e69a --- /dev/null +++ b/library/cpp/yson_pull/detail/cescape.h @@ -0,0 +1,143 @@ +#pragma once + +#include "byte_writer.h" +#include "cescape_decode.h" +#include "cescape_encode.h" +#include "macros.h" + +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> + +/* REFERENCES FOR ESCAPE SEQUENCE INTERPRETATION: + * C99 p. 6.4.3 Universal character names. + * C99 p. 6.4.4.4 Character constants. + * + * <simple-escape-sequence> ::= { + * \' , \" , \? , \\ , + * \a , \b , \f , \n , \r , \t , \v + * } + * + * <octal-escape-sequence> ::= \ <octal-digit> {1, 3} + * <hexadecimal-escape-sequence> ::= \x <hexadecimal-digit> + + * <universal-character-name> ::= \u <hexadecimal-digit> {4} + * || \U <hexadecimal-digit> {8} + * + * NOTE (6.4.4.4.7): + * Each octal or hexadecimal escape sequence is the longest sequence of characters that can + * constitute the escape sequence. + * + * THEREFORE: + * - An octal escape sequence extends up to the first non-octal-digit character. + * - An octal escape sequence never spans more than three octal digits. + * - A hexadecimal escape sequence extends up to the first non-hexadecimal-digit character. + * - A universal character name consists of exactly 4 or 8 hexadecimal digits. 
+ * + */ + +namespace NYsonPull { + namespace NDetail { + namespace NCEscape { + inline void encode(TString& dest, TStringBuf data) { + NImpl::escape_impl( + reinterpret_cast<const ui8*>(data.data()), + data.size(), + [&](const ui8* str, size_t size) { + dest.append( + reinterpret_cast<const char*>(str), + size); + }); + } + + // dest must have at least 4*data.size() bytes available + inline size_t encode(ui8* dest, TStringBuf data) { + auto* dest_begin = dest; + NImpl::escape_impl( + reinterpret_cast<const ui8*>(data.data()), + data.size(), + [&](const ui8* str, size_t size) { + ::memcpy(dest, str, size); + dest += size; + }); + return dest - dest_begin; + } + + template <typename U> + void encode(byte_writer<U>& dest, TStringBuf data) { + auto& buffer = dest.stream().buffer(); + if (Y_LIKELY(buffer.available() >= data.size() * 4)) { + auto size = encode(buffer.pos(), data); + dest.advance(size); + } else { + NImpl::escape_impl( + reinterpret_cast<const ui8*>(data.data()), + data.size(), + [&](const ui8* str, size_t size) { + dest.write(str, size); + }); + } + } + + inline TString encode(TStringBuf data) { + TString result; + result.reserve(data.size()); + encode(result, data); + return result; + } + + inline void decode(TString& dest, TStringBuf data) { + NImpl::unescape_impl( + reinterpret_cast<const ui8*>(data.begin()), + reinterpret_cast<const ui8*>(data.end()), + [&](ui8 c) { + dest += c; + }, + [&](const ui8* p, size_t len) { + dest.append(reinterpret_cast<const char*>(p), len); + }); + } + + inline void decode_inplace(TVector<ui8>& data) { + auto* out = static_cast<ui8*>( + ::memchr(data.data(), '\\', data.size())); + if (out == nullptr) { + return; + } + NImpl::unescape_impl( + out, + data.data() + data.size(), + [&](ui8 c) { + *out++ = c; + }, + [&](const ui8* p, size_t len) { + ::memmove(out, p, len); + out += len; + }); + data.resize(out - &data[0]); + } + + inline TString decode(TStringBuf data) { + TString result; + result.reserve(data.size()); + 
decode(result, data); + return result; + } + + ATTRIBUTE(noinline, cold) + inline TString quote(TStringBuf str) { + TString result; + result.reserve(str.size() + 16); + result += '"'; + encode(result, str); + result += '"'; + return result; + } + + ATTRIBUTE(noinline, cold) + inline TString quote(ui8 ch) { + char c = ch; + return quote(TStringBuf(&c, 1)); + } + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/cescape_decode.h b/library/cpp/yson_pull/detail/cescape_decode.h new file mode 100644 index 0000000000..2ee5dd9500 --- /dev/null +++ b/library/cpp/yson_pull/detail/cescape_decode.h @@ -0,0 +1,154 @@ +#pragma once + +#include <util/system/types.h> + +#include <algorithm> +#include <cstring> + +namespace NYsonPull { + namespace NDetail { + namespace NCEscape { + namespace NImpl { + inline ui8 as_digit(ui8 c) { + return c - ui8{'0'}; + } + + inline ui8 as_hexdigit(ui8 c) { + static constexpr ui8 hex_decode_map[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, + 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255}; + + return hex_decode_map[c]; + } + + inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) { + auto digit = ui8{0}; + while (n-- && (digit = as_digit(*p)) < 8) { + result = result * 8 + digit; + ++p; + } + return p; + } + + inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) { + auto digit = ui8{0}; + while (n-- && (digit = as_hexdigit(*p)) < 16) { + result = result * 16 + digit; + ++p; + } + return p; + } + + inline const ui8* unescape_char_and_advance( + ui8& result, + const ui8* p, + const ui8* end) { + switch (*p) { + default: + result = *p; + ++p; + break; + case 'b': + result = '\b'; + ++p; + break; + case 'f': + result = '\f'; + ++p; + break; + case 'n': + result = '\n'; + ++p; + break; + case 'r': + result = '\r'; + ++p; + break; + case 't': + result = '\t'; + ++p; + break; + + case 'x': { + ++p; + result = 0; + auto* next = read_hex( + result, + p, std::min<ptrdiff_t>(2, end - p)); + if (next > p) { + p = next; + } else { + result = 'x'; + } + } break; + + case '0': + case '1': + case '2': + case '3': + result = 0; + p = read_oct( + result, + p, std::min<ptrdiff_t>(3, end - p)); + break; + + case '4': + case '5': + case '6': + case '7': + result = 0; + p = read_oct( + result, + p, std::min<ptrdiff_t>(2, end - p)); + break; + } + return p; + } + + template <typename T, typename U> + inline void unescape_impl( + const ui8* p, + const ui8* end, + T&& consume_one, + U&& consume_span) { + while (p < end) { + auto* escaped = static_cast<const ui8*>( + ::memchr(p, '\\', end - p)); + if (escaped == nullptr) { + consume_span(p, end - p); + return; + } else { + consume_span(p, escaped - p); + auto c = ui8{'\\'}; + p = escaped + 1; + if (p < end) { + p = unescape_char_and_advance(c, p, end); + } + 
consume_one(c); + } + } + } + } + } // namespace NCEscape + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/cescape_encode.h b/library/cpp/yson_pull/detail/cescape_encode.h new file mode 100644 index 0000000000..bf5765f1d9 --- /dev/null +++ b/library/cpp/yson_pull/detail/cescape_encode.h @@ -0,0 +1,114 @@ +#pragma once + +#include <util/system/types.h> + +// Whether to ensure strict ASCII compatibility +// Turns UTF-8 strings into unreadable garbage for no known reason +//#define CESCAPE_STRICT_ASCII + +namespace NYsonPull { + namespace NDetail { + namespace NCEscape { + namespace NImpl { + inline ui8 hex_digit(ui8 value) { + constexpr ui8 hex_digits[] = "0123456789ABCDEF"; + return hex_digits[value]; + } + + inline ui8 oct_digit(ui8 value) { + return '0' + value; + } + + inline bool is_printable(ui8 c) { +#ifdef CESCAPE_STRICT_ASCII + return c >= 32 && c <= 126; +#else + return c >= 32; +#endif + } + + inline bool is_hex_digit(ui8 c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + } + + inline bool is_oct_digit(ui8 c) { + return c >= '0' && c <= '7'; + } + + constexpr size_t ESCAPE_C_BUFFER_SIZE = 4; + + inline size_t escape_char( + ui8 c, + ui8 next, + ui8 r[ESCAPE_C_BUFFER_SIZE]) { + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. 
+ if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (is_printable(c)) { + r[0] = c; + return 1; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (c < 8 && !is_oct_digit(next)) { + r[0] = '\\'; + r[1] = oct_digit(c); + return 2; + } else if (!is_hex_digit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = hex_digit((c & 0xF0) >> 4); + r[3] = hex_digit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = oct_digit((c & 0700) >> 6); + r[2] = oct_digit((c & 0070) >> 3); + r[3] = oct_digit((c & 0007) >> 0); + return 4; + } + } + + template <typename T> + inline void escape_impl(const ui8* str, size_t len, T&& consume) { + ui8 buffer[ESCAPE_C_BUFFER_SIZE]; + + size_t i, j; + for (i = 0, j = 0; i < len; ++i) { + auto next_char = i + 1 < len ? str[i + 1] : 0; + size_t rlen = escape_char(str[i], next_char, buffer); + + if (rlen > 1) { + consume(str + j, i - j); + j = i + 1; + consume(buffer, rlen); + } + } + + if (j > 0) { + consume(str + j, len - j); + } else { + consume(str, len); + } + } + } + } // namespace NCEscape + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/fail.h b/library/cpp/yson_pull/detail/fail.h new file mode 100644 index 0000000000..6937612d0b --- /dev/null +++ b/library/cpp/yson_pull/detail/fail.h @@ -0,0 +1,20 @@ +#pragma once + +#include "format_string.h" +#include "macros.h" + +#include <library/cpp/yson_pull/exceptions.h> +#include <library/cpp/yson_pull/position_info.h> + +namespace NYsonPull { + namespace NDetail { + template <typename... Args> + ATTRIBUTE(noreturn, noinline, cold) + void fail( + const TPositionInfo& info, + Args&&... 
args) { + auto formatted_message = format_string(std::forward<Args>(args)...); + throw NException::TBadInput(formatted_message, info); + } + } +} diff --git a/library/cpp/yson_pull/detail/format_string.h b/library/cpp/yson_pull/detail/format_string.h new file mode 100644 index 0000000000..683fd1bf36 --- /dev/null +++ b/library/cpp/yson_pull/detail/format_string.h @@ -0,0 +1,26 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/string/builder.h> + +namespace NYsonPull { + namespace NDetail { + namespace NImpl { + inline void apply_args(TStringBuilder&) { + } + + template <typename T, typename... Args> + inline void apply_args(TStringBuilder& builder, T&& arg, Args&&... args) { + apply_args(builder << arg, std::forward<Args>(args)...); + } + } + + template <typename... Args> + TString format_string(Args&&... args) { + TStringBuilder builder; + NImpl::apply_args(builder, std::forward<Args>(args)...); + return TString(std::move(builder)); + } + } +} diff --git a/library/cpp/yson_pull/detail/input/buffered.h b/library/cpp/yson_pull/detail/input/buffered.h new file mode 100644 index 0000000000..9b1482577f --- /dev/null +++ b/library/cpp/yson_pull/detail/input/buffered.h @@ -0,0 +1,35 @@ +#pragma once + +#include <library/cpp/yson_pull/detail/macros.h> + +#include <library/cpp/yson_pull/exceptions.h> +#include <library/cpp/yson_pull/input.h> + +#include <cstdio> +#include <memory> + +namespace NYsonPull { + namespace NDetail { + namespace NInput { + class TBuffered: public NYsonPull::NInput::IStream { + TArrayHolder<ui8> buffer_; + size_t size_; + + public: + explicit TBuffered(size_t buffer_size) + : buffer_{new ui8[buffer_size]} + , size_{buffer_size} { + } + + protected: + ui8* buffer_data() const { + return buffer_.Get(); + } + + size_t buffer_size() const { + return size_; + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/input/stdio_file.h 
b/library/cpp/yson_pull/detail/input/stdio_file.h new file mode 100644 index 0000000000..c412b7e59b --- /dev/null +++ b/library/cpp/yson_pull/detail/input/stdio_file.h @@ -0,0 +1,42 @@ +#pragma once + +#include "buffered.h" + +#include <library/cpp/yson_pull/detail/macros.h> + +#include <library/cpp/yson_pull/exceptions.h> +#include <library/cpp/yson_pull/input.h> + +#include <cstdio> +#include <memory> + +namespace NYsonPull { + namespace NDetail { + namespace NInput { + class TStdioFile: public TBuffered { + FILE* file_; + + public: + TStdioFile(FILE* file, size_t buffer_size) + : TBuffered(buffer_size) + , file_{file} { + } + + protected: + result do_fill_buffer() override { + auto nread = ::fread(buffer_data(), 1, buffer_size(), file_); + if (Y_UNLIKELY(nread == 0)) { + if (ferror(file_)) { + throw NException::TSystemError(); + } + if (feof(file_)) { + return result::at_end; + } + } + buffer().reset(buffer_data(), buffer_data() + nread); + return result::have_more_data; + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/input/stream.h b/library/cpp/yson_pull/detail/input/stream.h new file mode 100644 index 0000000000..791cd5a3f5 --- /dev/null +++ b/library/cpp/yson_pull/detail/input/stream.h @@ -0,0 +1,69 @@ +#pragma once + +#include <library/cpp/yson_pull/detail/macros.h> + +#include <library/cpp/yson_pull/input.h> + +#include <util/stream/buffered.h> +#include <util/stream/file.h> +#include <util/stream/zerocopy.h> +#include <util/system/file.h> + +namespace NYsonPull { + namespace NDetail { + namespace NInput { + class TStreamBase: public NYsonPull::NInput::IStream { + protected: + result DoFillBufferFrom(IZeroCopyInput& input) { + void* ptr = nullptr; + size_t size = input.Next(&ptr); + if (Y_UNLIKELY(size == 0)) { + return result::at_end; + } + buffer().reset(static_cast<ui8*>(ptr), static_cast<ui8*>(ptr) + size); + return result::have_more_data; + } + }; + + class TZeroCopy: public TStreamBase { + IZeroCopyInput* Input; + + 
public: + explicit TZeroCopy(IZeroCopyInput* input) + : Input(input) + { + } + + protected: + result do_fill_buffer() override { + return DoFillBufferFrom(*Input); + } + }; + + template <typename TBuffered> + class TOwned: public TStreamBase { + TBuffered Input; + + public: + template <typename... Args> + explicit TOwned(Args&&... args) + : Input(std::forward<Args>(args)...) + { + } + + protected: + result do_fill_buffer() override { + return DoFillBufferFrom(Input); + } + }; + + class TFHandle: public TOwned<TFileInput> { + public: + TFHandle(int fd, size_t buffer_size) + : TOwned<TFileInput>(Duplicate(fd), buffer_size) + { + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/lexer_base.h b/library/cpp/yson_pull/detail/lexer_base.h new file mode 100644 index 0000000000..572bdb3d18 --- /dev/null +++ b/library/cpp/yson_pull/detail/lexer_base.h @@ -0,0 +1,343 @@ +#pragma once + +#include "byte_reader.h" +#include "cescape.h" +#include "macros.h" +#include "number.h" +#include "percent_scalar.h" +#include "stream_counter.h" +#include "varint.h" + +#include <util/generic/maybe.h> +#include <util/generic/vector.h> +#include <util/string/cast.h> + +namespace NYsonPull { + namespace NDetail { + template <bool EnableLinePositionInfo> + class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> { + using Base = byte_reader< + stream_counter<EnableLinePositionInfo>>; + + TVector<ui8> token_buffer_; + TMaybe<size_t> memory_limit_; + + public: + lexer_base( + NYsonPull::NInput::IStream& buffer, + TMaybe<size_t> memory_limit) + : Base(buffer) + , memory_limit_{memory_limit} { + } + + ATTRIBUTE(noinline, hot) + ui8 skip_space_and_get_byte() { + auto& buf = Base::stream().buffer(); + if (Y_LIKELY(!buf.is_empty())) { + auto ch = *buf.pos(); + if (Y_LIKELY(!is_space(ch))) { + return ch; + } + } + return skip_space_and_get_byte_fallback(); + } + + ATTRIBUTE(hot) + ui8 get_byte() { + auto& buf = Base::stream().buffer(); + if 
(Y_LIKELY(!buf.is_empty())) { + return *buf.pos(); + } + return Base::get_byte(); + } + + number read_numeric() { + token_buffer_.clear(); + auto type = number_type::int64; + while (true) { + auto ch = this->Base::template get_byte<true>(); + if (isdigit(ch) || ch == '+' || ch == '-') { + token_buffer_.push_back(ch); + } else if (ch == '.' || ch == 'e' || ch == 'E') { + token_buffer_.push_back(ch); + type = number_type::float64; + } else if (ch == 'u') { + token_buffer_.push_back(ch); + type = number_type::uint64; + } else if (Y_UNLIKELY(isalpha(ch))) { + COLD_BLOCK_BYVALUE + Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal"); + COLD_BLOCK_END + } else { + break; + } + check_memory_limit(); + Base::advance(1); + } + + auto str = token_buffer(); + try { + switch (type) { + case number_type::float64: + return FromString<double>(str); + case number_type::int64: + return FromString<i64>(str); + case number_type::uint64: + str.Chop(1); // 'u' suffix + return FromString<ui64>(str); + } + Y_UNREACHABLE(); + } catch (const std::exception& err) { + Base::fail(err.what()); + } + } + + TStringBuf read_quoted_string() { + auto count_trailing_slashes = [](ui8* begin, ui8* end) { + auto count = size_t{0}; + if (begin < end) { + for (auto p = end - 1; p >= begin && *p == '\\'; --p) { + ++count; + } + } + return count; + }; + + token_buffer_.clear(); + auto& buf = Base::stream().buffer(); + while (true) { + this->Base::template fill_buffer<false>(); + auto* quote = reinterpret_cast<const ui8*>( + ::memchr(buf.pos(), '"', buf.available())); + if (quote == nullptr) { + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + buf.end()); + Base::advance(buf.available()); + continue; + } + + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + quote); + Base::advance(quote - buf.pos() + 1); // +1 for the quote itself + + // We must count the number of '\' at the end of StringValue + // to check if it's not \" + int slash_count = count_trailing_slashes( + 
token_buffer_.data(), + token_buffer_.data() + token_buffer_.size()); + if (slash_count % 2 == 0) { + break; + } else { + token_buffer_.push_back('"'); + } + check_memory_limit(); + } + + NCEscape::decode_inplace(token_buffer_); + return token_buffer(); + } + + TStringBuf read_unquoted_string() { + token_buffer_.clear(); + while (true) { + auto ch = this->Base::template get_byte<true>(); + if (isalpha(ch) || isdigit(ch) || + ch == '_' || ch == '-' || ch == '%' || ch == '.') { + token_buffer_.push_back(ch); + } else { + break; + } + check_memory_limit(); + Base::advance(1); + } + return token_buffer(); + } + + ATTRIBUTE(noinline, hot) + TStringBuf read_binary_string() { + auto slength = NVarInt::read<i32>(*this); + if (Y_UNLIKELY(slength < 0)) { + COLD_BLOCK_BYVALUE + Base::fail("Negative binary string literal length ", slength); + COLD_BLOCK_END + } + auto length = static_cast<ui32>(slength); + + auto& buf = Base::stream().buffer(); + if (Y_LIKELY(buf.available() >= length)) { + auto result = TStringBuf{ + reinterpret_cast<const char*>(buf.pos()), + length}; + Base::advance(length); + return result; + } else { // reading in Buffer + return read_binary_string_fallback(length); + } + } + + ATTRIBUTE(noinline) + TStringBuf read_binary_string_fallback(size_t length) { + auto& buf = Base::stream().buffer(); + auto needToRead = length; + token_buffer_.clear(); + while (needToRead) { + this->Base::template fill_buffer<false>(); + auto chunk_size = std::min(needToRead, buf.available()); + + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + buf.pos() + chunk_size); + check_memory_limit(); + needToRead -= chunk_size; + Base::advance(chunk_size); + } + return token_buffer(); + } + + percent_scalar read_percent_scalar() { + auto throw_incorrect_percent_scalar = [&]() { + Base::fail("Incorrect %-literal prefix ", NCEscape::quote(token_buffer())); + }; + + auto assert_literal = [&](TStringBuf literal) -> void { + for (size_t i = 2; i < literal.size(); ++i) { + 
token_buffer_.push_back(this->Base::template get_byte<false>()); + Base::advance(1); + if (Y_UNLIKELY(token_buffer_.back() != literal[i])) { + throw_incorrect_percent_scalar(); + } + } + }; + + token_buffer_.clear(); + token_buffer_.push_back(this->Base::template get_byte<false>()); + Base::advance(1); + + switch (token_buffer_[0]) { + case 't': + assert_literal(percent_scalar::true_literal); + return percent_scalar(true); + case 'f': + assert_literal(percent_scalar::false_literal); + return percent_scalar(false); + case 'n': + assert_literal(percent_scalar::nan_literal); + return percent_scalar(std::numeric_limits<double>::quiet_NaN()); + case 'i': + assert_literal(percent_scalar::positive_inf_literal); + return percent_scalar(std::numeric_limits<double>::infinity()); + case '-': + assert_literal(percent_scalar::negative_inf_literal); + return percent_scalar(-std::numeric_limits<double>::infinity()); + default: + throw_incorrect_percent_scalar(); + } + + Y_UNREACHABLE(); + } + + i64 read_binary_int64() { + return NVarInt::read<i64>(*this); + } + + ui64 read_binary_uint64() { + return NVarInt::read<ui64>(*this); + } + + double read_binary_double() { + union { + double as_double; + ui8 as_bytes[sizeof(double)]; + } data; + static_assert(sizeof(data) == sizeof(double), "bad union size"); + + auto needToRead = sizeof(double); + + auto& buf = Base::stream().buffer(); + while (needToRead != 0) { + Base::fill_buffer(); + + auto chunk_size = std::min(needToRead, buf.available()); + if (chunk_size == 0) { + Base::fail("Error parsing binary double literal"); + } + std::copy( + buf.pos(), + buf.pos() + chunk_size, + data.as_bytes + (sizeof(double) - needToRead)); + needToRead -= chunk_size; + Base::advance(chunk_size); + } + return data.as_double; + } + + private: + static bool is_space(ui8 ch) { + static const ui8 lookupTable[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return lookupTable[ch]; + } + + ATTRIBUTE(noinline, cold) + ui8 skip_space_and_get_byte_fallback() { + auto& buf = Base::stream().buffer(); + while (true) { + // FIXME + if (buf.is_empty()) { + if (Base::stream().at_end()) { + return '\0'; + } + Base::fill_buffer(); + } else { + if (!is_space(*buf.pos())) { + break; + } + Base::advance(1); + } + } + return Base::get_byte(); + } + + void check_memory_limit() { + if (Y_UNLIKELY(memory_limit_ && token_buffer_.capacity() > *memory_limit_)) { + COLD_BLOCK_BYVALUE + Base::fail( + "Memory limit exceeded while parsing YSON stream: " + "allocated ", + token_buffer_.capacity(), + ", limit ", *memory_limit_); + COLD_BLOCK_END + } + } + + TStringBuf token_buffer() const { + auto* begin = reinterpret_cast<const char*>(token_buffer_.data()); + return {begin, token_buffer_.size()}; + } + }; + } +} diff --git a/library/cpp/yson_pull/detail/macros.h b/library/cpp/yson_pull/detail/macros.h new file mode 100644 index 0000000000..7243f9cfe1 --- /dev/null +++ b/library/cpp/yson_pull/detail/macros.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/system/compiler.h> + +#if defined(__GNUC__) +#define ATTRIBUTE(args...) __attribute__((args)) +#else +#define ATTRIBUTE(...) 
+#endif + +#if defined(__GNUC__) && !defined(__clang__) +#define COLD_BLOCK_BYVALUE [=]() ATTRIBUTE(noinline, cold) { +#define COLD_BLOCK_BYREF [&]() ATTRIBUTE(noinline, cold) { +#define COLD_BLOCK_END \ + } \ + (); +#else +// Clang does not support gnu-style attributes on lambda functions yet +#define COLD_BLOCK_BYVALUE [=]() { +#define COLD_BLOCK_BYREF [&]() { +#define COLD_BLOCK_END \ + } \ + (); +#endif diff --git a/library/cpp/yson_pull/detail/number.h b/library/cpp/yson_pull/detail/number.h new file mode 100644 index 0000000000..5595f55e05 --- /dev/null +++ b/library/cpp/yson_pull/detail/number.h @@ -0,0 +1,37 @@ +#pragma once + +#include <util/system/types.h> + +namespace NYsonPull { + namespace NDetail { + enum class number_type { + float64, + uint64, + int64 + }; + + struct number { + number_type type; + union { + double as_float64; + ui64 as_uint64; + i64 as_int64; + } value; + + number(double v) { + type = number_type::float64; + value.as_float64 = v; + } + + number(i64 v) { + type = number_type::int64; + value.as_int64 = v; + } + + number(ui64 v) { + type = number_type::uint64; + value.as_uint64 = v; + } + }; + } +} diff --git a/library/cpp/yson_pull/detail/output/buffered.h b/library/cpp/yson_pull/detail/output/buffered.h new file mode 100644 index 0000000000..475cf34785 --- /dev/null +++ b/library/cpp/yson_pull/detail/output/buffered.h @@ -0,0 +1,51 @@ +#pragma once + +#include <library/cpp/yson_pull/detail/macros.h> + +#include <library/cpp/yson_pull/output.h> + +#include <util/generic/strbuf.h> + +namespace NYsonPull { + namespace NDetail { + namespace NOutput { + template <typename T> + class TBuffered: public NYsonPull::NOutput::IStream { + TArrayHolder<ui8> buffer_; + size_t size_; + + public: + TBuffered(size_t buffer_size) + : buffer_{new ui8[buffer_size]} + , size_{buffer_size} { + reset_buffer(); + } + + protected: + void do_flush_buffer(TStringBuf extra) override { + auto& buf = buffer(); + if (!buf.is_empty()) { + 
do_write({reinterpret_cast<const char*>(buf.begin()), buf.used()}); + reset_buffer(); + } + if (extra.size() >= buf.available()) { + do_write(extra); + } else if (extra.size() > 0) { + ::memcpy(buf.pos(), extra.data(), extra.size()); + buf.advance(extra.size()); + } + } + + private: + void do_write(TStringBuf data) { + // CRTP dispatch + static_cast<T*>(this)->write(data); + } + + void reset_buffer() { + buffer().reset(buffer_.Get(), buffer_.Get() + size_); + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/output/stdio_file.h b/library/cpp/yson_pull/detail/output/stdio_file.h new file mode 100644 index 0000000000..03f2b40dc5 --- /dev/null +++ b/library/cpp/yson_pull/detail/output/stdio_file.h @@ -0,0 +1,33 @@ +#pragma once + +#include "buffered.h" + +#include <library/cpp/yson_pull/detail/macros.h> + +#include <library/cpp/yson_pull/exceptions.h> + +#include <cstdio> + +namespace NYsonPull { + namespace NDetail { + namespace NOutput { + class TStdioFile: public TBuffered<TStdioFile> { + FILE* file_; + + public: + TStdioFile(FILE* file, size_t buffer_size) + : TBuffered<TStdioFile>(buffer_size) + , file_(file) + { + } + + void write(TStringBuf data) { + auto nwritten = ::fwrite(data.data(), 1, data.size(), file_); + if (Y_UNLIKELY(static_cast<size_t>(nwritten) != data.size())) { + throw NException::TSystemError(); + } + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/output/stream.h b/library/cpp/yson_pull/detail/output/stream.h new file mode 100644 index 0000000000..d4810f3353 --- /dev/null +++ b/library/cpp/yson_pull/detail/output/stream.h @@ -0,0 +1,56 @@ +#pragma once + +#include "buffered.h" + +#include <library/cpp/yson_pull/detail/macros.h> +#include <library/cpp/yson_pull/exceptions.h> + +#include <util/stream/output.h> +#include <util/stream/file.h> +#include <util/system/file.h> + +namespace NYsonPull { + namespace NDetail { + namespace NOutput { + class TStream: public TBuffered<TStream> { 
+ IOutputStream* Output; + + public: + TStream(IOutputStream* output, size_t buffer_size) + : TBuffered<TStream>(buffer_size) + , Output(output) + { + } + + void write(TStringBuf data) { + Output->Write(data); + } + }; + + template <typename TOutput> + class TOwned: public TBuffered<TOwned<TOutput>> { + TOutput Output; + + public: + template <typename... Args> + TOwned(size_t buffer_size, Args&&... args) + : TBuffered<TOwned>(buffer_size) + , Output(std::forward<Args>(args)...) + { + } + + void write(TStringBuf data) { + Output.Write(data); + } + }; + + class TFHandle: public TOwned<TUnbufferedFileOutput> { + public: + TFHandle(int fd, size_t buffer_size) + : TOwned<TUnbufferedFileOutput>(buffer_size, Duplicate(fd)) + { + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/percent_scalar.h b/library/cpp/yson_pull/detail/percent_scalar.h new file mode 100644 index 0000000000..ff4571842e --- /dev/null +++ b/library/cpp/yson_pull/detail/percent_scalar.h @@ -0,0 +1,36 @@ +#pragma once + +#include <util/generic/strbuf.h> + +namespace NYsonPull::NDetail { + enum class percent_scalar_type { + boolean, + float64 + }; + + struct percent_scalar { + //! Text boolean literals + static constexpr TStringBuf true_literal = "%true"; + static constexpr TStringBuf false_literal = "%false"; + //! 
Text floating-point literals + static constexpr TStringBuf nan_literal = "%nan"; + static constexpr TStringBuf positive_inf_literal = "%inf"; + static constexpr TStringBuf negative_inf_literal = "%-inf"; + + percent_scalar_type type; + union { + double as_float64; + bool as_boolean; + } value; + + percent_scalar(double v) { + type = percent_scalar_type::float64; + value.as_float64 = v; + } + + percent_scalar(bool v) { + type = percent_scalar_type::boolean; + value.as_boolean = v; + } + }; +} diff --git a/library/cpp/yson_pull/detail/reader.h b/library/cpp/yson_pull/detail/reader.h new file mode 100644 index 0000000000..0e02396358 --- /dev/null +++ b/library/cpp/yson_pull/detail/reader.h @@ -0,0 +1,677 @@ +#pragma once + +#include "lexer_base.h" +#include "symbols.h" + +#include <library/cpp/yson_pull/reader.h> + +#include <util/generic/maybe.h> +#include <util/generic/vector.h> + +namespace NYsonPull { + namespace NDetail { + /*! \internal */ + //////////////////////////////////////////////////////////////////////////////// + + enum class special_token : ui8 { + // Special values: + // YSON + semicolon = 0, // ; + equals = 1, // = + hash = 2, // # + left_bracket = 3, // [ + right_bracket = 4, // ] + left_brace = 5, // { + right_brace = 6, // } + left_angle = 7, // < + right_angle = 8, // > + }; + + // char_class tree representation: + // Root = xb + // BinaryStringOrOtherSpecialToken = x0b + // BinaryString = 00b + // OtherSpecialToken = 10b + // Other = x1b + // BinaryScalar = xx01b + // BinaryInt64 = 0001b + // BinaryDouble = 0101b + // BinaryFalse = 1001b + // BinaryTrue = 1101b + // Other = xxx11b + // Quote = 00011b + // DigitOrMinus = 00111b + // String = 01011b + // Space = 01111b + // Plus = 10011b + // None = 10111b + // Percent = 11011b + enum class char_class : ui8 { + binary_string = 0, // = 00b + + special_token_mask = 2, // = 10b + semicolon = 2 + (0 << 2), + equals = 2 + (1 << 2), + hash = 2 + (2 << 2), + left_bracket = 2 + (3 << 2), + right_bracket 
= 2 + (4 << 2), + left_brace = 2 + (5 << 2), + right_brace = 2 + (6 << 2), + left_angle = 2 + (7 << 2), + right_angle = 2 + (8 << 2), + + binary_scalar_mask = 1, + binary_int64 = 1 + (0 << 2), // = 001b + binary_double = 1 + (1 << 2), // = 101b + binary_false = 1 + (2 << 2), // = 1001b + binary_true = 1 + (3 << 2), // = 1101b + binary_uint64 = 1 + (4 << 2), // = 10001b + + other_mask = 3, + quote = 3 + (0 << 2), // = 00011b + number = 3 + (1 << 2), // = 00111b + string = 3 + (2 << 2), // = 01011b + percent = 3 + (6 << 2), // = 11011b + none = 3 + (5 << 2), // = 10111b + }; + +#define CHAR_SUBCLASS(x) (static_cast<ui8>(x) >> 2) + + inline char_class get_char_class(ui8 ch) { +#define NN char_class::none +#define BS char_class::binary_string +#define BI char_class::binary_int64 +#define BD char_class::binary_double +#define BF char_class::binary_false +#define BT char_class::binary_true +#define BU char_class::binary_uint64 +#define SP NN // char_class::space +#define NB char_class::number +#define ST char_class::string +#define QU char_class::quote +#define PC char_class::percent +#define TT(name) (static_cast<char_class>( \ + (static_cast<ui8>(special_token::name) << 2) | static_cast<ui8>(char_class::special_token_mask))) + + static constexpr char_class lookup[256] = + { + NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + // 32 + SP, // ' ' + NN, // '!' + QU, // '"' + TT(hash), // '#' + NN, // '$' + PC, // '%' + NN, // '&' + NN, // "'" + NN, // '(' + NN, // ')' + NN, // '*' + NB, // '+' + NN, // ',' + NB, // '-' + NN, // '.' + NN, // '/' + + // 48 + NB, NB, NB, NB, NB, NB, NB, NB, NB, NB, // '0' - '9' + NN, // ':' + TT(semicolon), // ';' + TT(left_angle), // '<' + TT(equals), // '=' + TT(right_angle), // '>' + NN, // '?' 
+ + // 64 + NN, // '@' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' + TT(left_bracket), // '[' + NN, // '\' + TT(right_bracket), // ']' + NN, // '^' + ST, // '_' + + // 96 + NN, // '`' + + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' + TT(left_brace), // '{' + NN, // '|' + TT(right_brace), // '}' + NN, // '~' + NN, // '^?' non-printable + // 128 + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; + +#undef NN +#undef BS +#undef BI +#undef BD +#undef SP +#undef NB +#undef ST +#undef QU +#undef TT + return lookup[ch]; + } + + template <bool EnableLinePositionInfo> + class gen_reader_impl { + enum class state { + delimiter = 0, //! expecting ';' or closing-char ('>', ']', '}') + maybe_value = 1, //! expecting a value or closing-char + maybe_key = 2, //! expecting a key or closing-char + equals = 3, //! expecting '=' (followed by value) + value = 4, //! expecting a value + value_noattr = 5, //! expecting a value w/o attrs (after attrs) + + // by design, rare states have numbers starting from first_rare_state + first_rare_state = 6, + before_begin = first_rare_state, //! before started reading the stream + before_end = first_rare_state + 1, //! Expecting end of stream + after_end = first_rare_state + 2, //! 
after end of stream + }; + + lexer_base<EnableLinePositionInfo> lexer_; + state state_; + TEvent event_; + TVector<EEventType> stack_; + EStreamType mode_; + + public: + gen_reader_impl( + NYsonPull::NInput::IStream& buffer, + EStreamType mode, + TMaybe<size_t> memoryLimit = {}) + : lexer_(buffer, memoryLimit) + , state_{state::before_begin} + , mode_{mode} { + } + + const TEvent& last_event() const { + return event_; + } + + ATTRIBUTE(hot) + const TEvent& next_event() { + if (Y_LIKELY(state_ < state::first_rare_state)) { + // 'hot' handler for in-stream events + next_event_hot(); + } else { + // these events happen no more than once per stream + next_event_cold(); + } + return event_; + } + + private: + ATTRIBUTE(hot) + void next_event_hot() { + auto ch = lexer_.get_byte(); + auto cls = get_char_class(ch); + if (Y_UNLIKELY(cls == char_class::none)) { + ch = lexer_.skip_space_and_get_byte(); + if (Y_UNLIKELY(ch == NSymbol::eof)) { + handle_eof(); + return; + } + cls = get_char_class(ch); + } + + // states maybe_value/value/value_noattr are distinguished + // later in state_value_special + switch (state_) { + case state::maybe_value: + state_value(ch, cls); + break; + case state::maybe_key: + state_maybe_key(ch, cls); + break; + case state::equals: + state_equals(ch); + break; + case state::value: + state_value(ch, cls); + break; + case state::value_noattr: + state_value(ch, cls); + break; + case state::delimiter: + state_delimiter(ch, cls); + break; + default: + Y_UNREACHABLE(); + } + } + + ATTRIBUTE(noinline, cold) + void next_event_cold() { + switch (state_) { + case state::before_begin: + state_before_begin(); + break; + case state::after_end: + lexer_.fail("Attempted read past stream end"); + case state::before_end: + state_before_end(); + break; + default: + Y_UNREACHABLE(); + } + } + + //! Present a scalar value for caller + template <typename T> + void yield(T value) { + event_ = TEvent{TScalar{value}}; + } + + //! 
Present a scalar value with non-scalar tag (i.e. key) + template <typename T> + void yield(EEventType type, T value) { + event_ = TEvent{type, TScalar{value}}; + } + + //! Present a value from number variant + void yield(const number& value) { + switch (value.type) { + case number_type::int64: + yield(value.value.as_int64); + break; + case number_type::uint64: + yield(value.value.as_uint64); + break; + case number_type::float64: + yield(value.value.as_float64); + break; + } + } + + //! Present a value from %-literal variant + void yield(const percent_scalar& value) { + switch (value.type) { + case percent_scalar_type::boolean: + yield(value.value.as_boolean); + break; + case percent_scalar_type::float64: + yield(value.value.as_float64); + break; + } + } + + //! Present a value-less event + void yield(EEventType type) { + event_ = TEvent{type}; + } + + //! Push the opening of a paired event + void push(EEventType type) { + stack_.push_back(type); + } + + //! Close the paired_event, verify that delimiters are well-formed + void pop(EEventType first, EEventType last) { + if (Y_UNLIKELY(stack_.empty() || stack_.back() != first)) { + pop_fail(first, last); + return; + } + stack_.pop_back(); + + yield(last); + switch (first) { + case EEventType::BeginList: + next(state::delimiter); + break; + + case EEventType::BeginMap: + next(state::delimiter); + break; + + case EEventType::BeginAttributes: + next(state::value_noattr); + break; + + case EEventType::BeginStream: + next(state::after_end); + break; + + default: + Y_UNREACHABLE(); + } + + if (Y_UNLIKELY(mode_ == EStreamType::Node && stack_.size() == 1 && state_ == state::delimiter)) { + next(state::before_end); + } + } + + ATTRIBUTE(noinline, cold) + void pop_fail(EEventType first, EEventType last) { + if (stack_.empty()) { + lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but event stack is empty"); + } else { + lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', 
but '", stack_.back(), "' is found."); + } + } + + //! Transition to new_state + void next(state new_state) { + state_ = new_state; + } + + bool in_map() { + return (stack_.back() == EEventType::BeginMap) || (stack_.back() == EEventType::BeginAttributes) || (stack_.back() == EEventType::BeginStream && mode_ == EStreamType::MapFragment); + } + + ATTRIBUTE(noinline, cold) + void handle_eof() { + switch (state_) { + case state::maybe_value: + case state::maybe_key: + case state::delimiter: + case state::before_end: + pop(EEventType::BeginStream, EEventType::EndStream); + return; + + default: + lexer_.fail("Unexpected end of stream"); + } + } + + ATTRIBUTE(noinline, cold) + void state_before_begin() { + push(EEventType::BeginStream); + yield(EEventType::BeginStream); + switch (mode_) { + case EStreamType::Node: + next(state::value); + break; + case EStreamType::ListFragment: + next(state::maybe_value); + break; + case EStreamType::MapFragment: + next(state::maybe_key); + break; + default: + Y_UNREACHABLE(); + } + } + + ATTRIBUTE(noinline, cold) + void state_before_end() { + auto ch = lexer_.skip_space_and_get_byte(); + if (ch == NSymbol::eof) { + handle_eof(); + } else { + lexer_.fail("Expected stream end, but found ", NCEscape::quote(ch)); + } + } + + ATTRIBUTE(hot) + void state_delimiter(ui8 ch, char_class cls) { + if (Y_LIKELY(ch == NSymbol::item_separator)) { + lexer_.advance(1); + next(in_map() ? state::maybe_key : state::maybe_value); + // immediately read next value + next_event_hot(); + return; + } + state_delimiter_fallback(ch, cls); + } + + ATTRIBUTE(noinline, hot) + void state_delimiter_fallback(ui8 ch, char_class cls) { + auto cls_bits = static_cast<ui8>(cls); + if ((cls_bits & 3) == static_cast<ui8>(char_class::special_token_mask)) { + auto token = static_cast<special_token>(cls_bits >> 2); + lexer_.advance(1); + switch (token) { + /* // handled in the fast track + case special_token::semicolon: + next(in_map()? 
state::maybe_key : state::maybe_value); + // immediately read next value + return next_event(); + */ + + case special_token::right_bracket: + pop(EEventType::BeginList, EEventType::EndList); + return; + + case special_token::right_brace: + pop(EEventType::BeginMap, EEventType::EndMap); + return; + + case special_token::right_angle: + pop(EEventType::BeginAttributes, EEventType::EndAttributes); + return; + + default: + break; + } + } + + COLD_BLOCK_BYVALUE + lexer_.fail( + "Unexpected ", NCEscape::quote(ch), ", expected one of ", + NCEscape::quote(NSymbol::item_separator), ", ", + NCEscape::quote(NSymbol::end_list), ", ", + NCEscape::quote(NSymbol::end_map), ", ", + NCEscape::quote(NSymbol::end_attributes)); + COLD_BLOCK_END + } + + ATTRIBUTE(noinline, hot) + void state_maybe_key(ui8 ch, char_class cls) { + auto key = TStringBuf{}; + // Keys are always strings, put binary-string key into fast lane + if (Y_LIKELY(ch == NSymbol::string_marker)) { + lexer_.advance(1); + key = lexer_.read_binary_string(); + } else { + switch (cls) { + case char_class::quote: + lexer_.advance(1); + key = lexer_.read_quoted_string(); + break; + + case char_class::string: + key = lexer_.read_unquoted_string(); + break; + + case char_class::right_brace: + lexer_.advance(1); + pop(EEventType::BeginMap, EEventType::EndMap); + return; + + case char_class::right_angle: + lexer_.advance(1); + pop(EEventType::BeginAttributes, EEventType::EndAttributes); + return; + + default: + COLD_BLOCK_BYVALUE + lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected key string"); + COLD_BLOCK_END + } + } + + yield(EEventType::Key, key); + next(state::equals); + } + + ATTRIBUTE(hot) + void state_equals(ui8 ch) { + // skip '=' + if (Y_UNLIKELY(ch != NSymbol::key_value_separator)) { + COLD_BLOCK_BYVALUE + lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected ", NCEscape::quote(NSymbol::key_value_separator)); + COLD_BLOCK_END + } + lexer_.advance(1); + next(state::value); + // immediately read the 
following value + // (this symbol yields no result) + next_event_hot(); + } + + ATTRIBUTE(noinline, hot) + void state_value(ui8 ch, char_class cls) { + auto cls_bits = static_cast<ui8>(cls); + if (cls_bits & 1) { // Other = x1b + if (cls_bits & (1 << 1)) { // Other = xxx11b + state_value_text_scalar(cls); + } else { // BinaryScalar = x01b + state_value_binary_scalar(cls); + } + next(state::delimiter); + } else { // BinaryStringOrOtherSpecialToken = x0b + lexer_.advance(1); + if (cls_bits & 1 << 1) { + // special token + auto token = static_cast<special_token>(cls_bits >> 2); + state_value_special(token, ch); + } else { + // binary string + yield(lexer_.read_binary_string()); + next(state::delimiter); + } + } + } + + ATTRIBUTE(noinline) + void state_value_special(special_token token, ui8 ch) { + // Value starters are always accepted values + switch (token) { + case special_token::hash: + yield(TScalar{}); + next(state::delimiter); + return; + + case special_token::left_bracket: + push(EEventType::BeginList); + yield(EEventType::BeginList); + next(state::maybe_value); + return; + + case special_token::left_brace: + push(EEventType::BeginMap); + yield(EEventType::BeginMap); + next(state::maybe_key); + return; + + default: + break; + } + + // ...closing-chars are only allowed in maybe_value state + if (state_ == state::maybe_value) { + switch (token) { + case special_token::right_bracket: + pop(EEventType::BeginList, EEventType::EndList); + return; + + case special_token::right_brace: + pop(EEventType::BeginMap, EEventType::EndMap); + return; + + // right_angle is impossible in maybe_value state + // (only in delimiter, maybe_key) + + default: + break; + } + } + + // attributes are not allowed after attributes (thus, value_noattr state) + if (state_ != state::value_noattr && token == special_token::left_angle) { + push(EEventType::BeginAttributes); + yield(EEventType::BeginAttributes); + next(state::maybe_key); + return; + } + + COLD_BLOCK_BYVALUE + 
lexer_.fail("Unexpected ", NCEscape::quote(ch)); + COLD_BLOCK_END + } + + ATTRIBUTE(hot) + void state_value_binary_scalar(char_class cls) { + lexer_.advance(1); + switch (cls) { + case char_class::binary_double: + yield(lexer_.read_binary_double()); + break; + + case char_class::binary_int64: + yield(lexer_.read_binary_int64()); + break; + + case char_class::binary_uint64: + yield(lexer_.read_binary_uint64()); + break; + + case char_class::binary_false: + yield(false); + break; + + case char_class::binary_true: + yield(true); + break; + + default: + Y_UNREACHABLE(); + } + } + + ATTRIBUTE(noinline) + void state_value_text_scalar(char_class cls) { + switch (cls) { + case char_class::quote: + lexer_.advance(1); + yield(lexer_.read_quoted_string()); + break; + + case char_class::number: + yield(lexer_.read_numeric()); + break; + + case char_class::string: + yield(lexer_.read_unquoted_string()); + break; + + case char_class::percent: + lexer_.advance(1); + yield(lexer_.read_percent_scalar()); + break; + + case char_class::none: + COLD_BLOCK_BYVALUE + lexer_.fail("Invalid yson value."); + COLD_BLOCK_END + break; + + default: + Y_UNREACHABLE(); + } + } + }; + + class reader_impl: public gen_reader_impl<false> { + public: + using gen_reader_impl<false>::gen_reader_impl; + }; + } +} diff --git a/library/cpp/yson_pull/detail/stream_counter.h b/library/cpp/yson_pull/detail/stream_counter.h new file mode 100644 index 0000000000..3b41b27eb6 --- /dev/null +++ b/library/cpp/yson_pull/detail/stream_counter.h @@ -0,0 +1,51 @@ +#pragma once + +#include <library/cpp/yson_pull/position_info.h> + +#include <cstddef> + +namespace NYsonPull { + namespace NDetail { + template <bool EnableLinePositionInfo> + class stream_counter; + + template <> + class stream_counter<true> { + private: + size_t offset_ = 0; + size_t line_ = 1; + size_t column_ = 1; + + public: + TPositionInfo info() const { + return {offset_, line_, column_}; + } + + void update(const ui8* begin, const ui8* end) { + 
offset_ += end - begin; + for (auto current = begin; current != end; ++current) { + ++column_; + if (*current == '\n') { //TODO: memchr + ++line_; + column_ = 1; + } + } + } + }; + + template <> + class stream_counter<false> { + private: + size_t offset_ = 0; + + public: + TPositionInfo info() const { + return {offset_, {}, {}}; + } + + void update(const ui8* begin, const ui8* end) { + offset_ += end - begin; + } + }; + } +} diff --git a/library/cpp/yson_pull/detail/symbols.h b/library/cpp/yson_pull/detail/symbols.h new file mode 100644 index 0000000000..fe94bb9c41 --- /dev/null +++ b/library/cpp/yson_pull/detail/symbols.h @@ -0,0 +1,55 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/system/types.h> + +namespace NYsonPull { + namespace NDetail { + namespace NSymbol { +#define SYM(name, value) constexpr ui8 name = value + + //! Indicates the beginning of a list. + SYM(begin_list, '['); + //! Indicates the end of a list. + SYM(end_list, ']'); + + //! Indicates the beginning of a map. + SYM(begin_map, '{'); + //! Indicates the end of a map. + SYM(end_map, '}'); + + //! Indicates the beginning of an attribute map. + SYM(begin_attributes, '<'); + //! Indicates the end of an attribute map. + SYM(end_attributes, '>'); + + //! Separates items in lists and pairs in maps or attribute maps. + SYM(item_separator, ';'); + //! Separates keys from values in maps and attribute maps. + SYM(key_value_separator, '='); + + //! Indicates an entity. + SYM(entity, '#'); + //! Indicates end of stream. + SYM(eof, '\0'); + + //! Marks the beginning of a binary string literal. + SYM(string_marker, '\x01'); + //! Marks the beginning of a binary int64 literal. + SYM(int64_marker, '\x02'); + //! Marks the beginning of a binary uint64 literal. + SYM(uint64_marker, '\x06'); + //! Marks the beginning of a binary double literal. + SYM(double_marker, '\x03'); + //! Marks a binary `false' boolean value. + SYM(false_marker, '\x04'); + //! Marks a binary `true' boolean value. 
+ SYM(true_marker, '\x05'); + + //! Text string quote symbol + SYM(quote, '"'); + +#undef SYM + } + } +} diff --git a/library/cpp/yson_pull/detail/traits.h b/library/cpp/yson_pull/detail/traits.h new file mode 100644 index 0000000000..869a3b9c44 --- /dev/null +++ b/library/cpp/yson_pull/detail/traits.h @@ -0,0 +1,29 @@ +#pragma once + +#include <type_traits> + +namespace NYsonPull { + namespace NDetail { + namespace NTraits { + template <typename T, typename U> + using if_signed = typename std::enable_if< + std::is_signed<T>::value, + U>::type; + + template <typename T, typename U> + using if_unsigned = typename std::enable_if< + std::is_unsigned<T>::value, + U>::type; + + template <typename T> + using to_unsigned = typename std::enable_if< + std::is_signed<T>::value, + typename std::make_unsigned<T>::type>::type; + + template <typename T> + using to_signed = typename std::enable_if< + std::is_unsigned<T>::value, + typename std::make_signed<T>::type>::type; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/varint.h b/library/cpp/yson_pull/detail/varint.h new file mode 100644 index 0000000000..38bf45d925 --- /dev/null +++ b/library/cpp/yson_pull/detail/varint.h @@ -0,0 +1,260 @@ +#pragma once + +#include "byte_reader.h" +#include "byte_writer.h" +#include "traits.h" +#include "zigzag.h" + +#include <util/system/types.h> + +#include <cstddef> +#include <type_traits> + +namespace NYsonPull { + namespace NDetail { + namespace NVarInt { + namespace NImpl { + template <typename T> + constexpr inline size_t max_size() { + return (8 * sizeof(T) - 1) / 7 + 1; + } + + template <typename T> + inline size_t write(ui64 value, T&& consume) { + auto stop = false; + auto nwritten = size_t{0}; + while (!stop) { + ++nwritten; + auto byte = static_cast<ui8>(value | 0x80); + value >>= 7; + if (value == 0) { + stop = true; + byte &= 0x7F; + } + consume(byte); + } + return nwritten; + } + + template <typename U> + inline bool read_fast(byte_reader<U>& reader, 
ui64* value) { + auto& buf = reader.stream().buffer(); + auto* ptr = buf.pos(); + ui32 b; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + ui32 part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); + part0 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + + // We have overrun the maximum size of a Varint (10 bytes). The data + // must be corrupt. + return false; + + done: + reader.advance(ptr - buf.pos()); + *value = (static_cast<ui64>(part0)) | (static_cast<ui64>(part1) << 28) | (static_cast<ui64>(part2) << 56); + return true; + } + + template <typename U> + inline bool read_fast(byte_reader<U>& reader, ui32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. 
+ auto& buf = reader.stream().buffer(); + auto* ptr = buf.pos(); + ui32 b; + ui32 result; + + b = *(ptr++); + result = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= b << 28; + if (!(b & 0x80)) + goto done; + + // FIXME + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + + for (size_t i = 0; i < max_size<ui64>() - max_size<ui32>(); i++) { + b = *(ptr++); + if (!(b & 0x80)) + goto done; + } + + // We have overrun the maximum size of a Varint (10 bytes). Assume + // the data is corrupt. + return false; + + done: + reader.advance(ptr - buf.pos()); + *value = result; + return true; + } + + template <typename U> + inline bool read_slow(byte_reader<U>& reader, ui64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. 
+ + auto& buf = reader.stream().buffer(); + ui64 result = 0; + int count = 0; + ui32 b; + + do { + if (count == max_size<ui64>()) { + return false; + } + reader.fill_buffer(); + if (reader.stream().at_end()) { + return false; + } + b = *buf.pos(); + result |= static_cast<ui64>(b & 0x7F) << (7 * count); + reader.advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; + } + + template <typename U> + inline bool read_slow(byte_reader<U>& reader, ui32* value) { + ui64 result; + // fallback to 64-bit reading + if (read_slow(reader, &result) && result <= std::numeric_limits<ui32>::max()) { + *value = static_cast<ui32>(result); + return true; + } + + return false; + } + + // Following functions is an adaptation + // of Protobuf code from coded_stream.cc + template <typename T, typename U> + inline bool read_dispatch(byte_reader<U>& reader, T* value) { + auto& buf = reader.stream().buffer(); + // NOTE: checking for 64-bit max_size(), since 32-bit + // read_fast() might fallback to 64-bit reading + if (buf.available() >= max_size<ui64>() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (!buf.is_empty() && !(buf.end()[-1] & 0x80))) + { + return read_fast(reader, value); + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return read_slow(reader, value); + } + } + + } + + // Various functions to read/write varints. + + // Returns the number of bytes written. 
+ template <typename T> + inline NTraits::if_unsigned<T, size_t> write(ui8* data, T value) { + return NImpl::write( + static_cast<ui64>(value), + [&](ui8 byte) { *data++ = byte; }); + } + + template <typename T> + inline NTraits::if_signed<T, size_t> write(ui8* data, T value) { + return NImpl::write( + static_cast<ui64>(NZigZag::encode(value)), + [&](ui8 byte) { *data++ = byte; }); + } + + template <typename T, typename U> + inline void write(byte_writer<U>& stream, T value) { + ui8 data[NImpl::max_size<T>()]; + auto size = write(data, value); + stream.write(data, size); + } + + template <typename T, typename U> + inline NTraits::if_unsigned<T, T> read(byte_reader<U>& reader) { + auto value = T{}; + auto& buf = reader.stream().buffer(); + if (!buf.is_empty() && *buf.pos() < 0x80) { + value = *buf.pos(); + reader.advance(1); + return value; + } + + if (Y_UNLIKELY(!NImpl::read_dispatch(reader, &value))) { + reader.fail("Error parsing varint value"); + } + return value; + } + + template <typename T, typename U> + inline NTraits::if_signed<T, T> read(byte_reader<U>& reader) { + return NZigZag::decode( + read<NTraits::to_unsigned<T>>(reader)); + } + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/writer.h b/library/cpp/yson_pull/detail/writer.h new file mode 100644 index 0000000000..b24b994292 --- /dev/null +++ b/library/cpp/yson_pull/detail/writer.h @@ -0,0 +1,566 @@ +#pragma once + +#include "byte_writer.h" +#include "cescape.h" +#include "percent_scalar.h" +#include "stream_counter.h" +#include "symbols.h" +#include "varint.h" + +#include <library/cpp/yson_pull/consumer.h> +#include <library/cpp/yson_pull/event.h> +#include <library/cpp/yson_pull/output.h> +#include <library/cpp/yson_pull/stream_type.h> +#include <library/cpp/yson_pull/writer.h> + +#include <util/generic/vector.h> +#include <util/system/yassert.h> + +#include <cmath> + +namespace NYsonPull { + namespace NDetail { + class writer: public IConsumer { + enum class state { + 
maybe_key, + maybe_value, + value, + value_noattr, + before_begin, + before_end, + after_end, + }; + + byte_writer<stream_counter<false>> stream_; + TVector<EEventType> stack_; + bool need_item_separator_ = false; + EStreamType mode_ = EStreamType::ListFragment; + state state_ = state::before_begin; + + public: + void OnBeginStream() override { + update_state(EEventType::BeginStream); + } + + void OnEndStream() override { + update_state(EEventType::EndStream); + stream_.flush_buffer(); + } + + void OnBeginList() override { + begin_node(); + write(NSymbol::begin_list); + update_state(EEventType::BeginList); + begin_collection(collection_type::list); + } + + void OnEndList() override { + update_state(EEventType::EndList); + end_collection(collection_type::list); + write(NSymbol::end_list); + end_node(); + } + + void OnBeginMap() override { + begin_node(); + write(NSymbol::begin_map); + update_state(EEventType::BeginMap); + begin_collection(collection_type::map); + } + + void OnEndMap() override { + update_state(EEventType::EndMap); + end_collection(collection_type::map); + write(NSymbol::end_map); + end_node(); + } + + void OnBeginAttributes() override { + begin_node(); + write(NSymbol::begin_attributes); + update_state(EEventType::BeginAttributes); + begin_collection(collection_type::attributes); + } + + void OnEndAttributes() override { + update_state(EEventType::EndAttributes); + end_collection(collection_type::attributes); + write(NSymbol::end_attributes); + // no end_node + } + + void OnEntity() override { + begin_node(); + update_state(EEventType::Scalar); + write(NSymbol::entity); + end_node(); + } + + protected: + enum class collection_type { + list, + map, + attributes, + }; + + writer(NYsonPull::NOutput::IStream& stream, EStreamType mode) + : stream_(stream) + , mode_{mode} { + } + + bool need_item_separator() const { + return need_item_separator_; + } + void need_item_separator(bool value) { + need_item_separator_ = value; + } + + size_t depth() const { + 
Y_ASSERT(!stack_.empty()); + if (mode_ == EStreamType::Node) { + return stack_.size() - 1; + } else { + return stack_.size() - 2; + } + } + EStreamType mode() const { + return mode_; + } + + void write(ui8 c) { + stream_.write(c); + } + + void write(TStringBuf value) { + write_raw(value.data(), value.size()); + } + + void write_raw(const void* ptr, size_t len) { + stream_.write(static_cast<const ui8*>(ptr), len); + } + + template <typename T> + void write_varint(T value) { + NVarInt::write(stream_, value); + } + + void write_escaped_string(TStringBuf value) { + write(NSymbol::quote); + NCEscape::encode(stream_, value); + write(NSymbol::quote); + } + + void push(EEventType type) { + stack_.push_back(type); + } + + void pop(EEventType type) { + if (stack_.empty()) { + fail("Unpaired events: empty event stack"); + } + if (stack_.back() != type) { + fail("Unpaired events: expected ", type, ", got ", stack_.back()); + } + stack_.pop_back(); + } + + void update_state(EEventType event) { + switch (state_) { + case state::before_begin: + if (event != EEventType::BeginStream) { + fail("Expected begin_stream, got ", event); + } + begin_stream(); + return; + + case state::before_end: + if (event != EEventType::EndStream) { + fail("Expected end_stream, got ", event); + } + end_stream(); + return; + + case state::after_end: + fail("Attempted write past stream end"); + + case state::maybe_key: + if (event == EEventType::Key) { + state_ = state::value; + return; + } + + switch (event) { + case EEventType::EndStream: + end_stream(); + return; + + case EEventType::EndMap: + pop(EEventType::BeginMap); + next_state(); + return; + + case EEventType::EndAttributes: + pop(EEventType::BeginAttributes); + state_ = state::value_noattr; + return; + + default: + fail("Unexpected event ", event, " in maybe_key"); + } + break; + + case state::maybe_value: + switch (event) { + case EEventType::EndList: + pop(EEventType::BeginList); + next_state(); + return; + + case EEventType::EndStream: + 
end_stream(); + return; + + default: + break; + } + [[fallthrough]]; + case state::value: + if (event == EEventType::BeginAttributes) { + push(EEventType::BeginAttributes); + next_state(); + return; + } + [[fallthrough]]; + case state::value_noattr: + switch (event) { + case EEventType::Scalar: + next_state(); + return; + + case EEventType::BeginList: + push(EEventType::BeginList); + next_state(); + return; + + case EEventType::BeginMap: + push(EEventType::BeginMap); + next_state(); + return; + + default: + fail("Unexpected event ", event, " (in value_*)"); + } + break; + } + } + + void next_state() { + Y_ASSERT(!stack_.empty()); + switch (stack_.back()) { + case EEventType::BeginMap: + case EEventType::BeginAttributes: + state_ = state::maybe_key; + break; + + case EEventType::BeginList: + state_ = state::maybe_value; + break; + + case EEventType::BeginStream: + state_ = state::before_end; + break; + + default: + Y_UNREACHABLE(); + } + } + + void begin_stream() { + push(EEventType::BeginStream); + switch (mode_) { + case EStreamType::ListFragment: + push(EEventType::BeginList); + state_ = state::maybe_value; + break; + + case EStreamType::MapFragment: + push(EEventType::BeginMap); + state_ = state::maybe_key; + break; + + case EStreamType::Node: + state_ = state::value; + break; + } + } + + void end_stream() { + switch (mode_) { + case EStreamType::ListFragment: + pop(EEventType::BeginList); + break; + + case EStreamType::MapFragment: + pop(EEventType::BeginMap); + break; + + case EStreamType::Node: + break; + } + pop(EEventType::BeginStream); + state_ = state::after_end; + } + + virtual void begin_node() { + if (need_item_separator_) { + write(NSymbol::item_separator); + } + } + + virtual void end_node() { + need_item_separator_ = true; + } + + virtual void begin_key() { + begin_node(); + } + + virtual void end_key() { + need_item_separator_ = false; + write(NSymbol::key_value_separator); + } + + virtual void begin_collection(collection_type type) { + 
Y_UNUSED(type); + need_item_separator_ = false; + } + + virtual void end_collection(collection_type type) { + need_item_separator_ = (type != collection_type::attributes); + } + + template <typename... Args> + ATTRIBUTE(noinline, cold) + void fail[[noreturn]](const char* msg, Args&&... args) { + auto formatted_message = format_string( + msg, + std::forward<Args>(args)...); + throw NException::TBadOutput( + formatted_message, + stream_.counter().info()); + } + }; + + class TBinaryWriterImpl final: public writer { + public: + TBinaryWriterImpl(NYsonPull::NOutput::IStream& stream, EStreamType mode) + : writer(stream, mode) + { + } + + void OnScalarBoolean(bool value) override { + update_state(EEventType::Scalar); + + begin_node(); + write(value ? NSymbol::true_marker : NSymbol::false_marker); + end_node(); + } + + void OnScalarInt64(i64 value) override { + update_state(EEventType::Scalar); + + begin_node(); + write(NSymbol::int64_marker); + write_varint(value); + end_node(); + } + + void OnScalarUInt64(ui64 value) override { + update_state(EEventType::Scalar); + + begin_node(); + write(NSymbol::uint64_marker); + write_varint(value); + end_node(); + } + + void OnScalarFloat64(double value) override { + update_state(EEventType::Scalar); + + begin_node(); + write(NSymbol::double_marker); + write_raw(&value, sizeof value); + end_node(); + } + + void OnScalarString(TStringBuf value) override { + update_state(EEventType::Scalar); + + begin_node(); + write(NSymbol::string_marker); + write_varint(static_cast<i32>(value.size())); + write_raw(value.data(), value.size()); + end_node(); + } + + void OnKey(TStringBuf name) override { + update_state(EEventType::Key); + + begin_key(); + write(NSymbol::string_marker); + write_varint(static_cast<i32>(name.size())); + write_raw(name.data(), name.size()); + end_key(); + } + }; + + class TTextWriterImpl: public writer { + public: + TTextWriterImpl(NYsonPull::NOutput::IStream& stream, EStreamType mode) + : writer(stream, mode) + { + } + + 
void OnScalarBoolean(bool value) override { + update_state(EEventType::Scalar); + + begin_node(); + write(value ? percent_scalar::true_literal : percent_scalar::false_literal); + end_node(); + } + + void OnScalarInt64(i64 value) override { + update_state(EEventType::Scalar); + + char buf[32]; + auto len = ::snprintf(buf, sizeof(buf), "%" PRIi64, value); + + begin_node(); + write_raw(buf, len); + end_node(); + } + + void OnScalarUInt64(ui64 value) override { + update_state(EEventType::Scalar); + + char buf[32]; + auto len = ::snprintf(buf, sizeof(buf), "%" PRIu64, value); + + begin_node(); + write_raw(buf, len); + write('u'); + end_node(); + } + + void OnScalarFloat64(double value) override { + update_state(EEventType::Scalar); + + begin_node(); + + if (std::isfinite(value)) { + char buf[32]; + auto len = ::snprintf(buf, sizeof(buf), "%#.17lg", value); + write_raw(buf, len); + } else if (std::isnan(value)) { + write(percent_scalar::nan_literal); + } else if (value > 0) { + write(percent_scalar::positive_inf_literal); + } else { + write(percent_scalar::negative_inf_literal); + } + + end_node(); + } + + void OnScalarString(TStringBuf value) override { + update_state(EEventType::Scalar); + + begin_node(); + write_escaped_string(value); + end_node(); + } + + void OnKey(TStringBuf name) override { + update_state(EEventType::Key); + + begin_key(); + write_escaped_string(name); + end_key(); + } + + protected: + void begin_node() override { + if (need_item_separator()) { + write(NSymbol::item_separator); + write(' '); + } + } + + void end_node() override { + if (mode() != EStreamType::Node && depth() == 0) { + write(NSymbol::item_separator); + write('\n'); + need_item_separator(false); + } else { + writer::end_node(); + } + } + + void end_key() override { + write(' '); + writer::end_key(); + write(' '); + } + }; + + class TPrettyWriterImpl final: public TTextWriterImpl { + size_t indent_size_; + + public: + TPrettyWriterImpl( + NYsonPull::NOutput::IStream& stream, + 
EStreamType mode, + size_t indent_size) + : TTextWriterImpl(stream, mode) + , indent_size_{indent_size} { + } + + protected: + void begin_node() override { + if (need_item_separator()) { + write(NSymbol::item_separator); + newline(); + } + } + + void begin_collection(collection_type type) override { + TTextWriterImpl::begin_collection(type); + newline(); + } + + void end_collection(collection_type type) override { + TTextWriterImpl::end_collection(type); + newline(); + } + + void newline() { + write('\n'); + indent(depth()); + } + + void indent(size_t count) { + for (size_t i = 0; i < count * indent_size_; ++i) { + write(' '); + } + } + }; + + template <typename T, typename... Args> + NYsonPull::TWriter make_writer( + THolder<NYsonPull::NOutput::IStream> stream, + Args&&... args) { + auto impl = MakeHolder<T>(*stream, std::forward<Args>(args)...); + return NYsonPull::TWriter(std::move(stream), std::move(impl)); + } + } +} diff --git a/library/cpp/yson_pull/detail/zigzag.h b/library/cpp/yson_pull/detail/zigzag.h new file mode 100644 index 0000000000..98fcac0e9f --- /dev/null +++ b/library/cpp/yson_pull/detail/zigzag.h @@ -0,0 +1,24 @@ +#pragma once + +#include "traits.h" + +namespace NYsonPull { + namespace NDetail { + namespace NZigZag { + //! 
Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x| + + template <typename TSigned> + inline NTraits::to_unsigned<TSigned> encode(TSigned x) { + using TUnsigned = NTraits::to_unsigned<TSigned>; + constexpr auto rshift = sizeof(TSigned) * 8 - 1; + return (static_cast<TUnsigned>(x) << 1) ^ static_cast<TUnsigned>(x >> rshift); + } + + template <typename TUnsigned> + inline NTraits::to_signed<TUnsigned> decode(TUnsigned x) { + using TSigned = NTraits::to_signed<TUnsigned>; + return static_cast<TSigned>(x >> 1) ^ -static_cast<TSigned>(x & 1); + } + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/event.cpp b/library/cpp/yson_pull/event.cpp new file mode 100644 index 0000000000..b7ede494b6 --- /dev/null +++ b/library/cpp/yson_pull/event.cpp @@ -0,0 +1,18 @@ +#include "event.h" + +#include <library/cpp/yson_pull/detail/cescape.h> + +#include <util/stream/output.h> + +using namespace NYsonPull; + +template <> +void Out<TEvent>(IOutputStream& out, const TEvent& value) { + out << '(' << value.Type(); + if (value.Type() == EEventType::Scalar) { + out << ' ' << value.AsScalar(); + } else if (value.Type() == EEventType::Key) { + out << ' ' << NYsonPull::NDetail::NCEscape::quote(value.AsString()); + } + out << ')'; +} diff --git a/library/cpp/yson_pull/event.h b/library/cpp/yson_pull/event.h new file mode 100644 index 0000000000..b41d5ea3b5 --- /dev/null +++ b/library/cpp/yson_pull/event.h @@ -0,0 +1,85 @@ +#pragma once + +#include "cyson_enums.h" +#include "scalar.h" + +#include <util/generic/strbuf.h> +#include <util/system/types.h> +#include <util/system/yassert.h> + +namespace NYsonPull { + //! A well-formed decoded YSON stream can be described by the following grammar: + //! + //! STREAM[node] ::= begin_stream VALUE end_stream + //! STREAM[list_fragment] ::= begin_stream LIST_FRAGMENT end_stream + //! STREAM[map_fragment] ::= begin_stream MAP_FRAGMENT end_stream + //! LIST_FRAGMENT ::= { VALUE; } + //! 
MAP_FRAGMENT ::= { KEY VALUE; } + //! KEY ::= key(String) + //! VALUE ::= VALUE_NOATTR | ATTRIBUTES VALUE_NOATTR + //! ATTRIBUTES ::= begin_attributes MAP_FRAGMENT end_attributes + //! VALUE_NOATTR ::= scalar(Scalar) | LIST | MAP + //! LIST ::= begin_list LIST_FRAGMENT end_list + //! MAP ::= begin_map MAP_FRAGMENT end_map + + //! \brief YSON event type tag. Corresponds to YSON grammar. + enum class EEventType { + BeginStream = YSON_EVENT_BEGIN_STREAM, + EndStream = YSON_EVENT_END_STREAM, + BeginList = YSON_EVENT_BEGIN_LIST, + EndList = YSON_EVENT_END_LIST, + BeginMap = YSON_EVENT_BEGIN_MAP, + EndMap = YSON_EVENT_END_MAP, + BeginAttributes = YSON_EVENT_BEGIN_ATTRIBUTES, + EndAttributes = YSON_EVENT_END_ATTRIBUTES, + Key = YSON_EVENT_KEY, + Scalar = YSON_EVENT_SCALAR, + }; + + //! \brief YSON event variant type. + class TEvent { + EEventType Type_; + TScalar Value_; + + public: + //! \brief Construct a tag-only event. + explicit constexpr TEvent(EEventType type = EEventType::BeginStream) + : Type_{type} { + } + + //! \brief Construct a tag+value event. + //! + //! Only \p EEventType::Key is meaningful. + constexpr TEvent(EEventType type, const TScalar& value) + : Type_{type} + , Value_{value} { + } + + //! \brief Construct a \p EEventType::Scalar event. + explicit constexpr TEvent(const TScalar& value) + : Type_{EEventType::Scalar} + , Value_{value} { + } + + EEventType Type() const { + return Type_; + } + + //! \brief Get TScalar value. + //! + //! Undefined behaviour when event type is neither \p EEventType::Scalar nor \p EEventType::Key. + const TScalar& AsScalar() const { + Y_ASSERT(Type_ == EEventType::Scalar || Type_ == EEventType::Key); + return Value_; + } + + //! \brief Get string value. + //! + //! Undefined behaviour when event type is neither \p EEventType::Key nor a string-valued \p EEventType::Scalar. 
+ TStringBuf AsString() const { + Y_ASSERT(Type_ == EEventType::Key || (Type_ == EEventType::Scalar && Value_.Type() == EScalarType::String)); + return Value_.AsString(); + } + }; + +} diff --git a/library/cpp/yson_pull/exceptions.cpp b/library/cpp/yson_pull/exceptions.cpp new file mode 100644 index 0000000000..e1d68493e7 --- /dev/null +++ b/library/cpp/yson_pull/exceptions.cpp @@ -0,0 +1,45 @@ +#include "exceptions.h" + +#include <util/string/builder.h> + +#include <cerrno> +#include <cstring> + +using namespace NYsonPull::NException; + +const char* TBadStream::what() const noexcept { + TStringBuilder stream; + stream << "Invalid YSON"; + if (Position_.Offset || Position_.Line || Position_.Column) { + bool first = true; + stream << " at "; + if (Position_.Offset) { + stream << "offset " << *Position_.Offset; + first = false; + } + if (Position_.Line) { + if (!first) { + stream << ", "; + } + stream << "line " << *Position_.Line; + first = false; + } + if (Position_.Column) { + if (!first) { + stream << ", "; + } + stream << "column " << *Position_.Column; + } + } + stream << ": " << Message_; + FormattedMessage_ = stream; + return FormattedMessage_.c_str(); +} + +NYsonPull::NException::TSystemError::TSystemError() + : SavedErrno_{errno} { +} + +const char* NYsonPull::NException::TSystemError::what() const noexcept { + return ::strerror(SavedErrno_); +} diff --git a/library/cpp/yson_pull/exceptions.h b/library/cpp/yson_pull/exceptions.h new file mode 100644 index 0000000000..ebfed950a5 --- /dev/null +++ b/library/cpp/yson_pull/exceptions.h @@ -0,0 +1,59 @@ +#pragma once + +#include "position_info.h" + +#include <util/generic/string.h> + +#include <stdexcept> +#include <string> + +namespace NYsonPull { + namespace NException { + class TBadStream: public std::exception { + TString Message_; + TPositionInfo Position_; + mutable TString FormattedMessage_; + + public: + TBadStream( + TString message, + const TPositionInfo& position) + : Message_(std::move(message)) + , 
Position_(position) + { + } + + const TPositionInfo& Position() const { + return Position_; + } + + const char* what() const noexcept override; + }; + + class TBadInput: public TBadStream { + public: + using TBadStream::TBadStream; + }; + + class TBadOutput: public TBadStream { + public: + using TBadStream::TBadStream; + }; + + class TSystemError: public std::exception { + int SavedErrno_; + + public: + TSystemError(); + TSystemError(int saved_errno) + : SavedErrno_{saved_errno} { + } + + int saved_errno() const noexcept { + return SavedErrno_; + } + + const char* what() const noexcept override; + }; + } +} diff --git a/library/cpp/yson_pull/input.cpp b/library/cpp/yson_pull/input.cpp new file mode 100644 index 0000000000..1373c89868 --- /dev/null +++ b/library/cpp/yson_pull/input.cpp @@ -0,0 +1,33 @@ +#include "input.h" + +#include <library/cpp/yson_pull/detail/input/stdio_file.h> +#include <library/cpp/yson_pull/detail/input/stream.h> + +#include <util/generic/ptr.h> +#include <util/stream/file.h> +#include <util/stream/mem.h> + +using namespace NYsonPull::NInput; +using namespace NYsonPull::NDetail::NInput; + +namespace NInput = NYsonPull::NInput; + +THolder<IStream> NInput::FromStdioFile(FILE* file, size_t buffer_size) { + return MakeHolder<TStdioFile>(file, buffer_size); +} + +THolder<IStream> NInput::FromPosixFd(int fd, size_t buffer_size) { + return MakeHolder<TFHandle>(fd, buffer_size); +} + +THolder<IStream> NInput::FromMemory(TStringBuf data) { + return MakeHolder<TOwned<TMemoryInput>>(data); +} + +THolder<IStream> NInput::FromInputStream(IInputStream* input, size_t buffer_size) { + return MakeHolder<TOwned<TBufferedInput>>(input, buffer_size); +} + +THolder<IStream> NInput::FromZeroCopyInput(IZeroCopyInput* input) { + return MakeHolder<TZeroCopy>(input); +} diff --git a/library/cpp/yson_pull/input.h b/library/cpp/yson_pull/input.h new file mode 100644 index 0000000000..2cdfae857e --- /dev/null +++ b/library/cpp/yson_pull/input.h @@ -0,0 +1,81 @@ +#pragma 
once + +#include "buffer.h" + +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/system/types.h> +#include <util/system/yassert.h> + +#include <cstddef> +#include <memory> + +class IInputStream; +class IZeroCopyInput; + +namespace NYsonPull { + namespace NInput { + //! \brief Input stream adaptor interface. + //! + //! Represents a model of a chunked input data stream. + class IStream { + input_buffer buffer_; + bool at_end_ = false; + + public: + virtual ~IStream() = default; + + bool at_end() const { + return at_end_; + } + + input_buffer& buffer() noexcept { + return buffer_; + } + const input_buffer& buffer() const noexcept { + return buffer_; + } + + void fill_buffer() { + while (buffer_.is_empty() && !at_end()) { + at_end_ = do_fill_buffer() == result::at_end; + } + } + + protected: + enum class result { + have_more_data, //! May continue reading + at_end, //! Reached end of stream + }; + + //! \brief Read next chunk of data. + //! + //! The implementation is to discard the buffer contents + //! and reset the buffer to the next chunk of data. + //! End-of-stream condition is to be reported via return value. + //! + //! Read is assumed to always succeed unless it throws an exception. + virtual result do_fill_buffer() = 0; + }; + + //! \brief Read data from a contiguous memory block (i.e. a string) + //! + //! Does not take ownership of the memory. + THolder<IStream> FromMemory(TStringBuf data); + + //! \brief Read data from C FILE* object. + //! + //! Does not take ownership of the file object. + //! Data is buffered internally regardless of file buffering. + THolder<IStream> FromStdioFile(FILE* file, size_t buffer_size = 65536); + + //! \brief Read data from POSIX file descriptor. + //! + //! Does not take ownership of the file descriptor. 
+ THolder<IStream> FromPosixFd(int fd, size_t buffer_size = 65536); + + THolder<IStream> FromZeroCopyInput(IZeroCopyInput* input); + + THolder<IStream> FromInputStream(IInputStream* input, size_t buffer_size = 65536); + } +} diff --git a/library/cpp/yson_pull/output.cpp b/library/cpp/yson_pull/output.cpp new file mode 100644 index 0000000000..27c9ef9e69 --- /dev/null +++ b/library/cpp/yson_pull/output.cpp @@ -0,0 +1,29 @@ +#include "output.h" + +#include <library/cpp/yson_pull/detail/output/stdio_file.h> +#include <library/cpp/yson_pull/detail/output/stream.h> + +#include <util/generic/ptr.h> +#include <util/stream/file.h> +#include <util/stream/str.h> + +using namespace NYsonPull::NOutput; +using namespace NYsonPull::NDetail::NOutput; + +namespace NOutput = NYsonPull::NOutput; + +THolder<IStream> NOutput::FromStdioFile(FILE* file, size_t buffer_size) { + return MakeHolder<TStdioFile>(file, buffer_size); +} + +THolder<IStream> NOutput::FromPosixFd(int fd, size_t buffer_size) { + return MakeHolder<TFHandle>(fd, buffer_size); +} + +THolder<IStream> NOutput::FromString(TString* output, size_t buffer_size) { + return MakeHolder<TOwned<TStringOutput>>(buffer_size, *output); +} + +THolder<IStream> NOutput::FromOutputStream(IOutputStream* output, size_t buffer_size) { + return MakeHolder<TStream>(output, buffer_size); +} diff --git a/library/cpp/yson_pull/output.h b/library/cpp/yson_pull/output.h new file mode 100644 index 0000000000..2d78107a93 --- /dev/null +++ b/library/cpp/yson_pull/output.h @@ -0,0 +1,65 @@ +#pragma once + +#include "buffer.h" + +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/system/types.h> +#include <util/system/yassert.h> + +#include <cstddef> +#include <cstdio> +#include <cstring> +#include <memory> + +//! \brief Output stream adaptor interface. +//! +//! Represents a model of an optionally-buffered writer. 
+namespace NYsonPull { + namespace NOutput { + class IStream { + output_buffer buffer_; + + public: + virtual ~IStream() = default; + + output_buffer& buffer() noexcept { + return buffer_; + } + const output_buffer& buffer() const noexcept { + return buffer_; + } + + void flush_buffer(TStringBuf extra = {}) { + if (!extra.empty() || !buffer_.is_empty()) { + do_flush_buffer(extra); + } + while (!buffer_.is_empty()) { + do_flush_buffer({}); + } + } + + protected: + //! \brief Flush data to underlying stream. + //! + //! The implementation is to flush the buffer contents AND + //! extra argument to underlying stream. + //! + //! This way, at zero buffer size this interface implements an unbuffered + //! stream (with an added cost of a virtual call per each write). + //! + //! Write is assumed to always succeed unless it throws an exception. + virtual void do_flush_buffer(TStringBuf extra) = 0; + }; + + //! \brief Write data to C FILE* object. + THolder<IStream> FromStdioFile(FILE* file, size_t buffer_size = 0); + + //! 
\brief Write data to POSIX file descriptor + THolder<IStream> FromPosixFd(int fd, size_t buffer_size = 65536); + + THolder<IStream> FromOutputStream(IOutputStream* output, size_t buffer_size = 65536); + + THolder<IStream> FromString(TString* output, size_t buffer_size = 1024); + } +} diff --git a/library/cpp/yson_pull/position_info.h b/library/cpp/yson_pull/position_info.h new file mode 100644 index 0000000000..a65c4663a9 --- /dev/null +++ b/library/cpp/yson_pull/position_info.h @@ -0,0 +1,23 @@ +#pragma once + +#include <util/generic/maybe.h> +#include <util/system/types.h> + +namespace NYsonPull { + struct TPositionInfo { + TMaybe<ui64> Offset; + TMaybe<ui64> Line; + TMaybe<ui64> Column; + + TPositionInfo() = default; + TPositionInfo( + TMaybe<ui64> offset_, + TMaybe<ui64> line_ = Nothing(), + TMaybe<ui64> column_ = Nothing()) + : Offset{offset_} + , Line{line_} + , Column{column_} { + } + }; + +} diff --git a/library/cpp/yson_pull/range.h b/library/cpp/yson_pull/range.h new file mode 100644 index 0000000000..f4fcf3f206 --- /dev/null +++ b/library/cpp/yson_pull/range.h @@ -0,0 +1,35 @@ +#pragma once + +#include "reader.h" + +#include <util/generic/iterator.h> + +namespace NYsonPull { + class TStreamEventsRange: public TInputRangeAdaptor<TStreamEventsRange> { + TReader Reader_; + bool AtEnd; + + public: + TStreamEventsRange(THolder<NInput::IStream> stream, EStreamType mode) + : Reader_{std::move(stream), mode} + , AtEnd(false) + { + } + + const TEvent* Last() const noexcept { + return &Reader_.LastEvent(); + } + + const TEvent* Next() { + if (Y_UNLIKELY(AtEnd)) { + return nullptr; + } + + auto* event = &Reader_.NextEvent(); + if (event->Type() == EEventType::EndStream) { + AtEnd = true; + } + return event; + } + }; +} diff --git a/library/cpp/yson_pull/read_ops.cpp b/library/cpp/yson_pull/read_ops.cpp new file mode 100644 index 0000000000..9d7e6a4a2d --- /dev/null +++ b/library/cpp/yson_pull/read_ops.cpp @@ -0,0 +1,66 @@ +#include "read_ops.h" + +using namespace 
NYsonPull; +using namespace NYsonPull::NReadOps; + +namespace { + bool TrySkipValueUntil(EEventType end, TReader& reader) { + const auto& event = reader.NextEvent(); + if (event.Type() == end) { + return false; + } + SkipCurrentValue(event, reader); + return true; + } + + bool TrySkipKeyValueUntil(EEventType end, TReader& reader) { + const auto& event = reader.NextEvent(); + if (event.Type() == end) { + return false; + } + Expect(event, EEventType::Key); + SkipValue(reader); + return true; + } +} + +void NYsonPull::NReadOps::SkipCurrentValue(const TEvent& event, TReader& reader) { + switch (event.Type()) { + case EEventType::BeginList: + while (TrySkipValueUntil(EEventType::EndList, reader)) { + } + return; + + case EEventType::BeginMap: + while (TrySkipKeyValueUntil(EEventType::EndMap, reader)) { + } + return; + + case EEventType::BeginAttributes: + while (TrySkipKeyValueUntil(EEventType::EndAttributes, reader)) { + } + // attributes after attributes are disallowed in TReader + SkipValue(reader); + return; + + case EEventType::Scalar: + return; + + default: + throw yexception() << "Unexpected event: " << event; + } +} + +void NYsonPull::NReadOps::SkipValue(TReader& reader) { + const auto& event = reader.NextEvent(); + SkipCurrentValue(event, reader); +} + +void NYsonPull::NReadOps::SkipControlRecords(TReader& reader) { + const auto* event = &reader.LastEvent(); + while (event->Type() == EEventType::BeginAttributes) { + SkipCurrentValue(*event, reader); + event = &reader.NextEvent(); + } + Expect(*event, EEventType::BeginMap); +} diff --git a/library/cpp/yson_pull/read_ops.h b/library/cpp/yson_pull/read_ops.h new file mode 100644 index 0000000000..5c084983ea --- /dev/null +++ b/library/cpp/yson_pull/read_ops.h @@ -0,0 +1,142 @@ +#pragma once + +#include "reader.h" + +#include <util/generic/maybe.h> +#include <util/generic/bt_exception.h> +#include <util/generic/yexception.h> +#include <util/system/yassert.h> + +/** Imperative recursive-descent parsing helpers. 
+ * + * These functions help verify conditions and advance parser state. + * For aggregate parsing functions, common precondition is to require Begin{X} + * event prior to function invocation. Thus, parsers are composable by calling + * sub-parser after dispatching on opening event, e.g.: + * + * if (reader.LastEvent().Type() == EEventType::BeginMap) { + * ReadSomeMap(reader) + * } + * + */ + +namespace NYsonPull { + namespace NReadOps { + class TExpectationFailure: public TWithBackTrace<yexception> { + }; + + inline void Expect(const TEvent& got, EEventType expected) { + Y_ENSURE_EX( + got.Type() == expected, + TExpectationFailure() << "expected " << expected << ", got " << got); + } + + inline void Expect(const TScalar& got, EScalarType expected) { + Y_ENSURE_EX( + got.Type() == expected, + TExpectationFailure() << "expected scalar " << expected << ", got " << got); + } + + // ExpectBegin{X} functions verify that last event WAS X + // SkipBegin{X} functions verify that next event WILL BE X and CONSUME it + + inline void ExpectBeginStream(TReader& reader) { + Expect(reader.LastEvent(), EEventType::BeginStream); + } + + inline void SkipBeginStream(TReader& reader) { + Expect(reader.NextEvent(), EEventType::BeginStream); + } + + inline void ExpectBeginMap(TReader& reader) { + Expect(reader.LastEvent(), EEventType::BeginMap); + } + + inline void SkipBeginMap(TReader& reader) { + Expect(reader.NextEvent(), EEventType::BeginMap); + } + + inline void ExpectBeginList(TReader& reader) { + Expect(reader.LastEvent(), EEventType::BeginList); + } + + inline void SkipBeginList(TReader& reader) { + Expect(reader.NextEvent(), EEventType::BeginList); + } + + inline bool ReadListItem(TReader& reader) { + return reader.NextEvent().Type() != EEventType::EndList; + } + + inline TMaybe<TStringBuf> ReadKey(TReader& reader) { + const auto& event = reader.NextEvent(); + switch (event.Type()) { + case EEventType::Key: + return event.AsString(); + case EEventType::EndMap: + return 
Nothing(); + default: + ythrow yexception() << "Unexpected event: " << event; + } + } + + template <typename T = const TScalar&> + inline T ReadScalar(TReader& reader); + + template <> + inline const TScalar& ReadScalar<const TScalar&>(TReader& reader) { + const auto& event = reader.NextEvent(); + Expect(event, EEventType::Scalar); + return event.AsScalar(); + } + + template <> + inline i64 ReadScalar<i64>(TReader& reader) { + const auto& scalar = ReadScalar(reader); + Expect(scalar, EScalarType::Int64); + return scalar.AsInt64(); + } + + template <> + inline ui64 ReadScalar<ui64>(TReader& reader) { + const auto& scalar = ReadScalar(reader); + Expect(scalar, EScalarType::UInt64); + return scalar.AsUInt64(); + } + + template <> + inline double ReadScalar<double>(TReader& reader) { + const auto& scalar = ReadScalar(reader); + Expect(scalar, EScalarType::Float64); + return scalar.AsFloat64(); + } + + template <> + inline TStringBuf ReadScalar<TStringBuf>(TReader& reader) { + const auto& scalar = ReadScalar(reader); + Expect(scalar, EScalarType::String); + return scalar.AsString(); + } + + template <> + inline TString ReadScalar<TString>(TReader& reader) { + return TString(ReadScalar<TStringBuf>(reader)); + } + + template <> + inline bool ReadScalar<bool>(TReader& reader) { + const auto& scalar = ReadScalar(reader); + Expect(scalar, EScalarType::Boolean); + return scalar.AsBoolean(); + } + + // Skip value that was already started with `event` + void SkipCurrentValue(const TEvent& event, TReader& reader); + + // Skip value that starts at `reader.next_event()` + void SkipValue(TReader& reader); + + // Skip values with attributes, wait for map value + void SkipControlRecords(TReader& reader); + } +} diff --git a/library/cpp/yson_pull/reader.cpp b/library/cpp/yson_pull/reader.cpp new file mode 100644 index 0000000000..ea26852756 --- /dev/null +++ b/library/cpp/yson_pull/reader.cpp @@ -0,0 +1,27 @@ +#include "reader.h" +#include <library/cpp/yson_pull/detail/reader.h> + 
+using namespace NYsonPull; + +TReader::TReader( + THolder<NInput::IStream> stream, + EStreamType mode) + : Stream_{std::move(stream)} + , Impl_{MakeHolder<NDetail::reader_impl>(*Stream_, mode)} { +} + +TReader::TReader(TReader&& other) noexcept + : Stream_{std::move(other.Stream_)} + , Impl_{std::move(other.Impl_)} { +} + +TReader::~TReader() { +} + +const TEvent& TReader::NextEvent() { + return Impl_->next_event(); +} + +const TEvent& TReader::LastEvent() const noexcept { + return Impl_->last_event(); +} diff --git a/library/cpp/yson_pull/reader.h b/library/cpp/yson_pull/reader.h new file mode 100644 index 0000000000..f839b19071 --- /dev/null +++ b/library/cpp/yson_pull/reader.h @@ -0,0 +1,37 @@ +#pragma once + +#include "event.h" +#include "input.h" +#include "stream_type.h" + +#include <util/system/yassert.h> + +#include <memory> + +namespace NYsonPull { + namespace NDetail { + class reader_impl; + } + + //! \brief YSON reader facade class. + //! + //! Owns an input stream. + class TReader { + THolder<NInput::IStream> Stream_; + THolder<NDetail::reader_impl> Impl_; + + public: + TReader(THolder<NInput::IStream> stream, EStreamType mode); + TReader(TReader&&) noexcept; + ~TReader(); + + //! \brief Advance stream to next event and return it. + //! + //! Any event data is invalidated by a call to NextEvent(); + const TEvent& NextEvent(); + + //! \brief Get last returned event. 
+ const TEvent& LastEvent() const noexcept; + }; + +} diff --git a/library/cpp/yson_pull/scalar.cpp b/library/cpp/yson_pull/scalar.cpp new file mode 100644 index 0000000000..4325542e7a --- /dev/null +++ b/library/cpp/yson_pull/scalar.cpp @@ -0,0 +1,57 @@ +#include "scalar.h" + +#include <library/cpp/yson_pull/detail/cescape.h> + +#include <util/stream/output.h> + +using namespace NYsonPull; + +template <> +void Out<TScalar>(IOutputStream& out, const TScalar& value) { + out << '(' << value.Type(); + if (value.Type() != EScalarType::Entity) { + out << ' '; + } + switch (value.Type()) { + case EScalarType::Boolean: + out << (value.AsBoolean() ? "true" : "false"); + break; + case EScalarType::String: + out << NYsonPull::NDetail::NCEscape::quote(value.AsString()); + break; + case EScalarType::Int64: + out << value.AsInt64(); + break; + case EScalarType::UInt64: + out << value.AsUInt64(); + break; + case EScalarType::Float64: + out << value.AsFloat64(); + break; + default: + break; + } + out << ')'; +} + +bool NYsonPull::operator==(const TScalar& left, const TScalar& right) noexcept { + if (left.Type() != right.Type()) { + return false; + } + switch (left.Type()) { + case EScalarType::Boolean: + return left.AsBoolean() == right.AsBoolean(); + case EScalarType::String: + return left.AsString() == right.AsString(); + case EScalarType::Int64: + return left.AsInt64() == right.AsInt64(); + case EScalarType::UInt64: + return left.AsUInt64() == right.AsUInt64(); + case EScalarType::Float64: + return left.AsFloat64() == right.AsFloat64(); + case EScalarType::Entity: + return true; + default: + Y_UNREACHABLE(); + } +} diff --git a/library/cpp/yson_pull/scalar.h b/library/cpp/yson_pull/scalar.h new file mode 100644 index 0000000000..509fce8b5e --- /dev/null +++ b/library/cpp/yson_pull/scalar.h @@ -0,0 +1,146 @@ +#pragma once + +#include "cyson_enums.h" + +#include <util/generic/strbuf.h> +#include <util/system/types.h> +#include <util/system/yassert.h> + +namespace NYsonPull { + 
//! \brief YSON TScalar value type tag + enum class EScalarType { + Entity = YSON_SCALAR_ENTITY, + Boolean = YSON_SCALAR_BOOLEAN, + Int64 = YSON_SCALAR_INT64, + UInt64 = YSON_SCALAR_UINT64, + Float64 = YSON_SCALAR_FLOAT64, + String = YSON_SCALAR_STRING, + }; + + //! \brief YSON TScalar value variant + class TScalar { + //! \internal \brief YSON TScalar value underlying representation + union TScalarValue { + struct TScalarStringRef { + const char* Data; + size_t Size; + }; + + ui8 AsNothing[1]; + bool AsBoolean; + i64 AsInt64; + ui64 AsUInt64; + double AsFloat64; + TScalarStringRef AsString; + + constexpr TScalarValue() + : AsNothing{} { + } + + explicit constexpr TScalarValue(bool value) + : AsBoolean{value} { + } + + explicit constexpr TScalarValue(i64 value) + : AsInt64{value} { + } + + explicit constexpr TScalarValue(ui64 value) + : AsUInt64{value} { + } + + explicit constexpr TScalarValue(double value) + : AsFloat64{value} { + } + + explicit constexpr TScalarValue(TStringBuf value) + : AsString{value.data(), value.size()} { + } + }; + static_assert( + sizeof(TScalarValue) == sizeof(TStringBuf), + "bad scalar_value size"); + + EScalarType Type_; + TScalarValue Value_; + + public: + constexpr TScalar() + : Type_{EScalarType::Entity} { + } + + explicit constexpr TScalar(bool value) + : Type_{EScalarType::Boolean} + , Value_{value} { + } + + explicit constexpr TScalar(i64 value) + : Type_{EScalarType::Int64} + , Value_{value} { + } + + explicit constexpr TScalar(ui64 value) + : Type_{EScalarType::UInt64} + , Value_{value} { + } + + explicit constexpr TScalar(double value) + : Type_{EScalarType::Float64} + , Value_{value} { + } + + explicit constexpr TScalar(TStringBuf value) + : Type_{EScalarType::String} + , Value_{value} { + } + + // Disambiguation for literal constants + // In the absence of this constructor, + // they get implicitly converted to bool (yikes!) 
+ explicit TScalar(const char* value) + : Type_{EScalarType::String} + , Value_{TStringBuf{value}} { + } + + EScalarType Type() const { + return Type_; + } + +#define CAST_TO(Type) \ + Y_ASSERT(Type_ == EScalarType::Type); \ + return Value_.As##Type + + bool AsBoolean() const { + CAST_TO(Boolean); + } + i64 AsInt64() const { + CAST_TO(Int64); + } + ui64 AsUInt64() const { + CAST_TO(UInt64); + } + double AsFloat64() const { + CAST_TO(Float64); + } +#undef CAST_TO + + TStringBuf AsString() const { + Y_ASSERT(Type_ == EScalarType::String); + return TStringBuf{ + Value_.AsString.Data, + Value_.AsString.Size, + }; + } + + const TScalarValue& AsUnsafeValue() const { + return Value_; + } + }; + + bool operator==(const TScalar& left, const TScalar& right) noexcept; + + inline bool operator!=(const TScalar& left, const TScalar& right) noexcept { + return !(left == right); + } + +} diff --git a/library/cpp/yson_pull/stream_type.h b/library/cpp/yson_pull/stream_type.h new file mode 100644 index 0000000000..beac87fe1b --- /dev/null +++ b/library/cpp/yson_pull/stream_type.h @@ -0,0 +1,11 @@ +#pragma once + +#include "cyson_enums.h" + +namespace NYsonPull { + enum class EStreamType { + Node = YSON_STREAM_TYPE_NODE, + ListFragment = YSON_STREAM_TYPE_LIST_FRAGMENT, + MapFragment = YSON_STREAM_TYPE_MAP_FRAGMENT, + }; +} diff --git a/library/cpp/yson_pull/ut/cescape_ut.cpp b/library/cpp/yson_pull/ut/cescape_ut.cpp new file mode 100644 index 0000000000..6628ba1d15 --- /dev/null +++ b/library/cpp/yson_pull/ut/cescape_ut.cpp @@ -0,0 +1,71 @@ +#include <library/cpp/yson_pull/detail/cescape.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NYsonPull::NDetail; + +namespace { + void test_roundtrip(const TVector<ui8>& str) { + TStringBuf str_buf( + reinterpret_cast<const char*>(str.data()), + str.size()); + auto tmp = NCEscape::encode(str_buf); + auto dest = NCEscape::decode(tmp); + UNIT_ASSERT_VALUES_EQUAL_C( + str_buf, TStringBuf(dest), + "A[" << str.size() << 
"]: " << str_buf << '\n' + << "B[" << tmp.size() << "]: " << tmp << '\n' + << "C[" << dest.size() << "]: " << dest); + } + + template <size_t N> + void test_exhaustive(TVector<ui8>& str) { + for (int i = 0; i < 256; ++i) { + str[str.size() - N] = static_cast<char>(i); + test_exhaustive<N - 1>(str); + } + } + + template <> + void test_exhaustive<0>(TVector<ui8>& str) { + test_roundtrip(str); + } + + template <size_t N> + void test_exhaustive() { + TVector<ui8> str(N, ' '); + test_exhaustive<N>(str); + } + +} // anonymous namespace + +Y_UNIT_TEST_SUITE(CEscape) { + Y_UNIT_TEST(ExhaustiveOneChar) { + test_exhaustive<1>(); + } + + Y_UNIT_TEST(ExhaustiveTwoChars) { + test_exhaustive<2>(); + } + + Y_UNIT_TEST(ExhaustiveThreeChars) { + test_exhaustive<3>(); + } + + Y_UNIT_TEST(SpecialEscapeEncode) { + //UNIT_ASSERT_VALUES_EQUAL(R"(\b)", NCEscape::encode("\b")); + //UNIT_ASSERT_VALUES_EQUAL(R"(\f)", NCEscape::encode("\f")); + UNIT_ASSERT_VALUES_EQUAL(R"(\n)", NCEscape::encode("\n")); + UNIT_ASSERT_VALUES_EQUAL(R"(\r)", NCEscape::encode("\r")); + UNIT_ASSERT_VALUES_EQUAL(R"(\t)", NCEscape::encode("\t")); + } + + Y_UNIT_TEST(SpecialEscapeDecode) { + UNIT_ASSERT_VALUES_EQUAL("\b", NCEscape::decode(R"(\b)")); + UNIT_ASSERT_VALUES_EQUAL("\f", NCEscape::decode(R"(\f)")); + UNIT_ASSERT_VALUES_EQUAL("\n", NCEscape::decode(R"(\n)")); + UNIT_ASSERT_VALUES_EQUAL("\r", NCEscape::decode(R"(\r)")); + UNIT_ASSERT_VALUES_EQUAL("\t", NCEscape::decode(R"(\t)")); + } + +} // Y_UNIT_TEST_SUITE(CEscape) diff --git a/library/cpp/yson_pull/ut/loop_ut.cpp b/library/cpp/yson_pull/ut/loop_ut.cpp new file mode 100644 index 0000000000..8c7b11dd1c --- /dev/null +++ b/library/cpp/yson_pull/ut/loop_ut.cpp @@ -0,0 +1,382 @@ +#include <library/cpp/yson_pull/input.h> +#include <library/cpp/yson_pull/output.h> +#include <library/cpp/yson_pull/reader.h> +#include <library/cpp/yson_pull/writer.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <cerrno> +#include <cmath> + +#ifdef _unix_ 
+#include <unistd.h> +#include <sys/wait.h> +#endif + +namespace { + constexpr const char* alphabet = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + void generate(NYsonPull::TWriter& writer, size_t count) { + writer.BeginStream(); + for (size_t i = 0; i < count; ++i) { + writer.BeginMap() + .Key("ints") + .BeginList() + .Int64(0) + .Int64(-1) + .Int64(1000) + .Int64(-1000) + .EndList() + .Key("uints") + .BeginList() + .UInt64(0) + .UInt64(1000) + .UInt64(10000000) + .EndList() + .Key("entities") + .BeginList() + .Entity() + .BeginAttributes() + .Key("color") + .String("blue") + .Key("size") + .Int64(100) + .EndAttributes() + .Entity() + .Entity() + .EndList() + .Key("booleans") + .BeginList() + .Boolean(true) + .Boolean(false) + .Boolean(true) + .EndList() + .Key("floats") + .BeginList() + .Float64(0.0) + .Float64(13.0e30) + .Float64(M_PI) + .EndList() + .Key("strings") + .BeginList() + .String("hello") + .String("") + .String("foo \"-bar-\" baz") + .String("oh\nwow") + .String(alphabet) + .EndList() + .EndMap(); + } + writer.EndStream(); + } + +#ifdef __clang__ + // XXX: With all the macros below (esp. UNIT_ASSERT_VALUES_EQUAL) unfolded, + // the time it takes clang to optimize generated code becomes abysmal. + // Locally disabling optimization brings it back to normal. 
+ __attribute__((optnone)) +#endif // __clang__ + void + verify(NYsonPull::TReader& reader, size_t count) { +#define NEXT(name__) \ + { \ + auto& name__ = reader.NextEvent(); // SCOPED_TRACE(e); +#define END_NEXT } +#define NEXT_TYPE(type__) \ + NEXT(e) { \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::type__, e.Type()); \ + } \ + END_NEXT +#define NEXT_KEY(key__) \ + NEXT(e) { \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); \ + UNIT_ASSERT_VALUES_EQUAL(key__, e.AsString()); \ + } \ + END_NEXT +#define NEXT_SCALAR(type__, value__) \ + NEXT(e) { \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::type__, e.AsScalar().Type()); \ + UNIT_ASSERT_VALUES_EQUAL(value__, e.AsScalar().As##type__()); \ + } \ + END_NEXT +#define NEXT_ENTITY() \ + NEXT(e) { \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::Entity, e.AsScalar().Type()); \ + } \ + END_NEXT +#define NEXT_FLOAT64(value__) \ + NEXT(e) { \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); \ + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::Float64, e.AsScalar().Type()); \ + UNIT_ASSERT_DOUBLES_EQUAL(value__, e.AsScalar().AsFloat64(), 1e-5); \ + } \ + END_NEXT + + constexpr auto true_ = true; + constexpr auto false_ = false; + + NEXT_TYPE(BeginStream); + for (size_t i = 0; i < count; ++i) { + NEXT_TYPE(BeginMap); + NEXT_KEY("ints") { + NEXT_TYPE(BeginList); + NEXT_SCALAR(Int64, 0); + NEXT_SCALAR(Int64, -1); + NEXT_SCALAR(Int64, 1000); + NEXT_SCALAR(Int64, -1000); + NEXT_TYPE(EndList); + } + NEXT_KEY("uints") { + NEXT_TYPE(BeginList); + NEXT_SCALAR(UInt64, 0U); + NEXT_SCALAR(UInt64, 1000U); + NEXT_SCALAR(UInt64, 10000000U); + NEXT_TYPE(EndList); + } + NEXT_KEY("entities") { + NEXT_TYPE(BeginList); + NEXT_ENTITY(); + NEXT_TYPE(BeginAttributes) { + NEXT_KEY("color") { + NEXT_SCALAR(String, "blue"); + } + NEXT_KEY("size") { + 
NEXT_SCALAR(Int64, 100); + } + } + NEXT_TYPE(EndAttributes); + NEXT_ENTITY(); + NEXT_ENTITY(); + NEXT_TYPE(EndList); + } + NEXT_KEY("booleans") { + NEXT_TYPE(BeginList); + NEXT_SCALAR(Boolean, true_); + NEXT_SCALAR(Boolean, false_); + NEXT_SCALAR(Boolean, true_); + NEXT_TYPE(EndList); + } + NEXT_KEY("floats") { + NEXT_TYPE(BeginList); + NEXT_FLOAT64(0.0); + NEXT_FLOAT64(13.0e30); + NEXT_FLOAT64(M_PI); + NEXT_TYPE(EndList); + } + NEXT_KEY("strings") { + NEXT_TYPE(BeginList); + NEXT_SCALAR(String, "hello"); + NEXT_SCALAR(String, ""); + NEXT_SCALAR(String, "foo \"-bar-\" baz"); + NEXT_SCALAR(String, "oh\nwow"); + NEXT_SCALAR(String, alphabet); + NEXT_TYPE(EndList); + } + NEXT_TYPE(EndMap); + } + NEXT_TYPE(EndStream); + +#undef NEXT +#undef END_NEXT +#undef NEXT_TYPE +#undef NEXT_KEY +#undef NEXT_SCALAR + } + + class sys_error {}; + + IOutputStream& operator<<(IOutputStream& stream, const sys_error&) { + stream << strerror(errno); + return stream; + } + + NYsonPull::TReader make_reader(THolder<NYsonPull::NInput::IStream> stream) { + return NYsonPull::TReader( + std::move(stream), + NYsonPull::EStreamType::ListFragment); + } + + template <typename Function> + void test_memory(Function make_writer, size_t nrepeat) { + TString text; + { + auto writer = make_writer(NYsonPull::NOutput::FromString(&text)); + generate(writer, nrepeat); + } + { + auto reader = make_reader(NYsonPull::NInput::FromMemory(text)); + verify(reader, nrepeat); + } + { + TStringInput input(text); + auto reader = make_reader(NYsonPull::NInput::FromInputStream(&input, /* buffer_size = */ 1)); + verify(reader, nrepeat); + } + } + +#ifdef _unix_ + template <typename Here, typename There> + void pipe(Here&& reader, There&& writer) { + int fildes[2]; + UNIT_ASSERT_VALUES_EQUAL_C(0, ::pipe(fildes), sys_error()); + auto read_fd = fildes[0]; + auto write_fd = fildes[1]; + + auto pid = ::fork(); + UNIT_ASSERT_C(pid >= 0, sys_error()); + if (pid > 0) { + // parent + UNIT_ASSERT_VALUES_EQUAL_C(0, 
::close(write_fd), sys_error()); + reader(read_fd); + UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(read_fd), sys_error()); + } else { + // child + UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(read_fd), sys_error()); + UNIT_ASSERT_NO_EXCEPTION(writer(write_fd)); + UNIT_ASSERT_VALUES_EQUAL_C(0, ::close(write_fd), sys_error()); + ::exit(0); + } + int stat_loc; + UNIT_ASSERT_VALUES_EQUAL_C(pid, ::waitpid(pid, &stat_loc, 0), sys_error()); + } + + template <typename Function> + void test_posix_fd( + Function make_writer, + size_t nrepeat, + size_t read_buffer_size, + size_t write_buffer_size) { + pipe( + [&](int fd) { + auto reader = make_reader(NYsonPull::NInput::FromPosixFd(fd, read_buffer_size)); + verify(reader, nrepeat); + }, + [&](int fd) { + auto writer = make_writer(NYsonPull::NOutput::FromPosixFd(fd, write_buffer_size)); + generate(writer, nrepeat); + }); + } + + template <typename Function> + void test_stdio_file( + Function make_writer, + size_t nrepeat, + size_t read_buffer_size, + size_t write_buffer_size) { + pipe( + [&](int fd) { + auto file = ::fdopen(fd, "rb"); + UNIT_ASSERT_C(file != nullptr, sys_error()); + auto reader = make_reader(NYsonPull::NInput::FromStdioFile(file, read_buffer_size)); + verify(reader, nrepeat); + }, + [&](int fd) { + auto file = ::fdopen(fd, "wb"); + Y_UNUSED(write_buffer_size); + auto writer = make_writer(NYsonPull::NOutput::FromStdioFile(file, write_buffer_size)); + generate(writer, nrepeat); + fflush(file); + }); + } +#endif + + NYsonPull::TWriter text(THolder<NYsonPull::NOutput::IStream> stream) { + return NYsonPull::MakeTextWriter( + std::move(stream), + NYsonPull::EStreamType::ListFragment); + } + + NYsonPull::TWriter pretty_text(THolder<NYsonPull::NOutput::IStream> stream) { + return NYsonPull::MakePrettyTextWriter( + std::move(stream), + NYsonPull::EStreamType::ListFragment); + } + + NYsonPull::TWriter binary(THolder<NYsonPull::NOutput::IStream> stream) { + return NYsonPull::MakeBinaryWriter( + std::move(stream), + 
NYsonPull::EStreamType::ListFragment); + } + +} // anonymous namespace + +Y_UNIT_TEST_SUITE(Loop) { + Y_UNIT_TEST(memory_pretty_text) { + test_memory(pretty_text, 100); + } + + Y_UNIT_TEST(memory_text) { + test_memory(text, 100); + } + + Y_UNIT_TEST(memory_binary) { + test_memory(binary, 100); + } + +#ifdef _unix_ + Y_UNIT_TEST(posix_fd_pretty_text_buffered) { + test_posix_fd(pretty_text, 100, 1024, 1024); + } + + Y_UNIT_TEST(posix_fd_pretty_text_unbuffered) { + test_posix_fd(pretty_text, 100, 1, 0); + } + + Y_UNIT_TEST(posix_fd_text_buffered) { + test_posix_fd(text, 100, 1024, 1024); + } + + Y_UNIT_TEST(posix_fd_text_unbuffered) { + test_posix_fd(text, 100, 1, 0); + } + + Y_UNIT_TEST(posix_fd_binary_buffered) { + test_posix_fd(binary, 100, 1024, 1024); + } + + Y_UNIT_TEST(posix_fd_binary_unbuffered) { + test_posix_fd(binary, 100, 1, 0); + } + + Y_UNIT_TEST(stdio_file_pretty_text_buffered) { + test_stdio_file(pretty_text, 100, 1024, 1024); + } + + Y_UNIT_TEST(stdio_file_pretty_text_unbuffered) { + test_stdio_file(pretty_text, 100, 1, 0); + } + + Y_UNIT_TEST(stdio_file_text_buffered) { + test_stdio_file(text, 100, 1024, 1024); + } + + Y_UNIT_TEST(stdio_file_text_unbuffered) { + test_stdio_file(text, 100, 1, 0); + } + + Y_UNIT_TEST(stdio_file_binary_buffered) { + test_stdio_file(binary, 100, 1024, 1024); + } + + Y_UNIT_TEST(stdio_file_binary_unbuffered) { + test_stdio_file(binary, 100, 1, 0); + } +#endif +} // Y_UNIT_TEST_SUITE(Loop) diff --git a/library/cpp/yson_pull/ut/reader_ut.cpp b/library/cpp/yson_pull/ut/reader_ut.cpp new file mode 100644 index 0000000000..1184265ddb --- /dev/null +++ b/library/cpp/yson_pull/ut/reader_ut.cpp @@ -0,0 +1,410 @@ +#include <library/cpp/yson_pull/exceptions.h> +#include <library/cpp/yson_pull/range.h> +#include <library/cpp/yson_pull/reader.h> +#include <library/cpp/yson_pull/detail/cescape.h> +#include <library/cpp/yson_pull/detail/macros.h> + +#include <library/cpp/testing/unittest/registar.h> + +namespace { + NYsonPull::TReader 
memory_reader(TStringBuf data, NYsonPull::EStreamType mode) { + return NYsonPull::TReader( + NYsonPull::NInput::FromMemory(data), + mode); + } + + template <typename T> + void expect_scalar(const NYsonPull::TScalar& scalar, T value) { + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{value}, scalar); + } + + template <> + void expect_scalar(const NYsonPull::TScalar& scalar, double value) { + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EScalarType::Float64, scalar.Type()); + + auto scalarValue = scalar.AsFloat64(); + auto message = TStringBuilder() << "expected " << value << ", got " << scalarValue; + + if (std::isfinite(value)) { + UNIT_ASSERT_C(std::isfinite(scalarValue), message); + UNIT_ASSERT_DOUBLES_EQUAL(value, scalarValue, 1e-5); + } else if (std::isnan(value)) { + UNIT_ASSERT_C(std::isnan(scalarValue), message); + } else if (value > 0) { + UNIT_ASSERT_C(std::isinf(scalarValue) && (scalarValue > 0), message); + } else { + UNIT_ASSERT_C(std::isinf(scalarValue) && (scalarValue < 0), message); + } + } + + template <typename T> + void test_scalar(TStringBuf data, T value) { + // SCOPED_TRACE(NYsonPull::detail::cescape::quote(data)); + auto reader = memory_reader(data, NYsonPull::EStreamType::Node); + + try { + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type()); + { + auto& event = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, event.Type()); + expect_scalar(event.AsScalar(), value); + } + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type()); + } catch (const std::exception& err) { + UNIT_FAIL(err.what()); + } + } + + void consume(TStringBuf data, NYsonPull::EStreamType mode = NYsonPull::EStreamType::Node) { + // SCOPED_TRACE(NYsonPull::detail::cescape::quote(data)); + auto input_range = NYsonPull::TStreamEventsRange( + NYsonPull::NInput::FromMemory(data), + mode); + for (auto& event : input_range) { + Y_UNUSED(event); + } + } + +#define ACCEPT(data) 
UNIT_ASSERT_NO_EXCEPTION(consume(data)) +#define REJECT(data) UNIT_ASSERT_EXCEPTION(consume(data), NYsonPull::NException::TBadInput) + +#define ACCEPT2(data, mode) UNIT_ASSERT_NO_EXCEPTION(consume(data, mode)) +#define REJECT2(data, mode) UNIT_ASSERT_EXCEPTION(consume(data, mode), NYsonPull::NException::TBadInput) + +} // anonymous namespace + +Y_UNIT_TEST_SUITE(Reader) { + Y_UNIT_TEST(ScalarEntity) { + test_scalar(TStringBuf("#"), NYsonPull::TScalar{}); + } + + Y_UNIT_TEST(ScalarBoolean) { + test_scalar(TStringBuf("%true"), true); + test_scalar(TStringBuf("%false"), false); + + test_scalar(TStringBuf("\x05"sv), true); + test_scalar(TStringBuf("\x04"sv), false); + + REJECT("%"); + REJECT("%trueth"); + REJECT("%tru"); + REJECT("%falseth"); + REJECT("%fals"); + REJECT("%hithere"); + } + + Y_UNIT_TEST(ScalarInt64) { + test_scalar(TStringBuf("1"), i64{1}); + test_scalar(TStringBuf("+1"), i64{1}); + test_scalar(TStringBuf("100000"), i64{100000}); + test_scalar(TStringBuf("+100000"), i64{100000}); + test_scalar(TStringBuf("-100000"), i64{-100000}); + test_scalar(TStringBuf("9223372036854775807"), i64{9223372036854775807}); + test_scalar(TStringBuf("+9223372036854775807"), i64{9223372036854775807}); + + test_scalar(TStringBuf("\x02\x02"sv), i64{1}); + test_scalar(TStringBuf("\x02\xc0\x9a\x0c"sv), i64{100000}); + test_scalar(TStringBuf("\x02\xbf\x9a\x0c"sv), i64{-100000}); + test_scalar(TStringBuf("\x02\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01"sv), i64{9223372036854775807}); + + REJECT("1a2"); + REJECT("1-1-1-1"); + REJECT("1+0"); + } + + Y_UNIT_TEST(SclarUInt64) { + test_scalar(TStringBuf("1u"), ui64{1}); + test_scalar(TStringBuf("+1u"), ui64{1}); + test_scalar(TStringBuf("100000u"), ui64{100000}); + test_scalar(TStringBuf("+100000u"), ui64{100000}); + test_scalar(TStringBuf("9223372036854775807u"), ui64{9223372036854775807u}); + test_scalar(TStringBuf("+9223372036854775807u"), ui64{9223372036854775807u}); + test_scalar(TStringBuf("18446744073709551615u"), 
ui64{18446744073709551615u}); + test_scalar(TStringBuf("+18446744073709551615u"), ui64{18446744073709551615u}); + + REJECT("1a2u"); + REJECT("1-1-1-1u"); + REJECT("1+0u"); + + // TODO: binary + } + + Y_UNIT_TEST(ScalarFloat64) { + test_scalar(TStringBuf("0.0"), double{0.0}); + test_scalar(TStringBuf("+0.0"), double{0.0}); + test_scalar(TStringBuf("+.0"), double{0.0}); + test_scalar(TStringBuf("+.5"), double{0.5}); + test_scalar(TStringBuf("-.5"), double{-0.5}); + test_scalar(TStringBuf("1.0"), double{1.0}); + test_scalar(TStringBuf("+1.0"), double{1.0}); + test_scalar(TStringBuf("-1.0"), double{-1.0}); + test_scalar(TStringBuf("1000.0"), double{1000.0}); + test_scalar(TStringBuf("+1000.0"), double{1000.0}); + test_scalar(TStringBuf("-1000.0"), double{-1000.0}); + test_scalar(TStringBuf("1e12"), double{1e12}); + test_scalar(TStringBuf("1e+12"), double{1e12}); + test_scalar(TStringBuf("+1e+12"), double{1e12}); + test_scalar(TStringBuf("-1e+12"), double{-1e12}); + test_scalar(TStringBuf("1e-12"), double{1e-12}); + test_scalar(TStringBuf("+1e-12"), double{1e-12}); + test_scalar(TStringBuf("-1e-12"), double{-1e-12}); + + test_scalar(TStringBuf("\x03\x00\x00\x00\x00\x00\x00\x00\x00"sv), double{0.0}); + + test_scalar( + TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf8\x7f"sv), + double{std::numeric_limits<double>::quiet_NaN()}); + test_scalar( + TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\x7f"sv), + double{std::numeric_limits<double>::infinity()}); + test_scalar( + TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\xff"sv), + double{-std::numeric_limits<double>::infinity()}); + + test_scalar( + TStringBuf("%nan"), + double{std::numeric_limits<double>::quiet_NaN()}); + test_scalar( + TStringBuf("%inf"), + double{std::numeric_limits<double>::infinity()}); + test_scalar( + TStringBuf("%-inf"), + double{-std::numeric_limits<double>::infinity()}); + + REJECT("++0.0"); + REJECT("++1.0"); + REJECT("++.1"); + REJECT("1.0.0"); + //REJECT("1e+10000"); + 
REJECT(TStringBuf("\x03\x00\x00\x00\x00\x00\x00\x00"sv)); + + // XXX: Questionable behaviour? + ACCEPT("+.0"); + ACCEPT("-.0"); + // XXX: Rejected on Mac OS, accepted on Linux (?!) + //REJECT(".0"); + //REJECT(".5"); + + REJECT("%NaN"); + REJECT("%+inf"); + REJECT("%infinity"); + REJECT("%na"); + REJECT("%in"); + REJECT("%-in"); + } + + Y_UNIT_TEST(ScalarString) { + test_scalar(TStringBuf(R"(foobar)"), TStringBuf("foobar")); + test_scalar(TStringBuf(R"(foobar11)"), TStringBuf("foobar11")); + test_scalar(TStringBuf(R"("foobar")"), TStringBuf("foobar")); + // wat? "\x0cf" parsed as a single char? no way! + test_scalar("\x01\x0c" "foobar"sv, + TStringBuf("foobar")); + + REJECT(R"("foobar)"); + REJECT("\x01\x0c" "fooba"sv); + REJECT("\x01\x0d" "foobar"sv); // negative length + } + + Y_UNIT_TEST(EmptyList) { + auto reader = memory_reader("[]", NYsonPull::EStreamType::Node); + + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginList, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndList, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type()); + + REJECT("["); + REJECT("]"); + } + + Y_UNIT_TEST(EmptyMap) { + auto reader = memory_reader("{}", NYsonPull::EStreamType::Node); + + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginMap, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndMap, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type()); + + REJECT("{"); + REJECT("}"); + } + + Y_UNIT_TEST(Sample) { + auto reader = memory_reader( + R"({"11"=11;"nothing"=#;"zero"=0.;"foo"="bar";"list"=[1;2;3]})", + NYsonPull::EStreamType::Node); + + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, 
reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginMap, reader.NextEvent().Type()); + + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf("11"), e.AsString()); + } + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{11}}, e.AsScalar()); + } + + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf("nothing"), e.AsString()); + } + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{}, e.AsScalar()); + } + + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf("zero"), e.AsString()); + } + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{0.0}, e.AsScalar()); + } + + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf("foo"), e.AsString()); + } + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{TStringBuf("bar")}, e.AsScalar()); + } + + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Key, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(TStringBuf("list"), e.AsString()); + } + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginList, reader.NextEvent().Type()); + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{1}}, e.AsScalar()); + } + { + auto& e = reader.NextEvent(); + 
UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{2}}, e.AsScalar()); + } + { + auto& e = reader.NextEvent(); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, e.Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::TScalar{i64{3}}, e.AsScalar()); + } + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndList, reader.NextEvent().Type()); + + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndMap, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type()); + } + + Y_UNIT_TEST(Accept) { + ACCEPT("[]"); + ACCEPT("{}"); + ACCEPT("<>[]"); + ACCEPT("<>{}"); + ACCEPT("[{};{};{}]"); + ACCEPT("[{};{};{};]"); + ACCEPT("[<>{};<>{};<>{}]"); + ACCEPT("[<>{};<>{};<>{};]"); + + ACCEPT("foo"); + ACCEPT("[foo]"); + ACCEPT("[foo;]"); + ACCEPT("{foo=foo}"); + ACCEPT("{foo=foo;}"); + ACCEPT("<>{foo=foo}"); + ACCEPT("{foo=<foo=foo>foo}"); + ACCEPT("{foo=<foo=foo;>foo}"); + ACCEPT("{foo=<foo=foo>[foo;foo]}"); + } + + Y_UNIT_TEST(Reject) { + REJECT("["); + REJECT("{"); + REJECT("<"); + + REJECT("[[}]"); + REJECT("<>{]"); + REJECT("[>]"); + + REJECT("<><>[]"); + REJECT("[<>;<>]"); + + REJECT("{<>foo=foo}"); + REJECT("{foo=<>}"); + REJECT("{foo}"); + + REJECT("<a=b>"); + REJECT("<>"); + + REJECT("@"); + } + + Y_UNIT_TEST(ReadPastEnd) { + auto reader = memory_reader("#", NYsonPull::EStreamType::Node); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::BeginStream, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::Scalar, reader.NextEvent().Type()); + UNIT_ASSERT_VALUES_EQUAL(NYsonPull::EEventType::EndStream, reader.NextEvent().Type()); + UNIT_ASSERT_EXCEPTION(reader.NextEvent(), NYsonPull::NException::TBadInput); + } + + Y_UNIT_TEST(BadInput) { + // max_size<ui32> < varint size < max_size<ui64> + auto t = TString("\x01\xff\xff\xff\xff\xff\xff\xff\xff"); + auto reader = memory_reader(t, NYsonPull::EStreamType::Node); + + 
UNIT_ASSERT_EQUAL(reader.NextEvent().Type(), NYsonPull::EEventType::BeginStream); + UNIT_ASSERT_EXCEPTION(reader.NextEvent(), NYsonPull::NException::TBadInput); + } + + Y_UNIT_TEST(StreamType) { + REJECT2("", NYsonPull::EStreamType::Node); + ACCEPT2("", NYsonPull::EStreamType::ListFragment); + ACCEPT2("", NYsonPull::EStreamType::MapFragment); + + ACCEPT2("[1]", NYsonPull::EStreamType::Node); + ACCEPT2("[1]", NYsonPull::EStreamType::ListFragment); + REJECT2("[1]", NYsonPull::EStreamType::MapFragment); + + ACCEPT2("<foo=bar>[1]", NYsonPull::EStreamType::Node); + ACCEPT2("<foo=bar>[1]", NYsonPull::EStreamType::ListFragment); + REJECT2("<foo=bar>[1]", NYsonPull::EStreamType::MapFragment); + + ACCEPT2(" [1] \t \t ", NYsonPull::EStreamType::Node); + ACCEPT2(" [1] \t \t ", NYsonPull::EStreamType::ListFragment); + REJECT2(" [1] \t \t ", NYsonPull::EStreamType::MapFragment); + + REJECT2("[1];", NYsonPull::EStreamType::Node); + ACCEPT2("[1];", NYsonPull::EStreamType::ListFragment); + REJECT2("[1];", NYsonPull::EStreamType::MapFragment); + + REJECT2("[1]; foobar", NYsonPull::EStreamType::Node); + ACCEPT2("[1]; foobar", NYsonPull::EStreamType::ListFragment); + REJECT2("[1]; foobar", NYsonPull::EStreamType::MapFragment); + + REJECT2("a=[1]", NYsonPull::EStreamType::Node); + REJECT2("a=[1]", NYsonPull::EStreamType::ListFragment); + ACCEPT2("a=[1]", NYsonPull::EStreamType::MapFragment); + + REJECT2("a=[1]; ", NYsonPull::EStreamType::Node); + REJECT2("a=[1]; ", NYsonPull::EStreamType::ListFragment); + ACCEPT2("a=[1]; ", NYsonPull::EStreamType::MapFragment); + + REJECT2("a=[1]; b=foobar", NYsonPull::EStreamType::Node); + REJECT2("a=[1]; b=foobar", NYsonPull::EStreamType::ListFragment); + ACCEPT2("a=[1]; b=foobar", NYsonPull::EStreamType::MapFragment); + } + +} // Y_UNIT_TEST_SUITE(Reader) diff --git a/library/cpp/yson_pull/ut/writer_ut.cpp b/library/cpp/yson_pull/ut/writer_ut.cpp new file mode 100644 index 0000000000..5c304bad0f --- /dev/null +++ 
b/library/cpp/yson_pull/ut/writer_ut.cpp @@ -0,0 +1,256 @@ +#include <library/cpp/yson_pull/scalar.h> +#include <library/cpp/yson_pull/detail/writer.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/string.h> + +#include <climits> +#include <limits> + +using namespace std::string_view_literals; + +namespace { + // Creates a Node-mode writer of implementation \p Writer over NOutput::FromString(&result), + // passes it to \p function and returns result. + // NOTE(review): result is returned while writer is still alive; presumably EndStream() + // has already flushed all output by then -- confirm. + template <typename Writer, typename Function> + TString with_writer(Function&& function) { + TString result; + auto writer = NYsonPull::NDetail::make_writer<Writer>( + NYsonPull::NOutput::FromString(&result), + NYsonPull::EStreamType::Node); + + function(writer); + + return result; + } + + // Writes \p value as a complete stream (BeginStream, Scalar, EndStream) with the given + // writer implementation and returns what with_writer collected. + template <typename Writer> + TString to_yson_string(const NYsonPull::TScalar& value) { + return with_writer<Writer>([&](NYsonPull::TWriter& writer) { + writer.BeginStream().Scalar(value).EndStream(); + }); + } + + // to_yson_string specialised for TBinaryWriterImpl. + template <typename T> + TString to_yson_binary_string(T&& value) { + return to_yson_string<NYsonPull::NDetail::TBinaryWriterImpl>(std::forward<T>(value)); + } + + // to_yson_string specialised for TTextWriterImpl. + template <typename T> + TString to_yson_text_string(T&& value) { + return to_yson_string<NYsonPull::NDetail::TTextWriterImpl>(std::forward<T>(value)); + } + +} // anonymous namespace + +// =================== Text format ===================== + +Y_UNIT_TEST_SUITE(Writer) { + Y_UNIT_TEST(TextEntity) { + UNIT_ASSERT_VALUES_EQUAL( + "#", + to_yson_text_string(NYsonPull::TScalar{})); + } + + Y_UNIT_TEST(TextBoolean) { + UNIT_ASSERT_VALUES_EQUAL( + "%false", + to_yson_text_string(NYsonPull::TScalar{false})); + UNIT_ASSERT_VALUES_EQUAL( + "%true", + to_yson_text_string(NYsonPull::TScalar{true})); + } + + // Covers zero, both extremes of i64 and several magnitudes in between. + Y_UNIT_TEST(TextInt64) { + UNIT_ASSERT_VALUES_EQUAL( + "0", + to_yson_text_string(NYsonPull::TScalar{i64{0}})); + UNIT_ASSERT_VALUES_EQUAL( + "200", + to_yson_text_string(NYsonPull::TScalar{i64{200}})); + UNIT_ASSERT_VALUES_EQUAL( + "20000", + to_yson_text_string(NYsonPull::TScalar{i64{20000}})); + UNIT_ASSERT_VALUES_EQUAL( + "200000000", + 
to_yson_text_string(NYsonPull::TScalar{i64{200000000}})); + UNIT_ASSERT_VALUES_EQUAL( + "20000000000000000", + to_yson_text_string(NYsonPull::TScalar{i64{20000000000000000}})); + UNIT_ASSERT_VALUES_EQUAL( + "9223372036854775807", + to_yson_text_string(NYsonPull::TScalar{i64{INT64_MAX}})); + + UNIT_ASSERT_VALUES_EQUAL( + "-200", + to_yson_text_string(NYsonPull::TScalar{i64{-200}})); + UNIT_ASSERT_VALUES_EQUAL( + "-20000", + to_yson_text_string(NYsonPull::TScalar{i64{-20000}})); + UNIT_ASSERT_VALUES_EQUAL( + "-200000000", + to_yson_text_string(NYsonPull::TScalar{i64{-200000000}})); + UNIT_ASSERT_VALUES_EQUAL( + "-20000000000000000", + to_yson_text_string(NYsonPull::TScalar{i64{-20000000000000000}})); + UNIT_ASSERT_VALUES_EQUAL( + "-9223372036854775808", + to_yson_text_string(NYsonPull::TScalar{i64{INT64_MIN}})); + } + + // Unsigned values are expected with a trailing 'u' in text format. + Y_UNIT_TEST(TextUInt64) { + UNIT_ASSERT_VALUES_EQUAL( + "0u", + to_yson_text_string(NYsonPull::TScalar{ui64{0}})); + UNIT_ASSERT_VALUES_EQUAL( + "200u", + to_yson_text_string(NYsonPull::TScalar{ui64{200}})); + UNIT_ASSERT_VALUES_EQUAL( + "20000u", + to_yson_text_string(NYsonPull::TScalar{ui64{20000}})); + UNIT_ASSERT_VALUES_EQUAL( + "200000000u", + to_yson_text_string(NYsonPull::TScalar{ui64{200000000}})); + UNIT_ASSERT_VALUES_EQUAL( + "20000000000000000u", + to_yson_text_string(NYsonPull::TScalar{ui64{20000000000000000}})); + UNIT_ASSERT_VALUES_EQUAL( + "9223372036854775807u", + to_yson_text_string(NYsonPull::TScalar{ui64{INT64_MAX}})); + UNIT_ASSERT_VALUES_EQUAL( + "18446744073709551615u", + to_yson_text_string(NYsonPull::TScalar{ui64{UINT64_MAX}})); + } + + // Only non-finite doubles are checked here; they are expected as %inf, %-inf and %nan. + Y_UNIT_TEST(TextFloat64) { + UNIT_ASSERT_VALUES_EQUAL( + "%inf", + to_yson_text_string(NYsonPull::TScalar{std::numeric_limits<double>::infinity()})); + UNIT_ASSERT_VALUES_EQUAL( + "%-inf", + to_yson_text_string(NYsonPull::TScalar{-std::numeric_limits<double>::infinity()})); + UNIT_ASSERT_VALUES_EQUAL( + "%nan", + to_yson_text_string(NYsonPull::TScalar{std::numeric_limits<double>::quiet_NaN()})); 
+ } + + Y_UNIT_TEST(TextString) { + UNIT_ASSERT_VALUES_EQUAL( + R"("")", + to_yson_text_string(NYsonPull::TScalar{""})); + UNIT_ASSERT_VALUES_EQUAL( + R"("hello")", + to_yson_text_string(NYsonPull::TScalar{"hello"})); + UNIT_ASSERT_VALUES_EQUAL( + R"("hello\nworld")", + to_yson_text_string(NYsonPull::TScalar{"hello\nworld"})); + } + + // =================== Binary format ===================== + + // Entity is expected as the same single '#' byte as in text format. + Y_UNIT_TEST(BinaryEntity) { + UNIT_ASSERT_VALUES_EQUAL( + "#", + to_yson_binary_string(NYsonPull::TScalar{})); + } + + // A boolean is expected as one marker byte: 0x04 for false, 0x05 for true. + Y_UNIT_TEST(BinaryBoolean) { + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x4"), + to_yson_binary_string(NYsonPull::TScalar{false})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x5"), + to_yson_binary_string(NYsonPull::TScalar{true})); + } + + // Expected bytes: marker 0x02, then the value as a zigzag-encoded varint + // (e.g. 200 -> 400 -> 0x90 0x03, -200 -> 399 -> 0x8F 0x03). + // Literals containing an embedded NUL use the sv suffix so that the NUL is kept. + Y_UNIT_TEST(BinaryInt64) { + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\0"sv), + to_yson_binary_string(NYsonPull::TScalar{i64{0}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\x90\x3"), + to_yson_binary_string(NYsonPull::TScalar{i64{200}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\xC0\xB8\x2"), + to_yson_binary_string(NYsonPull::TScalar{i64{20000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\x80\x88\xDE\xBE\x1"), + to_yson_binary_string(NYsonPull::TScalar{i64{200000000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\x80\x80\x90\xF8\x9B\xF9\x86G"), + to_yson_binary_string(NYsonPull::TScalar{i64{20000000000000000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1"), + to_yson_binary_string(NYsonPull::TScalar{i64{INT64_MAX}})); + + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\x8F\x3"), + to_yson_binary_string(NYsonPull::TScalar{i64{-200}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\xBF\xB8\x2"), + to_yson_binary_string(NYsonPull::TScalar{i64{-20000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\xFF\x87\xDE\xBE\x1"), + to_yson_binary_string(NYsonPull::TScalar{i64{-200000000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\xFF\xFF\x8F\xF8\x9B\xF9\x86G"), + 
to_yson_binary_string(NYsonPull::TScalar{i64{-20000000000000000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x2\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1"), + to_yson_binary_string(NYsonPull::TScalar{i64{INT64_MIN}})); + } + + // Expected bytes: marker 0x06, then the value as a plain (non-zigzag) varint + // (e.g. 200 -> 0xC8 0x01). + Y_UNIT_TEST(BinaryUInt64) { + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\0"sv), + to_yson_binary_string(NYsonPull::TScalar{ui64{0}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\xC8\x1"), + to_yson_binary_string(NYsonPull::TScalar{ui64{200}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\xA0\x9C\x1"), + to_yson_binary_string(NYsonPull::TScalar{ui64{20000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\x80\x84\xAF_"), + to_yson_binary_string(NYsonPull::TScalar{ui64{200000000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\x80\x80\x88\xFC\xCD\xBC\xC3#"), + to_yson_binary_string(NYsonPull::TScalar{ui64{20000000000000000}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"), + to_yson_binary_string(NYsonPull::TScalar{ui64{INT64_MAX}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x6\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1"), + to_yson_binary_string(NYsonPull::TScalar{ui64{UINT64_MAX}})); + } + + // Expected bytes: marker 0x03, then the 8 bytes of the double, least significant byte first + // (e.g. +inf = 0x7FF0000000000000 -> 00 00 00 00 00 00 f0 7f). + Y_UNIT_TEST(BinaryFloat64) { + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\x7f"sv), + to_yson_binary_string(NYsonPull::TScalar{std::numeric_limits<double>::infinity()})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf0\xff"sv), + to_yson_binary_string(NYsonPull::TScalar{-std::numeric_limits<double>::infinity()})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x03\x00\x00\x00\x00\x00\x00\xf8\x7f"sv), + to_yson_binary_string(NYsonPull::TScalar{std::numeric_limits<double>::quiet_NaN()})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x03\x9a\x99\x99\x99\x99\x99\xf1\x3f"), + to_yson_binary_string(NYsonPull::TScalar{double{1.1}})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x03\x9a\x99\x99\x99\x99\x99\xf1\xbf"), + 
to_yson_binary_string(NYsonPull::TScalar{double{-1.1}})); + } + + // Expected bytes: marker 0x01, then the length as a zigzag-encoded varint + // (5 -> 10 == '\n', 11 -> 22 == 0x16), then the raw string bytes. + Y_UNIT_TEST(BinaryString) { + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x1\0"sv), + to_yson_binary_string(NYsonPull::TScalar{""})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x1\nhello"), + to_yson_binary_string(NYsonPull::TScalar{"hello"})); + UNIT_ASSERT_VALUES_EQUAL( + TStringBuf("\x1\x16hello\nworld"), + to_yson_binary_string(NYsonPull::TScalar{"hello\nworld"})); + } + +} // Y_UNIT_TEST_SUITE(Writer) diff --git a/library/cpp/yson_pull/ut/ya.make b/library/cpp/yson_pull/ut/ya.make new file mode 100644 index 0000000000..a269dfd2ad --- /dev/null +++ b/library/cpp/yson_pull/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/yson_pull) + +OWNER(borman) + +SRCS( + cescape_ut.cpp + reader_ut.cpp + writer_ut.cpp + loop_ut.cpp +) + +END() diff --git a/library/cpp/yson_pull/writer.cpp b/library/cpp/yson_pull/writer.cpp new file mode 100644 index 0000000000..1df92bf40f --- /dev/null +++ b/library/cpp/yson_pull/writer.cpp @@ -0,0 +1,30 @@ +#include "writer.h" +#include <library/cpp/yson_pull/detail/writer.h> + +using namespace NYsonPull; + +TWriter NYsonPull::MakeBinaryWriter( + THolder<NOutput::IStream> stream, + EStreamType mode) { + return NYsonPull::NDetail::make_writer<NYsonPull::NDetail::TBinaryWriterImpl>( + std::move(stream), + mode); +} + +TWriter NYsonPull::MakeTextWriter( + THolder<NOutput::IStream> stream, + EStreamType mode) { + return NYsonPull::NDetail::make_writer<NYsonPull::NDetail::TTextWriterImpl>( + std::move(stream), + mode); +} + +TWriter NYsonPull::MakePrettyTextWriter( + THolder<NOutput::IStream> stream, + EStreamType mode, + size_t indent_size) { + return NYsonPull::NDetail::make_writer<NYsonPull::NDetail::TPrettyWriterImpl>( + std::move(stream), + mode, + indent_size); +} diff --git a/library/cpp/yson_pull/writer.h b/library/cpp/yson_pull/writer.h new file mode 100644 index 0000000000..dec63328be --- /dev/null +++ b/library/cpp/yson_pull/writer.h @@ -0,0 +1,126 @@ +#pragma once + 
+#include "consumer.h" +#include "output.h" +#include "scalar.h" +#include "stream_type.h" + +#include <memory> + +namespace NYsonPull { + //! \brief YSON writer facade class + //! + //! Owns a YSON consumer and a corresponding output stream. + //! Methods invoke corresponding \p NYsonPull::IConsumer methods and can be chained. + class TWriter { + // NOTE(review): Stream_ is declared before Impl_, so Impl_ is destroyed first; + // presumably the consumer refers to this stream -- confirm before reordering. + THolder<NOutput::IStream> Stream_; + THolder<IConsumer> Impl_; + + public: + //! \brief Take ownership of both \p stream and \p impl. + TWriter( + THolder<NOutput::IStream> stream, + THolder<IConsumer> impl) + : Stream_{std::move(stream)} + , Impl_{std::move(impl)} { + } + + //! \brief Get a reference to the underlying consumer. + //! + //! Useful with \p NYsonPull::Bridge. + //! The consumer is owned by this writer, so the reference must not outlive it. + IConsumer& GetConsumer() { + return *Impl_; + } + + //! Each method below forwards to the corresponding On* method of the consumer + //! and returns \p *this so that calls can be chained. + TWriter& BeginStream() { + Impl_->OnBeginStream(); + return *this; + } + TWriter& EndStream() { + Impl_->OnEndStream(); + return *this; + } + + TWriter& BeginList() { + Impl_->OnBeginList(); + return *this; + } + TWriter& EndList() { + Impl_->OnEndList(); + return *this; + } + + TWriter& BeginMap() { + Impl_->OnBeginMap(); + return *this; + } + TWriter& EndMap() { + Impl_->OnEndMap(); + return *this; + } + + TWriter& BeginAttributes() { + Impl_->OnBeginAttributes(); + return *this; + } + TWriter& EndAttributes() { + Impl_->OnEndAttributes(); + return *this; + } + + TWriter& Key(TStringBuf name) { + Impl_->OnKey(name); + return *this; + } + + TWriter& Entity() { + Impl_->OnEntity(); + return *this; + } + TWriter& Boolean(bool value) { + Impl_->OnScalarBoolean(value); + return *this; + } + TWriter& Int64(i64 value) { + Impl_->OnScalarInt64(value); + return *this; + } + TWriter& UInt64(ui64 value) { + Impl_->OnScalarUInt64(value); + return *this; + } + TWriter& Float64(double value) { + Impl_->OnScalarFloat64(value); + return *this; + } + TWriter& String(TStringBuf value) { + Impl_->OnScalarString(value); + return *this; + } + + TWriter& Scalar(const TScalar& value) { + Impl_->OnScalar(value); + return *this; + } + TWriter& Event(const TEvent& value) { + 
Impl_->OnEvent(value); + return *this; + } + }; + + //! \brief Construct a writer for binary YSON format. + TWriter MakeBinaryWriter( + THolder<NOutput::IStream> stream, + EStreamType mode); + + //! \brief Construct a writer for text YSON format. + TWriter MakeTextWriter( + THolder<NOutput::IStream> stream, + EStreamType mode); + + //! \brief Construct a writer for pretty text YSON format. + //! + //! \p indent_size defaults to 4 when omitted. + TWriter MakePrettyTextWriter( + THolder<NOutput::IStream> stream, + EStreamType mode, + size_t indent_size = 4); + +} diff --git a/library/cpp/yson_pull/ya.make b/library/cpp/yson_pull/ya.make new file mode 100644 index 0000000000..a373e0a6ba --- /dev/null +++ b/library/cpp/yson_pull/ya.make @@ -0,0 +1,21 @@ +LIBRARY(yson_pull) + +OWNER(borman) + +SRCS( + consumer.cpp + event.cpp + exceptions.cpp + input.cpp + output.cpp + read_ops.cpp + reader.cpp + scalar.cpp + writer.cpp +) + +GENERATE_ENUM_SERIALIZATION(event.h) + +GENERATE_ENUM_SERIALIZATION(scalar.h) + +END() diff --git a/library/cpp/yson_pull/yson.h b/library/cpp/yson_pull/yson.h new file mode 100644 index 0000000000..a77eaa5c94 --- /dev/null +++ b/library/cpp/yson_pull/yson.h @@ -0,0 +1,14 @@ +#pragma once + +#include "bridge.h" +#include "consumer.h" +#include "event.h" +#include "exceptions.h" +#include "input.h" +#include "output.h" +#include "position_info.h" +#include "range.h" +#include "reader.h" +#include "scalar.h" +#include "stream_type.h" +#include "writer.h" |