diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/yson_pull/detail | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/yson_pull/detail')
23 files changed, 2121 insertions, 2121 deletions
diff --git a/library/cpp/yson_pull/detail/byte_reader.h b/library/cpp/yson_pull/detail/byte_reader.h index 915277518a..7cea50d323 100644 --- a/library/cpp/yson_pull/detail/byte_reader.h +++ b/library/cpp/yson_pull/detail/byte_reader.h @@ -6,69 +6,69 @@ #include <library/cpp/yson_pull/input.h> -namespace NYsonPull { - namespace NDetail { - template <class StreamCounter> - class byte_reader { - NYsonPull::NInput::IStream& stream_; - StreamCounter stream_counter_; +namespace NYsonPull { + namespace NDetail { + template <class StreamCounter> + class byte_reader { + NYsonPull::NInput::IStream& stream_; + StreamCounter stream_counter_; - public: - byte_reader(NYsonPull::NInput::IStream& stream) - : stream_(stream) - { - } + public: + byte_reader(NYsonPull::NInput::IStream& stream) + : stream_(stream) + { + } - // const-ness added to prevent direct stream mutation - const NYsonPull::NInput::IStream& stream() { - return stream_; - } + // const-ness added to prevent direct stream mutation + const NYsonPull::NInput::IStream& stream() { + return stream_; + } - template <typename... Args> - ATTRIBUTE(noinline, cold) - void fail[[noreturn]](const char* msg, Args&&... args) { - NYsonPull::NDetail::fail( - stream_counter_.info(), - msg, - std::forward<Args>(args)...); - } + template <typename... Args> + ATTRIBUTE(noinline, cold) + void fail[[noreturn]](const char* msg, Args&&... args) { + NYsonPull::NDetail::fail( + stream_counter_.info(), + msg, + std::forward<Args>(args)...); + } - template <bool AllowFinish> - void fill_buffer() { - stream_.fill_buffer(); + template <bool AllowFinish> + void fill_buffer() { + stream_.fill_buffer(); - if (!AllowFinish) { - auto& buf = stream_.buffer(); + if (!AllowFinish) { + auto& buf = stream_.buffer(); if (Y_UNLIKELY(buf.is_empty() && stream_.at_end())) { - fail("Premature end of stream"); - } - } - } + fail("Premature end of stream"); + } + } + } - void fill_buffer() { - return fill_buffer<true>(); + void fill_buffer() { + return fill_buffer<true>(); } - template <bool AllowFinish> - ui8 get_byte() { - fill_buffer<AllowFinish>(); - auto& buf = stream_.buffer(); - return !buf.is_empty() - ? *buf.pos() - : ui8{'\0'}; - } + template <bool AllowFinish> + ui8 get_byte() { + fill_buffer<AllowFinish>(); + auto& buf = stream_.buffer(); + return !buf.is_empty() + ? *buf.pos() + : ui8{'\0'}; + } - ui8 get_byte() { - return get_byte<true>(); - } + ui8 get_byte() { + return get_byte<true>(); + } - void advance(size_t bytes) { - auto& buf = stream_.buffer(); - stream_counter_.update( - buf.pos(), - buf.pos() + bytes); - buf.advance(bytes); - } - }; + void advance(size_t bytes) { + auto& buf = stream_.buffer(); + stream_counter_.update( + buf.pos(), + buf.pos() + bytes); + buf.advance(bytes); + } + }; } -} +} diff --git a/library/cpp/yson_pull/detail/byte_writer.h b/library/cpp/yson_pull/detail/byte_writer.h index 9a755912d7..dc1d4b4b96 100644 --- a/library/cpp/yson_pull/detail/byte_writer.h +++ b/library/cpp/yson_pull/detail/byte_writer.h @@ -9,69 +9,69 @@ #include <cstddef> #include <cstring> -namespace NYsonPull { - namespace NDetail { - template <class StreamCounter> - class byte_writer { - NYsonPull::NOutput::IStream& stream_; - StreamCounter stream_counter_; +namespace NYsonPull { + namespace NDetail { + template <class StreamCounter> + class byte_writer { + NYsonPull::NOutput::IStream& stream_; + StreamCounter stream_counter_; - public: - byte_writer(NYsonPull::NOutput::IStream& stream) - : stream_(stream) - { - } + public: + byte_writer(NYsonPull::NOutput::IStream& stream) + : stream_(stream) + { + } - // const-ness added to prevent direct stream mutation - const NYsonPull::NOutput::IStream& stream() { - return stream_; - } - const StreamCounter& counter() { - return stream_counter_; - } + // const-ness added to prevent direct stream mutation + const NYsonPull::NOutput::IStream& stream() { + return stream_; + } + const StreamCounter& counter() { + return stream_counter_; + } - void flush_buffer() { - stream_.flush_buffer(); - } + void flush_buffer() { + stream_.flush_buffer(); + } - void advance(size_t bytes) { - auto& buf = stream_.buffer(); - stream_counter_.update( - buf.pos(), - buf.pos() + bytes); - buf.advance(bytes); - } + void advance(size_t bytes) { + auto& buf = stream_.buffer(); + stream_counter_.update( + buf.pos(), + buf.pos() + bytes); + buf.advance(bytes); + } - void write(ui8 c) { - auto& buf = stream_.buffer(); + void write(ui8 c) { + auto& buf = stream_.buffer(); if (Y_LIKELY(!buf.is_full())) { - *buf.pos() = c; - advance(1); - } else { - auto ptr = reinterpret_cast<char*>(&c); - stream_counter_.update(&c, &c + 1); - stream_.flush_buffer({ptr, 1}); - } - } + *buf.pos() = c; + advance(1); + } else { + auto ptr = reinterpret_cast<char*>(&c); + stream_counter_.update(&c, &c + 1); + stream_.flush_buffer({ptr, 1}); + } + } - void write(const ui8* data, size_t size) { - auto& buf = stream_.buffer(); - auto free_buf = buf.available(); + void write(const ui8* data, size_t size) { + auto& buf = stream_.buffer(); + auto free_buf = buf.available(); if (Y_LIKELY(size < free_buf)) { - ::memcpy(buf.pos(), data, size); - advance(size); - } else { - if (!buf.is_full()) { - ::memcpy(buf.pos(), data, free_buf); - advance(free_buf); - data += free_buf; - size -= free_buf; - } - stream_counter_.update(data, data + size); - stream_.flush_buffer({reinterpret_cast<const char*>(data), - size}); - } + ::memcpy(buf.pos(), data, size); + advance(size); + } else { + if (!buf.is_full()) { + ::memcpy(buf.pos(), data, free_buf); + advance(free_buf); + data += free_buf; + size -= free_buf; + } + stream_counter_.update(data, data + size); + stream_.flush_buffer({reinterpret_cast<const char*>(data), + size}); + } } - }; + }; } -} +} diff --git a/library/cpp/yson_pull/detail/cescape.h b/library/cpp/yson_pull/detail/cescape.h index d12936e63c..1ea150e69a 100644 --- a/library/cpp/yson_pull/detail/cescape.h +++ b/library/cpp/yson_pull/detail/cescape.h @@ -35,109 +35,109 @@ * */ -namespace NYsonPull { - namespace NDetail { - namespace NCEscape { +namespace NYsonPull { + namespace NDetail { + namespace NCEscape { inline void encode(TString& dest, TStringBuf data) { - NImpl::escape_impl( - reinterpret_cast<const ui8*>(data.data()), - data.size(), - [&](const ui8* str, size_t size) { - dest.append( - reinterpret_cast<const char*>(str), - size); - }); - } + NImpl::escape_impl( + reinterpret_cast<const ui8*>(data.data()), + data.size(), + [&](const ui8* str, size_t size) { + dest.append( + reinterpret_cast<const char*>(str), + size); + }); + } - // dest must have at least 4*data.size() bytes available - inline size_t encode(ui8* dest, TStringBuf data) { - auto* dest_begin = dest; - NImpl::escape_impl( - reinterpret_cast<const ui8*>(data.data()), - data.size(), - [&](const ui8* str, size_t size) { - ::memcpy(dest, str, size); - dest += size; - }); - return dest - dest_begin; - } + // dest must have at least 4*data.size() bytes available + inline size_t encode(ui8* dest, TStringBuf data) { + auto* dest_begin = dest; + NImpl::escape_impl( + reinterpret_cast<const ui8*>(data.data()), + data.size(), + [&](const ui8* str, size_t size) { + ::memcpy(dest, str, size); + dest += size; + }); + return dest - dest_begin; + } - template <typename U> - void encode(byte_writer<U>& dest, TStringBuf data) { - auto& buffer = dest.stream().buffer(); + template <typename U> + void encode(byte_writer<U>& dest, TStringBuf data) { + auto& buffer = dest.stream().buffer(); if (Y_LIKELY(buffer.available() >= data.size() * 4)) { - auto size = encode(buffer.pos(), data); - dest.advance(size); - } else { - NImpl::escape_impl( - reinterpret_cast<const ui8*>(data.data()), - data.size(), - [&](const ui8* str, size_t size) { - dest.write(str, size); - }); - } - } + auto size = encode(buffer.pos(), data); + dest.advance(size); + } else { + NImpl::escape_impl( + reinterpret_cast<const ui8*>(data.data()), + data.size(), + [&](const ui8* str, size_t size) { + dest.write(str, size); + }); + } + } inline TString encode(TStringBuf data) { TString result; - result.reserve(data.size()); - encode(result, data); - return result; + result.reserve(data.size()); + encode(result, data); + return result; } inline void decode(TString& dest, TStringBuf data) { - NImpl::unescape_impl( - reinterpret_cast<const ui8*>(data.begin()), - reinterpret_cast<const ui8*>(data.end()), - [&](ui8 c) { - dest += c; - }, - [&](const ui8* p, size_t len) { - dest.append(reinterpret_cast<const char*>(p), len); - }); - } + NImpl::unescape_impl( + reinterpret_cast<const ui8*>(data.begin()), + reinterpret_cast<const ui8*>(data.end()), + [&](ui8 c) { + dest += c; + }, + [&](const ui8* p, size_t len) { + dest.append(reinterpret_cast<const char*>(p), len); + }); + } inline void decode_inplace(TVector<ui8>& data) { - auto* out = static_cast<ui8*>( - ::memchr(data.data(), '\\', data.size())); - if (out == nullptr) { - return; - } - NImpl::unescape_impl( - out, - data.data() + data.size(), - [&](ui8 c) { - *out++ = c; - }, - [&](const ui8* p, size_t len) { - ::memmove(out, p, len); - out += len; - }); - data.resize(out - &data[0]); - } + auto* out = static_cast<ui8*>( + ::memchr(data.data(), '\\', data.size())); + if (out == nullptr) { + return; + } + NImpl::unescape_impl( + out, + data.data() + data.size(), + [&](ui8 c) { + *out++ = c; + }, + [&](const ui8* p, size_t len) { + ::memmove(out, p, len); + out += len; + }); + data.resize(out - &data[0]); + } inline TString decode(TStringBuf data) { TString result; - result.reserve(data.size()); - decode(result, data); - return result; - } + result.reserve(data.size()); + decode(result, data); + return result; + } - ATTRIBUTE(noinline, cold) + ATTRIBUTE(noinline, cold) inline TString quote(TStringBuf str) { TString result; - result.reserve(str.size() + 16); - result += '"'; - encode(result, str); - result += '"'; - return result; - } + result.reserve(str.size() + 16); + result += '"'; + encode(result, str); + result += '"'; + return result; + } - ATTRIBUTE(noinline, cold) + ATTRIBUTE(noinline, cold) inline TString quote(ui8 ch) { - char c = ch; - return quote(TStringBuf(&c, 1)); - } - } - } // namespace NDetail -} + char c = ch; + return quote(TStringBuf(&c, 1)); + } + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/cescape_decode.h b/library/cpp/yson_pull/detail/cescape_decode.h index 8e10cf3892..2ee5dd9500 100644 --- a/library/cpp/yson_pull/detail/cescape_decode.h +++ b/library/cpp/yson_pull/detail/cescape_decode.h @@ -5,150 +5,150 @@ #include <algorithm> #include <cstring> -namespace NYsonPull { - namespace NDetail { - namespace NCEscape { - namespace NImpl { - inline ui8 as_digit(ui8 c) { - return c - ui8{'0'}; - } +namespace NYsonPull { + namespace NDetail { + namespace NCEscape { + namespace NImpl { + inline ui8 as_digit(ui8 c) { + return c - ui8{'0'}; + } - inline ui8 as_hexdigit(ui8 c) { - static constexpr ui8 hex_decode_map[256] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, - 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255}; + inline ui8 as_hexdigit(ui8 c) { + static constexpr ui8 hex_decode_map[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, + 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255}; - return hex_decode_map[c]; - } + return hex_decode_map[c]; + } - inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) { - auto digit = ui8{0}; - while (n-- && (digit = as_digit(*p)) < 8) { - result = result * 8 + digit; - ++p; - } - return p; - } + inline const ui8* read_oct(ui8& result, const ui8* p, ui8 n) { + auto digit = ui8{0}; + while (n-- && (digit = as_digit(*p)) < 8) { + result = result * 8 + digit; + ++p; + } + return p; + } - inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) { - auto digit = ui8{0}; - while (n-- && (digit = as_hexdigit(*p)) < 16) { - result = result * 16 + digit; - ++p; - } - return p; - } + inline const ui8* read_hex(ui8& result, const ui8* p, ui8 n) { + auto digit = ui8{0}; + while (n-- && (digit = as_hexdigit(*p)) < 16) { + result = result * 16 + digit; + ++p; + } + return p; + } - inline const ui8* unescape_char_and_advance( - ui8& result, - const ui8* p, - const ui8* end) { - switch (*p) { - default: - result = *p; - ++p; - break; - case 'b': - result = '\b'; - ++p; - break; - case 'f': - result = '\f'; - ++p; - break; - case 'n': - result = '\n'; - ++p; - break; - case 'r': - result = '\r'; - ++p; - break; - case 't': - result = '\t'; - ++p; - break; + inline const ui8* unescape_char_and_advance( + ui8& result, + const ui8* p, + const ui8* end) { + switch (*p) { + default: + result = *p; + ++p; + break; + case 'b': + result = '\b'; + ++p; + break; + case 'f': + result = '\f'; + ++p; + break; + case 'n': + result = '\n'; + ++p; + break; + case 'r': + result = '\r'; + ++p; + break; + case 't': + result = '\t'; + ++p; + break; - case 'x': { - ++p; - result = 0; - auto* next = read_hex( - result, - p, std::min<ptrdiff_t>(2, end - p)); - if (next > p) { - p = next; - } else { - result = 'x'; - } - } break; + case 'x': { + ++p; + result = 0; + auto* next = read_hex( + result, + p, std::min<ptrdiff_t>(2, end - p)); + if (next > p) { + p = next; + } else { + result = 'x'; + } + } break; - case '0': - case '1': - case '2': - case '3': - result = 0; - p = read_oct( - result, - p, std::min<ptrdiff_t>(3, end - p)); - break; + case '0': + case '1': + case '2': + case '3': + result = 0; + p = read_oct( + result, + p, std::min<ptrdiff_t>(3, end - p)); + break; - case '4': - case '5': - case '6': - case '7': - result = 0; - p = read_oct( - result, - p, std::min<ptrdiff_t>(2, end - p)); - break; - } - return p; - } + case '4': + case '5': + case '6': + case '7': + result = 0; + p = read_oct( + result, + p, std::min<ptrdiff_t>(2, end - p)); + break; + } + return p; + } - template <typename T, typename U> - inline void unescape_impl( - const ui8* p, - const ui8* end, - T&& consume_one, - U&& consume_span) { - while (p < end) { - auto* escaped = static_cast<const ui8*>( - ::memchr(p, '\\', end - p)); - if (escaped == nullptr) { - consume_span(p, end - p); - return; - } else { - consume_span(p, escaped - p); - auto c = ui8{'\\'}; - p = escaped + 1; - if (p < end) { - p = unescape_char_and_advance(c, p, end); - } - consume_one(c); - } - } - } + template <typename T, typename U> + inline void unescape_impl( + const ui8* p, + const ui8* end, + T&& consume_one, + U&& consume_span) { + while (p < end) { + auto* escaped = static_cast<const ui8*>( + ::memchr(p, '\\', end - p)); + if (escaped == nullptr) { + consume_span(p, end - p); + return; + } else { + consume_span(p, escaped - p); + auto c = ui8{'\\'}; + p = escaped + 1; + if (p < end) { + p = unescape_char_and_advance(c, p, end); + } + consume_one(c); + } + } + } } - } // namespace NCEscape - } // namespace NDetail -} + } // namespace NCEscape + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/cescape_encode.h b/library/cpp/yson_pull/detail/cescape_encode.h index d1dae0f96e..bf5765f1d9 100644 --- a/library/cpp/yson_pull/detail/cescape_encode.h +++ b/library/cpp/yson_pull/detail/cescape_encode.h @@ -6,109 +6,109 @@ // Turns UTF-8 strings into unreadable garbage for no known reason //#define CESCAPE_STRICT_ASCII -namespace NYsonPull { - namespace NDetail { - namespace NCEscape { - namespace NImpl { - inline ui8 hex_digit(ui8 value) { - constexpr ui8 hex_digits[] = "0123456789ABCDEF"; - return hex_digits[value]; - } +namespace NYsonPull { + namespace NDetail { + namespace NCEscape { + namespace NImpl { + inline ui8 hex_digit(ui8 value) { + constexpr ui8 hex_digits[] = "0123456789ABCDEF"; + return hex_digits[value]; + } - inline ui8 oct_digit(ui8 value) { - return '0' + value; - } + inline ui8 oct_digit(ui8 value) { + return '0' + value; + } - inline bool is_printable(ui8 c) { + inline bool is_printable(ui8 c) { #ifdef CESCAPE_STRICT_ASCII - return c >= 32 && c <= 126; + return c >= 32 && c <= 126; #else - return c >= 32; + return c >= 32; #endif - } + } - inline bool is_hex_digit(ui8 c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); - } + inline bool is_hex_digit(ui8 c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + } - inline bool is_oct_digit(ui8 c) { - return c >= '0' && c <= '7'; - } + inline bool is_oct_digit(ui8 c) { + return c >= '0' && c <= '7'; + } - constexpr size_t ESCAPE_C_BUFFER_SIZE = 4; + constexpr size_t ESCAPE_C_BUFFER_SIZE = 4; - inline size_t escape_char( - ui8 c, - ui8 next, - ui8 r[ESCAPE_C_BUFFER_SIZE]) { - // (1) Printable characters go as-is, except backslash and double quote. - // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). - // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. - if (c == '\"') { - r[0] = '\\'; - r[1] = '\"'; - return 2; - } else if (c == '\\') { - r[0] = '\\'; - r[1] = '\\'; - return 2; - } else if (is_printable(c)) { - r[0] = c; - return 1; - } else if (c == '\r') { - r[0] = '\\'; - r[1] = 'r'; - return 2; - } else if (c == '\n') { - r[0] = '\\'; - r[1] = 'n'; - return 2; - } else if (c == '\t') { - r[0] = '\\'; - r[1] = 't'; - return 2; - } else if (c < 8 && !is_oct_digit(next)) { - r[0] = '\\'; - r[1] = oct_digit(c); - return 2; - } else if (!is_hex_digit(next)) { - r[0] = '\\'; - r[1] = 'x'; - r[2] = hex_digit((c & 0xF0) >> 4); - r[3] = hex_digit((c & 0x0F) >> 0); - return 4; - } else { - r[0] = '\\'; - r[1] = oct_digit((c & 0700) >> 6); - r[2] = oct_digit((c & 0070) >> 3); - r[3] = oct_digit((c & 0007) >> 0); - return 4; - } - } + inline size_t escape_char( + ui8 c, + ui8 next, + ui8 r[ESCAPE_C_BUFFER_SIZE]) { + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. + if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (is_printable(c)) { + r[0] = c; + return 1; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (c < 8 && !is_oct_digit(next)) { + r[0] = '\\'; + r[1] = oct_digit(c); + return 2; + } else if (!is_hex_digit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = hex_digit((c & 0xF0) >> 4); + r[3] = hex_digit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = oct_digit((c & 0700) >> 6); + r[2] = oct_digit((c & 0070) >> 3); + r[3] = oct_digit((c & 0007) >> 0); + return 4; + } + } - template <typename T> - inline void escape_impl(const ui8* str, size_t len, T&& consume) { - ui8 buffer[ESCAPE_C_BUFFER_SIZE]; + template <typename T> + inline void escape_impl(const ui8* str, size_t len, T&& consume) { + ui8 buffer[ESCAPE_C_BUFFER_SIZE]; - size_t i, j; - for (i = 0, j = 0; i < len; ++i) { - auto next_char = i + 1 < len ? str[i + 1] : 0; - size_t rlen = escape_char(str[i], next_char, buffer); + size_t i, j; + for (i = 0, j = 0; i < len; ++i) { + auto next_char = i + 1 < len ? str[i + 1] : 0; + size_t rlen = escape_char(str[i], next_char, buffer); - if (rlen > 1) { - consume(str + j, i - j); - j = i + 1; - consume(buffer, rlen); - } - } - - if (j > 0) { - consume(str + j, len - j); - } else { - consume(str, len); - } - } - } - } // namespace NCEscape - } // namespace NDetail -} + if (rlen > 1) { + consume(str + j, i - j); + j = i + 1; + consume(buffer, rlen); + } + } + + if (j > 0) { + consume(str + j, len - j); + } else { + consume(str, len); + } + } + } + } // namespace NCEscape + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/fail.h b/library/cpp/yson_pull/detail/fail.h index ad869af510..6937612d0b 100644 --- a/library/cpp/yson_pull/detail/fail.h +++ b/library/cpp/yson_pull/detail/fail.h @@ -6,15 +6,15 @@ #include <library/cpp/yson_pull/exceptions.h> #include <library/cpp/yson_pull/position_info.h> -namespace NYsonPull { - namespace NDetail { - template <typename... Args> - ATTRIBUTE(noreturn, noinline, cold) - void fail( - const TPositionInfo& info, - Args&&... args) { - auto formatted_message = format_string(std::forward<Args>(args)...); - throw NException::TBadInput(formatted_message, info); - } - } -} +namespace NYsonPull { + namespace NDetail { + template <typename... Args> + ATTRIBUTE(noreturn, noinline, cold) + void fail( + const TPositionInfo& info, + Args&&... args) { + auto formatted_message = format_string(std::forward<Args>(args)...); + throw NException::TBadInput(formatted_message, info); + } + } +} diff --git a/library/cpp/yson_pull/detail/format_string.h b/library/cpp/yson_pull/detail/format_string.h index 437dee23e4..683fd1bf36 100644 --- a/library/cpp/yson_pull/detail/format_string.h +++ b/library/cpp/yson_pull/detail/format_string.h @@ -4,23 +4,23 @@ #include <util/generic/string.h> #include <util/string/builder.h> -namespace NYsonPull { - namespace NDetail { - namespace NImpl { - inline void apply_args(TStringBuilder&) { - } +namespace NYsonPull { + namespace NDetail { + namespace NImpl { + inline void apply_args(TStringBuilder&) { + } - template <typename T, typename... Args> - inline void apply_args(TStringBuilder& builder, T&& arg, Args&&... args) { - apply_args(builder << arg, std::forward<Args>(args)...); - } - } + template <typename T, typename... Args> + inline void apply_args(TStringBuilder& builder, T&& arg, Args&&... args) { + apply_args(builder << arg, std::forward<Args>(args)...); + } + } - template <typename... Args> + template <typename... Args> TString format_string(Args&&... args) { - TStringBuilder builder; - NImpl::apply_args(builder, std::forward<Args>(args)...); + TStringBuilder builder; + NImpl::apply_args(builder, std::forward<Args>(args)...); return TString(std::move(builder)); - } + } } -} +} diff --git a/library/cpp/yson_pull/detail/input/buffered.h b/library/cpp/yson_pull/detail/input/buffered.h index a0e53cec04..9b1482577f 100644 --- a/library/cpp/yson_pull/detail/input/buffered.h +++ b/library/cpp/yson_pull/detail/input/buffered.h @@ -8,28 +8,28 @@ #include <cstdio> #include <memory> -namespace NYsonPull { - namespace NDetail { - namespace NInput { - class TBuffered: public NYsonPull::NInput::IStream { - TArrayHolder<ui8> buffer_; - size_t size_; +namespace NYsonPull { + namespace NDetail { + namespace NInput { + class TBuffered: public NYsonPull::NInput::IStream { + TArrayHolder<ui8> buffer_; + size_t size_; - public: - explicit TBuffered(size_t buffer_size) - : buffer_{new ui8[buffer_size]} - , size_{buffer_size} { - } + public: + explicit TBuffered(size_t buffer_size) + : buffer_{new ui8[buffer_size]} + , size_{buffer_size} { + } - protected: - ui8* buffer_data() const { - return buffer_.Get(); - } + protected: + ui8* buffer_data() const { + return buffer_.Get(); + } - size_t buffer_size() const { - return size_; - } - }; - } - } // namespace NDetail -} + size_t buffer_size() const { + return size_; + } + }; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/input/stdio_file.h b/library/cpp/yson_pull/detail/input/stdio_file.h index c27b736aa3..c412b7e59b 100644 --- a/library/cpp/yson_pull/detail/input/stdio_file.h +++ b/library/cpp/yson_pull/detail/input/stdio_file.h @@ -10,33 +10,33 @@ #include <cstdio> #include <memory> -namespace NYsonPull { - namespace NDetail { - namespace NInput { - class TStdioFile: public TBuffered { - FILE* file_; +namespace NYsonPull { + namespace NDetail { + namespace NInput { + class TStdioFile: public TBuffered { + FILE* file_; - public: - TStdioFile(FILE* file, size_t buffer_size) - : TBuffered(buffer_size) - , file_{file} { - } + public: + TStdioFile(FILE* file, size_t buffer_size) + : TBuffered(buffer_size) + , file_{file} { + } - protected: - result do_fill_buffer() override { - auto nread = ::fread(buffer_data(), 1, buffer_size(), file_); + protected: + result do_fill_buffer() override { + auto nread = ::fread(buffer_data(), 1, buffer_size(), file_); if (Y_UNLIKELY(nread == 0)) { if (ferror(file_)) { - throw NException::TSystemError(); - } + throw NException::TSystemError(); + } if (feof(file_)) { - return result::at_end; - } - } - buffer().reset(buffer_data(), buffer_data() + nread); - return result::have_more_data; - } - }; + return result::at_end; + } + } + buffer().reset(buffer_data(), buffer_data() + nread); + return result::have_more_data; + } + }; } - } // namespace NDetail -} + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/input/stream.h b/library/cpp/yson_pull/detail/input/stream.h index d93ac8eaf8..791cd5a3f5 100644 --- a/library/cpp/yson_pull/detail/input/stream.h +++ b/library/cpp/yson_pull/detail/input/stream.h @@ -40,12 +40,12 @@ namespace NYsonPull { } }; - template <typename TBuffered> + template <typename TBuffered> class TOwned: public TStreamBase { TBuffered Input; public: - template <typename... Args> + template <typename... Args> explicit TOwned(Args&&... args) : Input(std::forward<Args>(args)...) { @@ -65,5 +65,5 @@ namespace NYsonPull { } }; } - } // namespace NDetail -} + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/lexer_base.h b/library/cpp/yson_pull/detail/lexer_base.h index 54454f8c6f..572bdb3d18 100644 --- a/library/cpp/yson_pull/detail/lexer_base.h +++ b/library/cpp/yson_pull/detail/lexer_base.h @@ -12,209 +12,209 @@ #include <util/generic/vector.h> #include <util/string/cast.h> -namespace NYsonPull { - namespace NDetail { - template <bool EnableLinePositionInfo> - class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> { - using Base = byte_reader< - stream_counter<EnableLinePositionInfo>>; +namespace NYsonPull { + namespace NDetail { + template <bool EnableLinePositionInfo> + class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> { + using Base = byte_reader< + stream_counter<EnableLinePositionInfo>>; TVector<ui8> token_buffer_; - TMaybe<size_t> memory_limit_; - - public: - lexer_base( - NYsonPull::NInput::IStream& buffer, - TMaybe<size_t> memory_limit) - : Base(buffer) - , memory_limit_{memory_limit} { - } - - ATTRIBUTE(noinline, hot) - ui8 skip_space_and_get_byte() { - auto& buf = Base::stream().buffer(); + TMaybe<size_t> memory_limit_; + + public: + lexer_base( + NYsonPull::NInput::IStream& buffer, + TMaybe<size_t> memory_limit) + : Base(buffer) + , memory_limit_{memory_limit} { + } + + ATTRIBUTE(noinline, hot) + ui8 skip_space_and_get_byte() { + auto& buf = Base::stream().buffer(); if (Y_LIKELY(!buf.is_empty())) { - auto ch = *buf.pos(); + auto ch = *buf.pos(); if (Y_LIKELY(!is_space(ch))) { - return ch; - } - } - return skip_space_and_get_byte_fallback(); - } - - ATTRIBUTE(hot) - ui8 get_byte() { - auto& buf = Base::stream().buffer(); + return ch; + } + } + return skip_space_and_get_byte_fallback(); + } + + ATTRIBUTE(hot) + ui8 get_byte() { + auto& buf = Base::stream().buffer(); if (Y_LIKELY(!buf.is_empty())) { - return *buf.pos(); - } - return Base::get_byte(); + return *buf.pos(); + } + return Base::get_byte(); } - number read_numeric() { - token_buffer_.clear(); - auto type = number_type::int64; - while (true) { + number read_numeric() { + token_buffer_.clear(); + auto type = number_type::int64; + while (true) { auto ch = this->Base::template get_byte<true>(); - if (isdigit(ch) || ch == '+' || ch == '-') { - token_buffer_.push_back(ch); - } else if (ch == '.' || ch == 'e' || ch == 'E') { - token_buffer_.push_back(ch); - type = number_type::float64; - } else if (ch == 'u') { - token_buffer_.push_back(ch); - type = number_type::uint64; + if (isdigit(ch) || ch == '+' || ch == '-') { + token_buffer_.push_back(ch); + } else if (ch == '.' || ch == 'e' || ch == 'E') { + token_buffer_.push_back(ch); + type = number_type::float64; + } else if (ch == 'u') { + token_buffer_.push_back(ch); + type = number_type::uint64; } else if (Y_UNLIKELY(isalpha(ch))) { - COLD_BLOCK_BYVALUE - Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal"); - COLD_BLOCK_END - } else { - break; - } - check_memory_limit(); - Base::advance(1); - } - - auto str = token_buffer(); - try { - switch (type) { - case number_type::float64: - return FromString<double>(str); - case number_type::int64: - return FromString<i64>(str); - case number_type::uint64: - str.Chop(1); // 'u' suffix - return FromString<ui64>(str); - } + COLD_BLOCK_BYVALUE + Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal"); + COLD_BLOCK_END + } else { + break; + } + check_memory_limit(); + Base::advance(1); + } + + auto str = token_buffer(); + try { + switch (type) { + case number_type::float64: + return FromString<double>(str); + case number_type::int64: + return FromString<i64>(str); + case number_type::uint64: + str.Chop(1); // 'u' suffix + return FromString<ui64>(str); + } Y_UNREACHABLE(); - } catch (const std::exception& err) { - Base::fail(err.what()); - } + } catch (const std::exception& err) { + Base::fail(err.what()); + } } - TStringBuf read_quoted_string() { - auto count_trailing_slashes = [](ui8* begin, ui8* end) { - auto count = size_t{0}; - if (begin < end) { - for (auto p = end - 1; p >= begin && *p == '\\'; --p) { - ++count; - } - } - return count; - }; - - token_buffer_.clear(); - auto& buf = Base::stream().buffer(); - while (true) { + TStringBuf read_quoted_string() { + auto count_trailing_slashes = [](ui8* begin, ui8* end) { + auto count = size_t{0}; + if (begin < end) { + for (auto p = end - 1; p >= begin && *p == '\\'; --p) { + ++count; + } + } + return count; + }; + + token_buffer_.clear(); + auto& buf = Base::stream().buffer(); + while (true) { this->Base::template fill_buffer<false>(); - auto* quote = reinterpret_cast<const ui8*>( - ::memchr(buf.pos(), '"', buf.available())); - if (quote == nullptr) { - token_buffer_.insert( - token_buffer_.end(), - buf.pos(), - buf.end()); - Base::advance(buf.available()); - continue; - } - - token_buffer_.insert( - token_buffer_.end(), - buf.pos(), - quote); - Base::advance(quote - buf.pos() + 1); // +1 for the quote itself - - // We must count the number of '\' at the end of StringValue - // to check if it's not \" - int slash_count = count_trailing_slashes( - token_buffer_.data(), - token_buffer_.data() + token_buffer_.size()); - if (slash_count % 2 == 0) { - break; - } else { - token_buffer_.push_back('"'); - } - check_memory_limit(); + auto* quote = reinterpret_cast<const ui8*>( + ::memchr(buf.pos(), '"', buf.available())); + if (quote == nullptr) { + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + buf.end()); + Base::advance(buf.available()); + continue; + } + + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + quote); + Base::advance(quote - buf.pos() + 1); // +1 for the quote itself + + // We must count the number of '\' at the end of StringValue + // to check if it's not \" + int slash_count = count_trailing_slashes( + token_buffer_.data(), + token_buffer_.data() + token_buffer_.size()); + if (slash_count % 2 == 0) { + break; + } else { + token_buffer_.push_back('"'); + } + check_memory_limit(); } - - NCEscape::decode_inplace(token_buffer_); - return token_buffer(); + + NCEscape::decode_inplace(token_buffer_); + return token_buffer(); } - TStringBuf read_unquoted_string() { - token_buffer_.clear(); - while (true) { + TStringBuf read_unquoted_string() { + token_buffer_.clear(); + while (true) { auto ch = this->Base::template get_byte<true>(); - if (isalpha(ch) || isdigit(ch) || - ch == '_' || ch == '-' || ch == '%' || ch == '.') { - token_buffer_.push_back(ch); - } else { - break; - } - check_memory_limit(); - Base::advance(1); - } - return token_buffer(); + if (isalpha(ch) || isdigit(ch) || + ch == '_' || ch == '-' || ch == '%' || ch == '.') { + token_buffer_.push_back(ch); + } else { + break; + } + check_memory_limit(); + Base::advance(1); + } + return token_buffer(); } - ATTRIBUTE(noinline, hot) - TStringBuf read_binary_string() { - auto slength = NVarInt::read<i32>(*this); + ATTRIBUTE(noinline, hot) + TStringBuf read_binary_string() { + auto slength = NVarInt::read<i32>(*this); if (Y_UNLIKELY(slength < 0)) { - COLD_BLOCK_BYVALUE - Base::fail("Negative binary string literal length ", slength); - COLD_BLOCK_END - } - auto length = static_cast<ui32>(slength); + COLD_BLOCK_BYVALUE + Base::fail("Negative binary string literal length ", slength); + COLD_BLOCK_END + } + auto length = static_cast<ui32>(slength); - auto& buf = Base::stream().buffer(); + auto& buf = Base::stream().buffer(); if (Y_LIKELY(buf.available() >= length)) { - auto result = TStringBuf{ - reinterpret_cast<const char*>(buf.pos()), - length}; - Base::advance(length); - return result; - } else { // reading in Buffer - return read_binary_string_fallback(length); - } + auto result = TStringBuf{ + reinterpret_cast<const char*>(buf.pos()), + length}; + Base::advance(length); + return result; + } else { // reading in Buffer + return read_binary_string_fallback(length); + } } - ATTRIBUTE(noinline) - TStringBuf read_binary_string_fallback(size_t length) { - auto& buf = Base::stream().buffer(); - auto needToRead = length; - token_buffer_.clear(); - while (needToRead) { + ATTRIBUTE(noinline) + TStringBuf read_binary_string_fallback(size_t length) { + auto& buf = Base::stream().buffer(); + auto needToRead = length; + token_buffer_.clear(); + while (needToRead) { this->Base::template fill_buffer<false>(); - auto chunk_size = std::min(needToRead, buf.available()); - - token_buffer_.insert( - token_buffer_.end(), - buf.pos(), - buf.pos() + chunk_size); - check_memory_limit(); - needToRead -= chunk_size; - Base::advance(chunk_size); - } - return token_buffer(); + auto chunk_size = std::min(needToRead, buf.available()); + + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + buf.pos() + chunk_size); + check_memory_limit(); + needToRead -= chunk_size; + Base::advance(chunk_size); + } + return token_buffer(); } percent_scalar read_percent_scalar() { auto throw_incorrect_percent_scalar = [&]() { Base::fail("Incorrect %-literal prefix ", NCEscape::quote(token_buffer())); - }; + }; auto assert_literal = [&](TStringBuf literal) -> void { for (size_t i = 2; i < literal.size(); ++i) { token_buffer_.push_back(this->Base::template get_byte<false>()); - Base::advance(1); + Base::advance(1); if (Y_UNLIKELY(token_buffer_.back() != literal[i])) { throw_incorrect_percent_scalar(); - } - } - }; + } + } + }; - token_buffer_.clear(); + token_buffer_.clear(); token_buffer_.push_back(this->Base::template get_byte<false>()); Base::advance(1); @@ -237,107 +237,107 @@ namespace NYsonPull { default: throw_incorrect_percent_scalar(); } - + Y_UNREACHABLE(); } - i64 read_binary_int64() { - return NVarInt::read<i64>(*this); - } - - ui64 read_binary_uint64() { - return NVarInt::read<ui64>(*this); - } - - double read_binary_double() { - union { - double as_double; - ui8 as_bytes[sizeof(double)]; - } data; - static_assert(sizeof(data) == sizeof(double), "bad union size"); - - auto needToRead = sizeof(double); - - auto& buf = Base::stream().buffer(); - while (needToRead != 0) { - Base::fill_buffer(); - - auto chunk_size = std::min(needToRead, buf.available()); - if (chunk_size == 0) { - Base::fail("Error parsing binary double literal"); - } - std::copy( - buf.pos(), - buf.pos() + chunk_size, - data.as_bytes + (sizeof(double) - needToRead)); - needToRead -= chunk_size; - Base::advance(chunk_size); - } - return data.as_double; + i64 read_binary_int64() { + return NVarInt::read<i64>(*this); + } + + ui64 read_binary_uint64() { + return NVarInt::read<ui64>(*this); + } + + double read_binary_double() { + union { + double as_double; + ui8 as_bytes[sizeof(double)]; + } data; + static_assert(sizeof(data) == sizeof(double), "bad union size"); + + auto needToRead = sizeof(double); + + auto& buf = Base::stream().buffer(); + while (needToRead != 0) { + Base::fill_buffer(); + + auto chunk_size = std::min(needToRead, buf.available()); + if (chunk_size == 0) { + Base::fail("Error parsing binary double literal"); + } + std::copy( + buf.pos(), + buf.pos() + chunk_size, + data.as_bytes + (sizeof(double) - needToRead)); + needToRead -= chunk_size; + Base::advance(chunk_size); + } + return data.as_double; + } + + private: + static bool is_space(ui8 ch) { + static const ui8 lookupTable[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return lookupTable[ch]; } - private: - static bool is_space(ui8 ch) { - static const ui8 lookupTable[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return lookupTable[ch]; - } - - ATTRIBUTE(noinline, cold) - ui8 skip_space_and_get_byte_fallback() { - auto& buf = Base::stream().buffer(); - while (true) { - // FIXME - if (buf.is_empty()) { - if (Base::stream().at_end()) { - return '\0'; - } - Base::fill_buffer(); - } else { - if (!is_space(*buf.pos())) { - break; - } - Base::advance(1); - } + ATTRIBUTE(noinline, cold) + ui8 skip_space_and_get_byte_fallback() { + auto& buf = Base::stream().buffer(); + while (true) { + // FIXME + if (buf.is_empty()) { + if (Base::stream().at_end()) { + return '\0'; + } + Base::fill_buffer(); + } else { + if (!is_space(*buf.pos())) { + break; + } + Base::advance(1); + } } - return Base::get_byte(); - } - - void check_memory_limit() { + return Base::get_byte(); + } + + void check_memory_limit() { if (Y_UNLIKELY(memory_limit_ && token_buffer_.capacity() > *memory_limit_)) { - COLD_BLOCK_BYVALUE - Base::fail( - "Memory limit exceeded while parsing YSON stream: " - "allocated ", - token_buffer_.capacity(), - ", limit ", *memory_limit_); - COLD_BLOCK_END + COLD_BLOCK_BYVALUE + Base::fail( + "Memory limit exceeded while parsing YSON stream: " + "allocated ", + token_buffer_.capacity(), + ", limit ", *memory_limit_); + COLD_BLOCK_END } } - TStringBuf token_buffer() const { - auto* begin = reinterpret_cast<const char*>(token_buffer_.data()); - return {begin, token_buffer_.size()}; - } - }; + TStringBuf token_buffer() const { + auto* begin = reinterpret_cast<const char*>(token_buffer_.data()); + return {begin, token_buffer_.size()}; + } + }; } -} +} diff --git a/library/cpp/yson_pull/detail/macros.h b/library/cpp/yson_pull/detail/macros.h index bddebe1135..7243f9cfe1 100644 --- a/library/cpp/yson_pull/detail/macros.h +++ b/library/cpp/yson_pull/detail/macros.h @@ -9,16 +9,16 @@ #endif #if defined(__GNUC__) && !defined(__clang__) -#define COLD_BLOCK_BYVALUE [=]() ATTRIBUTE(noinline, cold) { -#define COLD_BLOCK_BYREF [&]() ATTRIBUTE(noinline, cold) { -#define COLD_BLOCK_END \ - } \ - (); +#define COLD_BLOCK_BYVALUE [=]() ATTRIBUTE(noinline, cold) { +#define COLD_BLOCK_BYREF [&]() ATTRIBUTE(noinline, cold) { +#define COLD_BLOCK_END \ + } \ + (); #else // Clang does not support gnu-style attributes on lambda functions yet -#define COLD_BLOCK_BYVALUE [=]() { -#define COLD_BLOCK_BYREF [&]() { -#define COLD_BLOCK_END \ - } \ - (); +#define COLD_BLOCK_BYVALUE [=]() { +#define COLD_BLOCK_BYREF [&]() { +#define COLD_BLOCK_END \ + } \ + (); #endif diff --git a/library/cpp/yson_pull/detail/number.h b/library/cpp/yson_pull/detail/number.h index b620082d9d..5595f55e05 100644 --- a/library/cpp/yson_pull/detail/number.h +++ b/library/cpp/yson_pull/detail/number.h @@ -2,36 +2,36 @@ #include <util/system/types.h> -namespace NYsonPull { - namespace NDetail { - enum class number_type { - float64, - uint64, - int64 - }; +namespace NYsonPull { + namespace NDetail { + enum class number_type { + float64, + uint64, + int64 + }; - struct number { - number_type type; - union { - double as_float64; - ui64 as_uint64; - i64 as_int64; - } value; + struct number { + number_type type; + union { + double as_float64; + ui64 as_uint64; + i64 as_int64; + } value; - number(double v) { - type = number_type::float64; - value.as_float64 = v; - } + number(double v) { + type = number_type::float64; + value.as_float64 = v; + } - number(i64 v) { - type = number_type::int64; - value.as_int64 = v; - } + number(i64 v) { + type = number_type::int64; + value.as_int64 = v; + } - number(ui64 v) { - type = number_type::uint64; - value.as_uint64 = v; - } - }; + number(ui64 v) { + type = number_type::uint64; + value.as_uint64 = v; + } + }; } -} +} diff --git a/library/cpp/yson_pull/detail/output/buffered.h b/library/cpp/yson_pull/detail/output/buffered.h index 033d8859e0..475cf34785 100644 --- a/library/cpp/yson_pull/detail/output/buffered.h +++ b/library/cpp/yson_pull/detail/output/buffered.h @@ -6,46 +6,46 @@ #include <util/generic/strbuf.h> -namespace NYsonPull { - namespace NDetail { - namespace NOutput { - template <typename T> - class TBuffered: public NYsonPull::NOutput::IStream { - TArrayHolder<ui8> buffer_; - size_t size_; - - public: - TBuffered(size_t buffer_size) - : buffer_{new ui8[buffer_size]} - , size_{buffer_size} { - reset_buffer(); - } - - protected: - void do_flush_buffer(TStringBuf extra) override { - auto& buf = buffer(); - if (!buf.is_empty()) { +namespace NYsonPull { + namespace NDetail { + namespace NOutput { + template <typename T> + class TBuffered: public NYsonPull::NOutput::IStream { + TArrayHolder<ui8> buffer_; + size_t size_; + + public: + TBuffered(size_t buffer_size) + : buffer_{new ui8[buffer_size]} + , size_{buffer_size} { + reset_buffer(); + } + + protected: + void do_flush_buffer(TStringBuf extra) override { + auto& buf = buffer(); + if (!buf.is_empty()) { do_write({reinterpret_cast<const char*>(buf.begin()), buf.used()}); - reset_buffer(); - } - if (extra.size() >= buf.available()) { - do_write(extra); + reset_buffer(); + } + if (extra.size() >= buf.available()) { + do_write(extra); } else if (extra.size() > 0) { - ::memcpy(buf.pos(), extra.data(), extra.size()); - buf.advance(extra.size()); - } - } - - private: - void do_write(TStringBuf data) { - // CRTP dispatch - static_cast<T*>(this)->write(data); - } - - void reset_buffer() { - buffer().reset(buffer_.Get(), buffer_.Get() + size_); - } - }; + ::memcpy(buf.pos(), extra.data(), extra.size()); + buf.advance(extra.size()); + } + } + + private: + void do_write(TStringBuf data) { + // CRTP dispatch + static_cast<T*>(this)->write(data); + } + + void reset_buffer() { + buffer().reset(buffer_.Get(), buffer_.Get() + size_); + } + }; } - } // namespace NDetail -} + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/output/stdio_file.h b/library/cpp/yson_pull/detail/output/stdio_file.h index 0b9cbba894..03f2b40dc5 100644 --- a/library/cpp/yson_pull/detail/output/stdio_file.h +++ b/library/cpp/yson_pull/detail/output/stdio_file.h @@ -8,26 +8,26 @@ #include <cstdio> -namespace NYsonPull { - namespace NDetail { - namespace NOutput { - class TStdioFile: public TBuffered<TStdioFile> { - FILE* file_; +namespace NYsonPull { + namespace NDetail { + namespace NOutput { + class TStdioFile: public TBuffered<TStdioFile> { + FILE* file_; - public: - TStdioFile(FILE* file, size_t buffer_size) - : TBuffered<TStdioFile>(buffer_size) - , file_(file) - { - } + public: + TStdioFile(FILE* file, size_t buffer_size) + : TBuffered<TStdioFile>(buffer_size) + , file_(file) + { + } - void write(TStringBuf data) { - auto nwritten = ::fwrite(data.data(), 1, data.size(), file_); + void write(TStringBuf data) { + auto nwritten = ::fwrite(data.data(), 1, data.size(), file_); if (Y_UNLIKELY(static_cast<size_t>(nwritten) != data.size())) { - throw NException::TSystemError(); - } - } - }; + throw NException::TSystemError(); + } + } + }; } - } // namespace NDetail -} + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/output/stream.h b/library/cpp/yson_pull/detail/output/stream.h index 39f3b33c4c..d4810f3353 100644 --- a/library/cpp/yson_pull/detail/output/stream.h +++ b/library/cpp/yson_pull/detail/output/stream.h @@ -27,12 +27,12 @@ namespace NYsonPull { } }; - template <typename TOutput> + template <typename TOutput> class TOwned: public TBuffered<TOwned<TOutput>> { TOutput Output; public: - template <typename... Args> + template <typename... Args> TOwned(size_t buffer_size, Args&&... args) : TBuffered<TOwned>(buffer_size) , Output(std::forward<Args>(args)...) @@ -52,5 +52,5 @@ namespace NYsonPull { } }; } - } // namespace NDetail -} + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/reader.h b/library/cpp/yson_pull/detail/reader.h index db1dcc2227..0e02396358 100644 --- a/library/cpp/yson_pull/detail/reader.h +++ b/library/cpp/yson_pull/detail/reader.h @@ -8,76 +8,76 @@ #include <util/generic/maybe.h> #include <util/generic/vector.h> -namespace NYsonPull { - namespace NDetail { - /*! \internal */ - //////////////////////////////////////////////////////////////////////////////// - - enum class special_token : ui8 { - // Special values: - // YSON - semicolon = 0, // ; - equals = 1, // = - hash = 2, // # - left_bracket = 3, // [ - right_bracket = 4, // ] - left_brace = 5, // { - right_brace = 6, // } - left_angle = 7, // < - right_angle = 8, // > - }; - - // char_class tree representation: - // Root = xb - // BinaryStringOrOtherSpecialToken = x0b - // BinaryString = 00b - // OtherSpecialToken = 10b - // Other = x1b - // BinaryScalar = xx01b - // BinaryInt64 = 0001b - // BinaryDouble = 0101b - // BinaryFalse = 1001b - // BinaryTrue = 1101b - // Other = xxx11b - // Quote = 00011b - // DigitOrMinus = 00111b - // String = 01011b - // Space = 01111b - // Plus = 10011b - // None = 10111b - // Percent = 11011b - enum class char_class : ui8 { - binary_string = 0, // = 00b - - special_token_mask = 2, // = 10b - semicolon = 2 + (0 << 2), - equals = 2 + (1 << 2), - hash = 2 + (2 << 2), - left_bracket = 2 + (3 << 2), - right_bracket = 2 + (4 << 2), - left_brace = 2 + (5 << 2), - right_brace = 2 + (6 << 2), - left_angle = 2 + (7 << 2), - right_angle = 2 + (8 << 2), - - binary_scalar_mask = 1, - binary_int64 = 1 + (0 << 2), // = 001b - binary_double = 1 + (1 << 2), // = 101b - binary_false = 1 + (2 << 2), // = 1001b - binary_true = 1 + (3 << 2), // = 1101b - binary_uint64 = 1 + (4 << 2), // = 10001b - - other_mask = 3, - quote = 3 + (0 << 2), // = 00011b - number = 3 + (1 << 2), // = 00111b - string = 3 + (2 << 2), // = 01011b - percent = 3 + (6 << 2), // = 11011b - none = 3 + (5 << 2), // = 10111b - }; +namespace NYsonPull { + namespace NDetail { + /*! \internal */ + //////////////////////////////////////////////////////////////////////////////// + + enum class special_token : ui8 { + // Special values: + // YSON + semicolon = 0, // ; + equals = 1, // = + hash = 2, // # + left_bracket = 3, // [ + right_bracket = 4, // ] + left_brace = 5, // { + right_brace = 6, // } + left_angle = 7, // < + right_angle = 8, // > + }; + + // char_class tree representation: + // Root = xb + // BinaryStringOrOtherSpecialToken = x0b + // BinaryString = 00b + // OtherSpecialToken = 10b + // Other = x1b + // BinaryScalar = xx01b + // BinaryInt64 = 0001b + // BinaryDouble = 0101b + // BinaryFalse = 1001b + // BinaryTrue = 1101b + // Other = xxx11b + // Quote = 00011b + // DigitOrMinus = 00111b + // String = 01011b + // Space = 01111b + // Plus = 10011b + // None = 10111b + // Percent = 11011b + enum class char_class : ui8 { + binary_string = 0, // = 00b + + special_token_mask = 2, // = 10b + semicolon = 2 + (0 << 2), + equals = 2 + (1 << 2), + hash = 2 + (2 << 2), + left_bracket = 2 + (3 << 2), + right_bracket = 2 + (4 << 2), + left_brace = 2 + (5 << 2), + right_brace = 2 + (6 << 2), + left_angle = 2 + (7 << 2), + right_angle = 2 + (8 << 2), + + binary_scalar_mask = 1, + binary_int64 = 1 + (0 << 2), // = 001b + binary_double = 1 + (1 << 2), // = 101b + binary_false = 1 + (2 << 2), // = 1001b + binary_true = 1 + (3 << 2), // = 1101b + binary_uint64 = 1 + (4 << 2), // = 10001b + + other_mask = 3, + quote = 3 + (0 << 2), // = 00011b + number = 3 + (1 << 2), // = 00111b + string = 3 + (2 << 2), // = 01011b + percent = 3 + (6 << 2), // = 11011b + none = 3 + (5 << 2), // = 10111b + }; #define CHAR_SUBCLASS(x) (static_cast<ui8>(x) >> 2) - inline char_class get_char_class(ui8 ch) { + inline char_class get_char_class(ui8 ch) { #define NN char_class::none #define BS char_class::binary_string #define BI char_class::binary_int64 @@ -91,70 +91,70 @@ namespace NYsonPull { #define QU char_class::quote #define PC char_class::percent #define TT(name) (static_cast<char_class>( \ - (static_cast<ui8>(special_token::name) << 2) | static_cast<ui8>(char_class::special_token_mask))) - - static constexpr char_class lookup[256] = - { - NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - - // 32 - SP, // ' ' - NN, // '!' - QU, // '"' - TT(hash), // '#' - NN, // '$' - PC, // '%' - NN, // '&' - NN, // "'" - NN, // '(' - NN, // ')' - NN, // '*' - NB, // '+' - NN, // ',' - NB, // '-' - NN, // '.' - NN, // '/' - - // 48 - NB, NB, NB, NB, NB, NB, NB, NB, NB, NB, // '0' - '9' - NN, // ':' - TT(semicolon), // ';' - TT(left_angle), // '<' - TT(equals), // '=' - TT(right_angle), // '>' - NN, // '?' - - // 64 - NN, // '@' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' - TT(left_bracket), // '[' - NN, // '\' - TT(right_bracket), // ']' - NN, // '^' - ST, // '_' - - // 96 - NN, // '`' - - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' - TT(left_brace), // '{' - NN, // '|' - TT(right_brace), // '}' - NN, // '~' - NN, // '^?' non-printable - // 128 - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; + (static_cast<ui8>(special_token::name) << 2) | static_cast<ui8>(char_class::special_token_mask))) + + static constexpr char_class lookup[256] = + { + NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + // 32 + SP, // ' ' + NN, // '!' + QU, // '"' + TT(hash), // '#' + NN, // '$' + PC, // '%' + NN, // '&' + NN, // "'" + NN, // '(' + NN, // ')' + NN, // '*' + NB, // '+' + NN, // ',' + NB, // '-' + NN, // '.' + NN, // '/' + + // 48 + NB, NB, NB, NB, NB, NB, NB, NB, NB, NB, // '0' - '9' + NN, // ':' + TT(semicolon), // ';' + TT(left_angle), // '<' + TT(equals), // '=' + TT(right_angle), // '>' + NN, // '?' + + // 64 + NN, // '@' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' + TT(left_bracket), // '[' + NN, // '\' + TT(right_bracket), // ']' + NN, // '^' + ST, // '_' + + // 96 + NN, // '`' + + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' + TT(left_brace), // '{' + NN, // '|' + TT(right_brace), // '}' + NN, // '~' + NN, // '^?' non-printable + // 128 + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; #undef NN #undef BS @@ -165,140 +165,140 @@ namespace NYsonPull { #undef ST #undef QU #undef TT - return lookup[ch]; - } - - template <bool EnableLinePositionInfo> - class gen_reader_impl { - enum class state { - delimiter = 0, //! expecting ';' or closing-char ('>', ']', '}') - maybe_value = 1, //! expecting a value or closing-char - maybe_key = 2, //! expecting a key or closing-char - equals = 3, //! expecting '=' (followed by value) - value = 4, //! expecting a value - value_noattr = 5, //! expecting a value w/o attrs (after attrs) - - // by design, rare states have numbers starting from first_rare_state - first_rare_state = 6, - before_begin = first_rare_state, //! before started reading the stream - before_end = first_rare_state + 1, //! Expecting end of stream - after_end = first_rare_state + 2, //! after end of stream - }; - - lexer_base<EnableLinePositionInfo> lexer_; - state state_; - TEvent event_; + return lookup[ch]; + } + + template <bool EnableLinePositionInfo> + class gen_reader_impl { + enum class state { + delimiter = 0, //! expecting ';' or closing-char ('>', ']', '}') + maybe_value = 1, //! expecting a value or closing-char + maybe_key = 2, //! expecting a key or closing-char + equals = 3, //! expecting '=' (followed by value) + value = 4, //! expecting a value + value_noattr = 5, //! expecting a value w/o attrs (after attrs) + + // by design, rare states have numbers starting from first_rare_state + first_rare_state = 6, + before_begin = first_rare_state, //! before started reading the stream + before_end = first_rare_state + 1, //! Expecting end of stream + after_end = first_rare_state + 2, //! after end of stream + }; + + lexer_base<EnableLinePositionInfo> lexer_; + state state_; + TEvent event_; TVector<EEventType> stack_; - EStreamType mode_; - - public: - gen_reader_impl( - NYsonPull::NInput::IStream& buffer, - EStreamType mode, - TMaybe<size_t> memoryLimit = {}) - : lexer_(buffer, memoryLimit) - , state_{state::before_begin} - , mode_{mode} { - } - - const TEvent& last_event() const { - return event_; - } - - ATTRIBUTE(hot) - const TEvent& next_event() { + EStreamType mode_; + + public: + gen_reader_impl( + NYsonPull::NInput::IStream& buffer, + EStreamType mode, + TMaybe<size_t> memoryLimit = {}) + : lexer_(buffer, memoryLimit) + , state_{state::before_begin} + , mode_{mode} { + } + + const TEvent& last_event() const { + return event_; + } + + ATTRIBUTE(hot) + const TEvent& next_event() { if (Y_LIKELY(state_ < state::first_rare_state)) { - // 'hot' handler for in-stream events - next_event_hot(); - } else { - // these events happen no more than once per stream - next_event_cold(); - } - return event_; - } - - private: - ATTRIBUTE(hot) - void next_event_hot() { - auto ch = lexer_.get_byte(); - auto cls = get_char_class(ch); + // 'hot' handler for in-stream events + next_event_hot(); + } else { + // these events happen no more than once per stream + next_event_cold(); + } + return event_; + } + + private: + ATTRIBUTE(hot) + void next_event_hot() { + auto ch = lexer_.get_byte(); + auto cls = get_char_class(ch); if (Y_UNLIKELY(cls == char_class::none)) { - ch = lexer_.skip_space_and_get_byte(); + ch = lexer_.skip_space_and_get_byte(); if (Y_UNLIKELY(ch == NSymbol::eof)) { - handle_eof(); - return; - } - cls = get_char_class(ch); - } - - // states maybe_value/value/value_noattr are distinguished - // later in state_value_special - switch (state_) { - case state::maybe_value: - state_value(ch, cls); - break; - case state::maybe_key: - state_maybe_key(ch, cls); - break; - case state::equals: - state_equals(ch); - break; - case state::value: - state_value(ch, cls); - break; - case state::value_noattr: - state_value(ch, cls); - break; - case state::delimiter: - state_delimiter(ch, cls); - break; - default: + handle_eof(); + return; + } + cls = get_char_class(ch); + } + + // states maybe_value/value/value_noattr are distinguished + // later in state_value_special + switch (state_) { + case state::maybe_value: + state_value(ch, cls); + break; + case state::maybe_key: + state_maybe_key(ch, cls); + break; + case state::equals: + state_equals(ch); + break; + case state::value: + state_value(ch, cls); + break; + case state::value_noattr: + state_value(ch, cls); + break; + case state::delimiter: + state_delimiter(ch, cls); + break; + default: Y_UNREACHABLE(); - } + } } - ATTRIBUTE(noinline, cold) - void next_event_cold() { - switch (state_) { - case state::before_begin: - state_before_begin(); - break; - case state::after_end: - lexer_.fail("Attempted read past stream end"); - case state::before_end: - state_before_end(); - break; - default: + ATTRIBUTE(noinline, cold) + void next_event_cold() { + switch (state_) { + case state::before_begin: + state_before_begin(); + break; + case state::after_end: + lexer_.fail("Attempted read past stream end"); + case state::before_end: + state_before_end(); + break; + default: Y_UNREACHABLE(); - } - } - - //! Present a scalar value for caller - template <typename T> - void yield(T value) { - event_ = TEvent{TScalar{value}}; - } - - //! Present a scalar value with non-scalar tag (i.e. key) - template <typename T> - void yield(EEventType type, T value) { - event_ = TEvent{type, TScalar{value}}; - } - - //! Present a value from number variant - void yield(const number& value) { - switch (value.type) { - case number_type::int64: - yield(value.value.as_int64); - break; - case number_type::uint64: - yield(value.value.as_uint64); - break; - case number_type::float64: - yield(value.value.as_float64); - break; - } - } + } + } + + //! Present a scalar value for caller + template <typename T> + void yield(T value) { + event_ = TEvent{TScalar{value}}; + } + + //! Present a scalar value with non-scalar tag (i.e. key) + template <typename T> + void yield(EEventType type, T value) { + event_ = TEvent{type, TScalar{value}}; + } + + //! Present a value from number variant + void yield(const number& value) { + switch (value.type) { + case number_type::int64: + yield(value.value.as_int64); + break; + case number_type::uint64: + yield(value.value.as_uint64); + break; + case number_type::float64: + yield(value.value.as_float64); + break; + } + } //! Present a value from %-literal variant void yield(const percent_scalar& value) { @@ -312,350 +312,350 @@ namespace NYsonPull { } } - //! Present a value-less event - void yield(EEventType type) { - event_ = TEvent{type}; - } + //! Present a value-less event + void yield(EEventType type) { + event_ = TEvent{type}; + } - //! Push the opening of a paired event - void push(EEventType type) { - stack_.push_back(type); - } + //! Push the opening of a paired event + void push(EEventType type) { + stack_.push_back(type); + } - //! Close the paired_event, verify that delimiters are well-formed - void pop(EEventType first, EEventType last) { + //! Close the paired_event, verify that delimiters are well-formed + void pop(EEventType first, EEventType last) { if (Y_UNLIKELY(stack_.empty() || stack_.back() != first)) { - pop_fail(first, last); - return; - } - stack_.pop_back(); + pop_fail(first, last); + return; + } + stack_.pop_back(); - yield(last); - switch (first) { + yield(last); + switch (first) { case EEventType::BeginList: - next(state::delimiter); - break; + next(state::delimiter); + break; case EEventType::BeginMap: - next(state::delimiter); - break; + next(state::delimiter); + break; case EEventType::BeginAttributes: - next(state::value_noattr); - break; + next(state::value_noattr); + break; case EEventType::BeginStream: - next(state::after_end); - break; + next(state::after_end); + break; - default: + default: Y_UNREACHABLE(); - } + } if (Y_UNLIKELY(mode_ == EStreamType::Node && stack_.size() == 1 && state_ == state::delimiter)) { - next(state::before_end); - } - } - - ATTRIBUTE(noinline, cold) - void pop_fail(EEventType first, EEventType last) { - if (stack_.empty()) { - lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but event stack is empty"); - } else { - lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but '", stack_.back(), "' is found."); - } - } - - //! Transition to new_state - void next(state new_state) { - state_ = new_state; - } - - bool in_map() { + next(state::before_end); + } + } + + ATTRIBUTE(noinline, cold) + void pop_fail(EEventType first, EEventType last) { + if (stack_.empty()) { + lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but event stack is empty"); + } else { + lexer_.fail("Unpaired events: expected opening '", first, "' for '", last, "', but '", stack_.back(), "' is found."); + } + } + + //! Transition to new_state + void next(state new_state) { + state_ = new_state; + } + + bool in_map() { return (stack_.back() == EEventType::BeginMap) || (stack_.back() == EEventType::BeginAttributes) || (stack_.back() == EEventType::BeginStream && mode_ == EStreamType::MapFragment); - } - - ATTRIBUTE(noinline, cold) - void handle_eof() { - switch (state_) { - case state::maybe_value: - case state::maybe_key: - case state::delimiter: - case state::before_end: + } + + ATTRIBUTE(noinline, cold) + void handle_eof() { + switch (state_) { + case state::maybe_value: + case state::maybe_key: + case state::delimiter: + case state::before_end: pop(EEventType::BeginStream, EEventType::EndStream); - return; + return; - default: - lexer_.fail("Unexpected end of stream"); - } - } + default: + lexer_.fail("Unexpected end of stream"); + } + } - ATTRIBUTE(noinline, cold) - void state_before_begin() { + ATTRIBUTE(noinline, cold) + void state_before_begin() { push(EEventType::BeginStream); yield(EEventType::BeginStream); - switch (mode_) { - case EStreamType::Node: - next(state::value); - break; - case EStreamType::ListFragment: - next(state::maybe_value); - break; - case EStreamType::MapFragment: - next(state::maybe_key); - break; - default: + switch (mode_) { + case EStreamType::Node: + next(state::value); + break; + case EStreamType::ListFragment: + next(state::maybe_value); + break; + case EStreamType::MapFragment: + next(state::maybe_key); + break; + default: Y_UNREACHABLE(); - } - } - - ATTRIBUTE(noinline, cold) - void state_before_end() { - auto ch = lexer_.skip_space_and_get_byte(); - if (ch == NSymbol::eof) { - handle_eof(); - } else { - lexer_.fail("Expected stream end, but found ", NCEscape::quote(ch)); - } - } - - ATTRIBUTE(hot) - void state_delimiter(ui8 ch, char_class cls) { + } + } + + ATTRIBUTE(noinline, cold) + void state_before_end() { + auto ch = lexer_.skip_space_and_get_byte(); + if (ch == NSymbol::eof) { + handle_eof(); + } else { + lexer_.fail("Expected stream end, but found ", NCEscape::quote(ch)); + } + } + + ATTRIBUTE(hot) + void state_delimiter(ui8 ch, char_class cls) { if (Y_LIKELY(ch == NSymbol::item_separator)) { - lexer_.advance(1); - next(in_map() ? state::maybe_key : state::maybe_value); - // immediately read next value - next_event_hot(); - return; - } - state_delimiter_fallback(ch, cls); - } - - ATTRIBUTE(noinline, hot) - void state_delimiter_fallback(ui8 ch, char_class cls) { - auto cls_bits = static_cast<ui8>(cls); - if ((cls_bits & 3) == static_cast<ui8>(char_class::special_token_mask)) { - auto token = static_cast<special_token>(cls_bits >> 2); - lexer_.advance(1); - switch (token) { - /* // handled in the fast track + lexer_.advance(1); + next(in_map() ? state::maybe_key : state::maybe_value); + // immediately read next value + next_event_hot(); + return; + } + state_delimiter_fallback(ch, cls); + } + + ATTRIBUTE(noinline, hot) + void state_delimiter_fallback(ui8 ch, char_class cls) { + auto cls_bits = static_cast<ui8>(cls); + if ((cls_bits & 3) == static_cast<ui8>(char_class::special_token_mask)) { + auto token = static_cast<special_token>(cls_bits >> 2); + lexer_.advance(1); + switch (token) { + /* // handled in the fast track case special_token::semicolon: next(in_map()? state::maybe_key : state::maybe_value); // immediately read next value return next_event(); */ - case special_token::right_bracket: + case special_token::right_bracket: pop(EEventType::BeginList, EEventType::EndList); - return; + return; - case special_token::right_brace: + case special_token::right_brace: pop(EEventType::BeginMap, EEventType::EndMap); - return; + return; - case special_token::right_angle: + case special_token::right_angle: pop(EEventType::BeginAttributes, EEventType::EndAttributes); - return; - - default: - break; - } - } - - COLD_BLOCK_BYVALUE - lexer_.fail( - "Unexpected ", NCEscape::quote(ch), ", expected one of ", - NCEscape::quote(NSymbol::item_separator), ", ", - NCEscape::quote(NSymbol::end_list), ", ", - NCEscape::quote(NSymbol::end_map), ", ", - NCEscape::quote(NSymbol::end_attributes)); - COLD_BLOCK_END + return; + + default: + break; + } + } + + COLD_BLOCK_BYVALUE + lexer_.fail( + "Unexpected ", NCEscape::quote(ch), ", expected one of ", + NCEscape::quote(NSymbol::item_separator), ", ", + NCEscape::quote(NSymbol::end_list), ", ", + NCEscape::quote(NSymbol::end_map), ", ", + NCEscape::quote(NSymbol::end_attributes)); + COLD_BLOCK_END } - ATTRIBUTE(noinline, hot) - void state_maybe_key(ui8 ch, char_class cls) { - auto key = TStringBuf{}; - // Keys are always strings, put binary-string key into fast lane + ATTRIBUTE(noinline, hot) + void state_maybe_key(ui8 ch, char_class cls) { + auto key = TStringBuf{}; + // Keys are always strings, put binary-string key into fast lane if (Y_LIKELY(ch == NSymbol::string_marker)) { lexer_.advance(1); - key = lexer_.read_binary_string(); - } else { - switch (cls) { - case char_class::quote: - lexer_.advance(1); - key = lexer_.read_quoted_string(); - break; - - case char_class::string: - key = lexer_.read_unquoted_string(); - break; - - case char_class::right_brace: - lexer_.advance(1); + key = lexer_.read_binary_string(); + } else { + switch (cls) { + case char_class::quote: + lexer_.advance(1); + key = lexer_.read_quoted_string(); + break; + + case char_class::string: + key = lexer_.read_unquoted_string(); + break; + + case char_class::right_brace: + lexer_.advance(1); pop(EEventType::BeginMap, EEventType::EndMap); - return; + return; - case char_class::right_angle: - lexer_.advance(1); + case char_class::right_angle: + lexer_.advance(1); pop(EEventType::BeginAttributes, EEventType::EndAttributes); - return; - - default: - COLD_BLOCK_BYVALUE - lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected key string"); - COLD_BLOCK_END - } - } - + return; + + default: + COLD_BLOCK_BYVALUE + lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected key string"); + COLD_BLOCK_END + } + } + yield(EEventType::Key, key); - next(state::equals); - } - - ATTRIBUTE(hot) - void state_equals(ui8 ch) { - // skip '=' + next(state::equals); + } + + ATTRIBUTE(hot) + void state_equals(ui8 ch) { + // skip '=' if (Y_UNLIKELY(ch != NSymbol::key_value_separator)) { COLD_BLOCK_BYVALUE - lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected ", NCEscape::quote(NSymbol::key_value_separator)); + lexer_.fail("Unexpected ", NCEscape::quote(ch), ", expected ", NCEscape::quote(NSymbol::key_value_separator)); COLD_BLOCK_END - } - lexer_.advance(1); - next(state::value); - // immediately read the following value - // (this symbol yields no result) - next_event_hot(); + } + lexer_.advance(1); + next(state::value); + // immediately read the following value + // (this symbol yields no result) + next_event_hot(); + } + + ATTRIBUTE(noinline, hot) + void state_value(ui8 ch, char_class cls) { + auto cls_bits = static_cast<ui8>(cls); + if (cls_bits & 1) { // Other = x1b + if (cls_bits & (1 << 1)) { // Other = xxx11b + state_value_text_scalar(cls); + } else { // BinaryScalar = x01b + state_value_binary_scalar(cls); + } + next(state::delimiter); + } else { // BinaryStringOrOtherSpecialToken = x0b + lexer_.advance(1); + if (cls_bits & 1 << 1) { + // special token + auto token = static_cast<special_token>(cls_bits >> 2); + state_value_special(token, ch); + } else { + // binary string + yield(lexer_.read_binary_string()); + next(state::delimiter); + } + } } - ATTRIBUTE(noinline, hot) - void state_value(ui8 ch, char_class cls) { - auto cls_bits = static_cast<ui8>(cls); - if (cls_bits & 1) { // Other = x1b - if (cls_bits & (1 << 1)) { // Other = xxx11b - state_value_text_scalar(cls); - } else { // BinaryScalar = x01b - state_value_binary_scalar(cls); - } - next(state::delimiter); - } else { // BinaryStringOrOtherSpecialToken = x0b - lexer_.advance(1); - if (cls_bits & 1 << 1) { - // special token - auto token = static_cast<special_token>(cls_bits >> 2); - state_value_special(token, ch); - } else { - // binary string - yield(lexer_.read_binary_string()); - next(state::delimiter); - } - } - } - - ATTRIBUTE(noinline) - void state_value_special(special_token token, ui8 ch) { - // Value starters are always accepted values - switch (token) { - case special_token::hash: - yield(TScalar{}); - next(state::delimiter); - return; - - case special_token::left_bracket: + ATTRIBUTE(noinline) + void state_value_special(special_token token, ui8 ch) { + // Value starters are always accepted values + switch (token) { + case special_token::hash: + yield(TScalar{}); + next(state::delimiter); + return; + + case special_token::left_bracket: push(EEventType::BeginList); yield(EEventType::BeginList); - next(state::maybe_value); - return; + next(state::maybe_value); + return; - case special_token::left_brace: + case special_token::left_brace: push(EEventType::BeginMap); yield(EEventType::BeginMap); - next(state::maybe_key); - return; + next(state::maybe_key); + return; - default: - break; - } + default: + break; + } - // ...closing-chars are only allowed in maybe_value state - if (state_ == state::maybe_value) { - switch (token) { - case special_token::right_bracket: + // ...closing-chars are only allowed in maybe_value state + if (state_ == state::maybe_value) { + switch (token) { + case special_token::right_bracket: pop(EEventType::BeginList, EEventType::EndList); - return; + return; - case special_token::right_brace: + case special_token::right_brace: pop(EEventType::BeginMap, EEventType::EndMap); - return; - - // right_angle is impossible in maybe_value state - // (only in delimiter, maybe_key) - - default: - break; - } - } - - // attributes are not allowed after attributes (thus, value_noattr state) - if (state_ != state::value_noattr && token == special_token::left_angle) { + return; + + // right_angle is impossible in maybe_value state + // (only in delimiter, maybe_key) + + default: + break; + } + } + + // attributes are not allowed after attributes (thus, value_noattr state) + if (state_ != state::value_noattr && token == special_token::left_angle) { push(EEventType::BeginAttributes); yield(EEventType::BeginAttributes); - next(state::maybe_key); + next(state::maybe_key); return; - } + } - COLD_BLOCK_BYVALUE - lexer_.fail("Unexpected ", NCEscape::quote(ch)); - COLD_BLOCK_END + COLD_BLOCK_BYVALUE + lexer_.fail("Unexpected ", NCEscape::quote(ch)); + COLD_BLOCK_END } - ATTRIBUTE(hot) - void state_value_binary_scalar(char_class cls) { - lexer_.advance(1); - switch (cls) { - case char_class::binary_double: - yield(lexer_.read_binary_double()); - break; + ATTRIBUTE(hot) + void state_value_binary_scalar(char_class cls) { + lexer_.advance(1); + switch (cls) { + case char_class::binary_double: + yield(lexer_.read_binary_double()); + break; - case char_class::binary_int64: - yield(lexer_.read_binary_int64()); - break; + case char_class::binary_int64: + yield(lexer_.read_binary_int64()); + break; - case char_class::binary_uint64: - yield(lexer_.read_binary_uint64()); - break; + case char_class::binary_uint64: + yield(lexer_.read_binary_uint64()); + break; - case char_class::binary_false: - yield(false); - break; + case char_class::binary_false: + yield(false); + break; - case char_class::binary_true: - yield(true); - break; + case char_class::binary_true: + yield(true); + break; - default: + default: Y_UNREACHABLE(); - } - } - - ATTRIBUTE(noinline) - void state_value_text_scalar(char_class cls) { - switch (cls) { - case char_class::quote: - lexer_.advance(1); - yield(lexer_.read_quoted_string()); - break; - - case char_class::number: - yield(lexer_.read_numeric()); - break; - - case char_class::string: - yield(lexer_.read_unquoted_string()); - break; - - case char_class::percent: - lexer_.advance(1); + } + } + + ATTRIBUTE(noinline) + void state_value_text_scalar(char_class cls) { + switch (cls) { + case char_class::quote: + lexer_.advance(1); + yield(lexer_.read_quoted_string()); + break; + + case char_class::number: + yield(lexer_.read_numeric()); + break; + + case char_class::string: + yield(lexer_.read_unquoted_string()); + break; + + case char_class::percent: + lexer_.advance(1); yield(lexer_.read_percent_scalar()); - break; + break; case char_class::none: COLD_BLOCK_BYVALUE @@ -663,15 +663,15 @@ namespace NYsonPull { COLD_BLOCK_END break; - default: + default: Y_UNREACHABLE(); - } - } - }; - - class reader_impl: public gen_reader_impl<false> { - public: - using gen_reader_impl<false>::gen_reader_impl; - }; + } + } + }; + + class reader_impl: public gen_reader_impl<false> { + public: + using gen_reader_impl<false>::gen_reader_impl; + }; } -} +} diff --git a/library/cpp/yson_pull/detail/stream_counter.h b/library/cpp/yson_pull/detail/stream_counter.h index 81146a32f2..3b41b27eb6 100644 --- a/library/cpp/yson_pull/detail/stream_counter.h +++ b/library/cpp/yson_pull/detail/stream_counter.h @@ -4,48 +4,48 @@ #include <cstddef> -namespace NYsonPull { - namespace NDetail { - template <bool EnableLinePositionInfo> - class stream_counter; - - template <> - class stream_counter<true> { - private: - size_t offset_ = 0; - size_t line_ = 1; - size_t column_ = 1; - - public: - TPositionInfo info() const { - return {offset_, line_, column_}; - } - - void update(const ui8* begin, const ui8* end) { - offset_ += end - begin; - for (auto current = begin; current != end; ++current) { - ++column_; - if (*current == '\n') { //TODO: memchr - ++line_; - column_ = 1; - } - } +namespace NYsonPull { + namespace NDetail { + template <bool EnableLinePositionInfo> + class stream_counter; + + template <> + class stream_counter<true> { + private: + size_t offset_ = 0; + size_t line_ = 1; + size_t column_ = 1; + + public: + TPositionInfo info() const { + return {offset_, line_, column_}; } - }; - - template <> - class stream_counter<false> { - private: - size_t offset_ = 0; - - public: - TPositionInfo info() const { - return {offset_, {}, {}}; - } - - void update(const ui8* begin, const ui8* end) { - offset_ += end - begin; - } - }; + + void update(const ui8* begin, const ui8* end) { + offset_ += end - begin; + for (auto current = begin; current != end; ++current) { + ++column_; + if (*current == '\n') { //TODO: memchr + ++line_; + column_ = 1; + } + } + } + }; + + template <> + class stream_counter<false> { + private: + size_t offset_ = 0; + + public: + TPositionInfo info() const { + return {offset_, {}, {}}; + } + + void update(const ui8* begin, const ui8* end) { + offset_ += end - begin; + } + }; } -} +} diff --git a/library/cpp/yson_pull/detail/symbols.h b/library/cpp/yson_pull/detail/symbols.h index dedf437535..fe94bb9c41 100644 --- a/library/cpp/yson_pull/detail/symbols.h +++ b/library/cpp/yson_pull/detail/symbols.h @@ -3,53 +3,53 @@ #include <util/generic/strbuf.h> #include <util/system/types.h> -namespace NYsonPull { - namespace NDetail { - namespace NSymbol { +namespace NYsonPull { + namespace NDetail { + namespace NSymbol { #define SYM(name, value) constexpr ui8 name = value - //! Indicates the beginning of a list. - SYM(begin_list, '['); - //! Indicates the end of a list. - SYM(end_list, ']'); - - //! Indicates the beginning of a map. - SYM(begin_map, '{'); - //! Indicates the end of a map. - SYM(end_map, '}'); - - //! Indicates the beginning of an attribute map. - SYM(begin_attributes, '<'); - //! Indicates the end of an attribute map. - SYM(end_attributes, '>'); - - //! Separates items in lists and pairs in maps or attribute maps. - SYM(item_separator, ';'); - //! Separates keys from values in maps and attribute maps. - SYM(key_value_separator, '='); - - //! Indicates an entity. - SYM(entity, '#'); - //! Indicates end of stream. - SYM(eof, '\0'); - - //! Marks the beginning of a binary string literal. - SYM(string_marker, '\x01'); - //! Marks the beginning of a binary int64 literal. - SYM(int64_marker, '\x02'); - //! Marks the beginning of a binary uint64 literal. - SYM(uint64_marker, '\x06'); - //! Marks the beginning of a binary double literal. - SYM(double_marker, '\x03'); - //! Marks a binary `false' boolean value. - SYM(false_marker, '\x04'); - //! Marks a binary `true' boolean value. - SYM(true_marker, '\x05'); - - //! Text string quote symbol - SYM(quote, '"'); + //! Indicates the beginning of a list. + SYM(begin_list, '['); + //! Indicates the end of a list. + SYM(end_list, ']'); + + //! Indicates the beginning of a map. + SYM(begin_map, '{'); + //! Indicates the end of a map. + SYM(end_map, '}'); + + //! Indicates the beginning of an attribute map. + SYM(begin_attributes, '<'); + //! Indicates the end of an attribute map. + SYM(end_attributes, '>'); + + //! Separates items in lists and pairs in maps or attribute maps. + SYM(item_separator, ';'); + //! Separates keys from values in maps and attribute maps. + SYM(key_value_separator, '='); + + //! Indicates an entity. + SYM(entity, '#'); + //! Indicates end of stream. + SYM(eof, '\0'); + + //! Marks the beginning of a binary string literal. + SYM(string_marker, '\x01'); + //! Marks the beginning of a binary int64 literal. + SYM(int64_marker, '\x02'); + //! Marks the beginning of a binary uint64 literal. + SYM(uint64_marker, '\x06'); + //! Marks the beginning of a binary double literal. + SYM(double_marker, '\x03'); + //! Marks a binary `false' boolean value. + SYM(false_marker, '\x04'); + //! Marks a binary `true' boolean value. + SYM(true_marker, '\x05'); + + //! Text string quote symbol + SYM(quote, '"'); #undef SYM - } - } -} + } + } +} diff --git a/library/cpp/yson_pull/detail/traits.h b/library/cpp/yson_pull/detail/traits.h index d08b261514..869a3b9c44 100644 --- a/library/cpp/yson_pull/detail/traits.h +++ b/library/cpp/yson_pull/detail/traits.h @@ -2,28 +2,28 @@ #include <type_traits> -namespace NYsonPull { - namespace NDetail { - namespace NTraits { - template <typename T, typename U> - using if_signed = typename std::enable_if< - std::is_signed<T>::value, - U>::type; +namespace NYsonPull { + namespace NDetail { + namespace NTraits { + template <typename T, typename U> + using if_signed = typename std::enable_if< + std::is_signed<T>::value, + U>::type; - template <typename T, typename U> - using if_unsigned = typename std::enable_if< - std::is_unsigned<T>::value, - U>::type; + template <typename T, typename U> + using if_unsigned = typename std::enable_if< + std::is_unsigned<T>::value, + U>::type; - template <typename T> - using to_unsigned = typename std::enable_if< - std::is_signed<T>::value, - typename std::make_unsigned<T>::type>::type; + template <typename T> + using to_unsigned = typename std::enable_if< + std::is_signed<T>::value, + typename std::make_unsigned<T>::type>::type; - template <typename T> - using to_signed = typename std::enable_if< - std::is_unsigned<T>::value, - typename std::make_signed<T>::type>::type; - } - } // namespace NDetail -} + template <typename T> + using to_signed = typename std::enable_if< + std::is_unsigned<T>::value, + typename std::make_signed<T>::type>::type; + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/varint.h b/library/cpp/yson_pull/detail/varint.h index e75bed9a3e..38bf45d925 100644 --- a/library/cpp/yson_pull/detail/varint.h +++ b/library/cpp/yson_pull/detail/varint.h @@ -10,251 +10,251 @@ #include <cstddef> #include <type_traits> -namespace NYsonPull { - namespace NDetail { - namespace NVarInt { - namespace NImpl { - template <typename T> - constexpr inline size_t max_size() { - return (8 * sizeof(T) - 1) / 7 + 1; - } +namespace NYsonPull { + namespace NDetail { + namespace NVarInt { + namespace NImpl { + template <typename T> + constexpr inline size_t max_size() { + return (8 * sizeof(T) - 1) / 7 + 1; + } - template <typename T> - inline size_t write(ui64 value, T&& consume) { - auto stop = false; - auto nwritten = size_t{0}; - while (!stop) { - ++nwritten; - auto byte = static_cast<ui8>(value | 0x80); - value >>= 7; - if (value == 0) { - stop = true; - byte &= 0x7F; - } - consume(byte); - } - return nwritten; - } + template <typename T> + inline size_t write(ui64 value, T&& consume) { + auto stop = false; + auto nwritten = size_t{0}; + while (!stop) { + ++nwritten; + auto byte = static_cast<ui8>(value | 0x80); + value >>= 7; + if (value == 0) { + stop = true; + byte &= 0x7F; + } + consume(byte); + } + return nwritten; + } - template <typename U> - inline bool read_fast(byte_reader<U>& reader, ui64* value) { - auto& buf = reader.stream().buffer(); - auto* ptr = buf.pos(); - ui32 b; + template <typename U> + inline bool read_fast(byte_reader<U>& reader, ui64* value) { + auto& buf = reader.stream().buffer(); + auto* ptr = buf.pos(); + ui32 b; - // Splitting into 32-bit pieces gives better performance on 32-bit - // processors. - ui32 part0 = 0, part1 = 0, part2 = 0; + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + ui32 part0 = 0, part1 = 0, part2 = 0; - b = *(ptr++); - part0 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part2 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part2 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; + b = *(ptr++); + part0 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; - // We have overrun the maximum size of a Varint (10 bytes). The data - // must be corrupt. - return false; + // We have overrun the maximum size of a Varint (10 bytes). The data + // must be corrupt. + return false; - done: - reader.advance(ptr - buf.pos()); - *value = (static_cast<ui64>(part0)) | (static_cast<ui64>(part1) << 28) | (static_cast<ui64>(part2) << 56); - return true; - } + done: + reader.advance(ptr - buf.pos()); + *value = (static_cast<ui64>(part0)) | (static_cast<ui64>(part1) << 28) | (static_cast<ui64>(part2) << 56); + return true; + } - template <typename U> - inline bool read_fast(byte_reader<U>& reader, ui32* value) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - auto& buf = reader.stream().buffer(); - auto* ptr = buf.pos(); - ui32 b; - ui32 result; + template <typename U> + inline bool read_fast(byte_reader<U>& reader, ui32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + auto& buf = reader.stream().buffer(); + auto* ptr = buf.pos(); + ui32 b; + ui32 result; - b = *(ptr++); - result = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= b << 28; - if (!(b & 0x80)) - goto done; + b = *(ptr++); + result = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= b << 28; + if (!(b & 0x80)) + goto done; - // FIXME - // If the input is larger than 32 bits, we still need to read it all - // and discard the high-order bits. + // FIXME + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. - for (size_t i = 0; i < max_size<ui64>() - max_size<ui32>(); i++) { - b = *(ptr++); - if (!(b & 0x80)) - goto done; - } + for (size_t i = 0; i < max_size<ui64>() - max_size<ui32>(); i++) { + b = *(ptr++); + if (!(b & 0x80)) + goto done; + } - // We have overrun the maximum size of a Varint (10 bytes). Assume - // the data is corrupt. - return false; + // We have overrun the maximum size of a Varint (10 bytes). Assume + // the data is corrupt. + return false; - done: - reader.advance(ptr - buf.pos()); - *value = result; - return true; - } + done: + reader.advance(ptr - buf.pos()); + *value = result; + return true; + } - template <typename U> - inline bool read_slow(byte_reader<U>& reader, ui64* value) { - // Slow path: This read might cross the end of the buffer, so we - // need to check and refresh the buffer if and when it does. + template <typename U> + inline bool read_slow(byte_reader<U>& reader, ui64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. - auto& buf = reader.stream().buffer(); - ui64 result = 0; - int count = 0; - ui32 b; + auto& buf = reader.stream().buffer(); + ui64 result = 0; + int count = 0; + ui32 b; - do { - if (count == max_size<ui64>()) { - return false; - } - reader.fill_buffer(); - if (reader.stream().at_end()) { - return false; - } - b = *buf.pos(); - result |= static_cast<ui64>(b & 0x7F) << (7 * count); - reader.advance(1); - ++count; - } while (b & 0x80); + do { + if (count == max_size<ui64>()) { + return false; + } + reader.fill_buffer(); + if (reader.stream().at_end()) { + return false; + } + b = *buf.pos(); + result |= static_cast<ui64>(b & 0x7F) << (7 * count); + reader.advance(1); + ++count; + } while (b & 0x80); - *value = result; - return true; - } + *value = result; + return true; + } - template <typename U> - inline bool read_slow(byte_reader<U>& reader, ui32* value) { - ui64 result; - // fallback to 64-bit reading - if (read_slow(reader, &result) && result <= std::numeric_limits<ui32>::max()) { - *value = static_cast<ui32>(result); - return true; - } + template <typename U> + inline bool read_slow(byte_reader<U>& reader, ui32* value) { + ui64 result; + // fallback to 64-bit reading + if (read_slow(reader, &result) && result <= std::numeric_limits<ui32>::max()) { + *value = static_cast<ui32>(result); + return true; + } - return false; - } + return false; + } - // Following functions is an adaptation - // of Protobuf code from coded_stream.cc - template <typename T, typename U> - inline bool read_dispatch(byte_reader<U>& reader, T* value) { - auto& buf = reader.stream().buffer(); + // Following functions is an adaptation + // of Protobuf code from coded_stream.cc + template <typename T, typename U> + inline bool read_dispatch(byte_reader<U>& reader, T* value) { + auto& buf = reader.stream().buffer(); // NOTE: checking for 64-bit max_size(), since 32-bit // read_fast() might fallback to 64-bit reading if (buf.available() >= max_size<ui64>() || - // Optimization: If the Varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. - (!buf.is_empty() && !(buf.end()[-1] & 0x80))) - { - return read_fast(reader, value); - } else { - // Really slow case: we will incur the cost of an extra function call here, - // but moving this out of line reduces the size of this function, which - // improves the common case. In micro benchmarks, this is worth about 10-15% - return read_slow(reader, value); - } - } + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (!buf.is_empty() && !(buf.end()[-1] & 0x80))) + { + return read_fast(reader, value); + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return read_slow(reader, value); + } + } - } + } - // Various functions to read/write varints. + // Various functions to read/write varints. - // Returns the number of bytes written. - template <typename T> - inline NTraits::if_unsigned<T, size_t> write(ui8* data, T value) { - return NImpl::write( - static_cast<ui64>(value), - [&](ui8 byte) { *data++ = byte; }); - } + // Returns the number of bytes written. + template <typename T> + inline NTraits::if_unsigned<T, size_t> write(ui8* data, T value) { + return NImpl::write( + static_cast<ui64>(value), + [&](ui8 byte) { *data++ = byte; }); + } - template <typename T> - inline NTraits::if_signed<T, size_t> write(ui8* data, T value) { - return NImpl::write( - static_cast<ui64>(NZigZag::encode(value)), - [&](ui8 byte) { *data++ = byte; }); - } + template <typename T> + inline NTraits::if_signed<T, size_t> write(ui8* data, T value) { + return NImpl::write( + static_cast<ui64>(NZigZag::encode(value)), + [&](ui8 byte) { *data++ = byte; }); + } - template <typename T, typename U> - inline void write(byte_writer<U>& stream, T value) { - ui8 data[NImpl::max_size<T>()]; - auto size = write(data, value); - stream.write(data, size); - } + template <typename T, typename U> + inline void write(byte_writer<U>& stream, T value) { + ui8 data[NImpl::max_size<T>()]; + auto size = write(data, value); + stream.write(data, size); + } - template <typename T, typename U> - inline NTraits::if_unsigned<T, T> read(byte_reader<U>& reader) { - auto value = T{}; - auto& buf = reader.stream().buffer(); - if (!buf.is_empty() && *buf.pos() < 0x80) { - value = *buf.pos(); - reader.advance(1); - return value; - } + template <typename T, typename U> + inline NTraits::if_unsigned<T, T> read(byte_reader<U>& reader) { + auto value = T{}; + auto& buf = reader.stream().buffer(); + if (!buf.is_empty() && *buf.pos() < 0x80) { + value = *buf.pos(); + reader.advance(1); + return value; + } if (Y_UNLIKELY(!NImpl::read_dispatch(reader, &value))) { - reader.fail("Error parsing varint value"); - } - return value; - } + reader.fail("Error parsing varint value"); + } + return value; + } - template <typename T, typename U> - inline NTraits::if_signed<T, T> read(byte_reader<U>& reader) { - return NZigZag::decode( - read<NTraits::to_unsigned<T>>(reader)); - } - } - } // namespace NDetail -} + template <typename T, typename U> + inline NTraits::if_signed<T, T> read(byte_reader<U>& reader) { + return NZigZag::decode( + read<NTraits::to_unsigned<T>>(reader)); + } + } + } // namespace NDetail +} diff --git a/library/cpp/yson_pull/detail/writer.h b/library/cpp/yson_pull/detail/writer.h index 2c9ab68199..b24b994292 100644 --- a/library/cpp/yson_pull/detail/writer.h +++ b/library/cpp/yson_pull/detail/writer.h @@ -18,439 +18,439 @@ #include <cmath> -namespace NYsonPull { - namespace NDetail { - class writer: public IConsumer { - enum class state { - maybe_key, - maybe_value, - value, - value_noattr, - before_begin, - before_end, - after_end, - }; - - byte_writer<stream_counter<false>> stream_; +namespace NYsonPull { + namespace NDetail { + class writer: public IConsumer { + enum class state { + maybe_key, + maybe_value, + value, + value_noattr, + before_begin, + before_end, + after_end, + }; + + byte_writer<stream_counter<false>> stream_; TVector<EEventType> stack_; - bool need_item_separator_ = false; - EStreamType mode_ = EStreamType::ListFragment; - state state_ = state::before_begin; + bool need_item_separator_ = false; + EStreamType mode_ = EStreamType::ListFragment; + state state_ = state::before_begin; - public: + public: void OnBeginStream() override { update_state(EEventType::BeginStream); - } + } void OnEndStream() override { update_state(EEventType::EndStream); - stream_.flush_buffer(); - } + stream_.flush_buffer(); + } void OnBeginList() override { - begin_node(); - write(NSymbol::begin_list); + begin_node(); + write(NSymbol::begin_list); update_state(EEventType::BeginList); - begin_collection(collection_type::list); - } + begin_collection(collection_type::list); + } void OnEndList() override { update_state(EEventType::EndList); - end_collection(collection_type::list); - write(NSymbol::end_list); - end_node(); - } + end_collection(collection_type::list); + write(NSymbol::end_list); + end_node(); + } void OnBeginMap() override { - begin_node(); - write(NSymbol::begin_map); + begin_node(); + write(NSymbol::begin_map); update_state(EEventType::BeginMap); - begin_collection(collection_type::map); - } + begin_collection(collection_type::map); + } void OnEndMap() override { update_state(EEventType::EndMap); - end_collection(collection_type::map); - write(NSymbol::end_map); - end_node(); - } + end_collection(collection_type::map); + write(NSymbol::end_map); + end_node(); + } void OnBeginAttributes() override { - begin_node(); - write(NSymbol::begin_attributes); + begin_node(); + write(NSymbol::begin_attributes); update_state(EEventType::BeginAttributes); - begin_collection(collection_type::attributes); - } + begin_collection(collection_type::attributes); + } void OnEndAttributes() override { update_state(EEventType::EndAttributes); - end_collection(collection_type::attributes); - write(NSymbol::end_attributes); - // no end_node - } + end_collection(collection_type::attributes); + write(NSymbol::end_attributes); + // no end_node + } void OnEntity() override { begin_node(); update_state(EEventType::Scalar); - write(NSymbol::entity); - end_node(); - } - - protected: - enum class collection_type { - list, - map, - attributes, - }; - - writer(NYsonPull::NOutput::IStream& stream, EStreamType mode) - : stream_(stream) - , mode_{mode} { - } - - bool need_item_separator() const { - return need_item_separator_; - } - void need_item_separator(bool value) { - need_item_separator_ = value; - } - - size_t depth() const { - Y_ASSERT(!stack_.empty()); - if (mode_ == EStreamType::Node) { - return stack_.size() - 1; - } else { - return stack_.size() - 2; - } - } - EStreamType mode() const { - return mode_; - } - - void write(ui8 c) { - stream_.write(c); - } - - void write(TStringBuf value) { - write_raw(value.data(), value.size()); - } - - void write_raw(const void* ptr, size_t len) { - stream_.write(static_cast<const ui8*>(ptr), len); - } - - template <typename T> - void write_varint(T value) { - NVarInt::write(stream_, value); - } - - void write_escaped_string(TStringBuf value) { - write(NSymbol::quote); - NCEscape::encode(stream_, value); - write(NSymbol::quote); - } - - void push(EEventType type) { - stack_.push_back(type); - } - - void pop(EEventType type) { - if (stack_.empty()) { - fail("Unpaired events: empty event stack"); + write(NSymbol::entity); + end_node(); + } + + protected: + enum class collection_type { + list, + map, + attributes, + }; + + writer(NYsonPull::NOutput::IStream& stream, EStreamType mode) + : stream_(stream) + , mode_{mode} { + } + + bool need_item_separator() const { + return need_item_separator_; + } + void need_item_separator(bool value) { + need_item_separator_ = value; + } + + size_t depth() const { + Y_ASSERT(!stack_.empty()); + if (mode_ == EStreamType::Node) { + return stack_.size() - 1; + } else { + return stack_.size() - 2; + } + } + EStreamType mode() const { + return mode_; + } + + void write(ui8 c) { + stream_.write(c); + } + + void write(TStringBuf value) { + write_raw(value.data(), value.size()); + } + + void write_raw(const void* ptr, size_t len) { + stream_.write(static_cast<const ui8*>(ptr), len); + } + + template <typename T> + void write_varint(T value) { + NVarInt::write(stream_, value); + } + + void write_escaped_string(TStringBuf value) { + write(NSymbol::quote); + NCEscape::encode(stream_, value); + write(NSymbol::quote); + } + + void push(EEventType type) { + stack_.push_back(type); + } + + void pop(EEventType type) { + if (stack_.empty()) { + fail("Unpaired events: empty event stack"); } - if (stack_.back() != type) { - fail("Unpaired events: expected ", type, ", got ", stack_.back()); + if (stack_.back() != type) { + fail("Unpaired events: expected ", type, ", got ", stack_.back()); } - stack_.pop_back(); - } + stack_.pop_back(); + } - void update_state(EEventType event) { - switch (state_) { - case state::before_begin: + void update_state(EEventType event) { + switch (state_) { + case state::before_begin: if (event != EEventType::BeginStream) { - fail("Expected begin_stream, got ", event); - } - begin_stream(); - return; + fail("Expected begin_stream, got ", event); + } + begin_stream(); + return; - case state::before_end: + case state::before_end: if (event != EEventType::EndStream) { - fail("Expected end_stream, got ", event); - } + fail("Expected end_stream, got ", event); + } end_stream(); return; - case state::after_end: - fail("Attempted write past stream end"); + case state::after_end: + fail("Attempted write past stream end"); - case state::maybe_key: + case state::maybe_key: if (event == EEventType::Key) { - state_ = state::value; - return; - } + state_ = state::value; + return; + } - switch (event) { + switch (event) { case EEventType::EndStream: - end_stream(); - return; + end_stream(); + return; case EEventType::EndMap: pop(EEventType::BeginMap); - next_state(); - return; + next_state(); + return; case EEventType::EndAttributes: pop(EEventType::BeginAttributes); - state_ = state::value_noattr; - return; + state_ = state::value_noattr; + return; - default: - fail("Unexpected event ", event, " in maybe_key"); - } - break; + default: + fail("Unexpected event ", event, " in maybe_key"); + } + break; - case state::maybe_value: - switch (event) { + case state::maybe_value: + switch (event) { case EEventType::EndList: pop(EEventType::BeginList); - next_state(); - return; + next_state(); + return; case EEventType::EndStream: - end_stream(); - return; + end_stream(); + return; - default: - break; - } + default: + break; + } [[fallthrough]]; - case state::value: + case state::value: if (event == EEventType::BeginAttributes) { push(EEventType::BeginAttributes); - next_state(); - return; - } + next_state(); + return; + } [[fallthrough]]; - case state::value_noattr: - switch (event) { + case state::value_noattr: + switch (event) { case EEventType::Scalar: - next_state(); - return; - + next_state(); + return; + case EEventType::BeginList: push(EEventType::BeginList); - next_state(); - return; - + next_state(); + return; + case EEventType::BeginMap: push(EEventType::BeginMap); - next_state(); - return; - - default: - fail("Unexpected event ", event, " (in value_*)"); - } - break; + next_state(); + return; + + default: + fail("Unexpected event ", event, " (in value_*)"); + } + break; } - } + } - void next_state() { - Y_ASSERT(!stack_.empty()); - switch (stack_.back()) { + void next_state() { + Y_ASSERT(!stack_.empty()); + switch (stack_.back()) { case EEventType::BeginMap: case EEventType::BeginAttributes: - state_ = state::maybe_key; - break; + state_ = state::maybe_key; + break; case EEventType::BeginList: - state_ = state::maybe_value; - break; + state_ = state::maybe_value; + break; case EEventType::BeginStream: - state_ = state::before_end; - break; + state_ = state::before_end; + break; - default: + default: Y_UNREACHABLE(); - } - } + } + } - void begin_stream() { + void begin_stream() { push(EEventType::BeginStream); - switch (mode_) { - case EStreamType::ListFragment: + switch (mode_) { + case EStreamType::ListFragment: push(EEventType::BeginList); - state_ = state::maybe_value; - break; + state_ = state::maybe_value; + break; - case EStreamType::MapFragment: + case EStreamType::MapFragment: push(EEventType::BeginMap); - state_ = state::maybe_key; - break; - - case EStreamType::Node: - state_ = state::value; - break; - } - } - - void end_stream() { - switch (mode_) { - case EStreamType::ListFragment: + state_ = state::maybe_key; + break; + + case EStreamType::Node: + state_ = state::value; + break; + } + } + + void end_stream() { + switch (mode_) { + case EStreamType::ListFragment: pop(EEventType::BeginList); - break; + break; - case EStreamType::MapFragment: + case EStreamType::MapFragment: pop(EEventType::BeginMap); - break; + break; - case EStreamType::Node: - break; - } + case EStreamType::Node: + break; + } pop(EEventType::BeginStream); - state_ = state::after_end; - } - - virtual void begin_node() { - if (need_item_separator_) { - write(NSymbol::item_separator); - } - } - - virtual void end_node() { - need_item_separator_ = true; - } - - virtual void begin_key() { - begin_node(); - } - - virtual void end_key() { - need_item_separator_ = false; - write(NSymbol::key_value_separator); - } - - virtual void begin_collection(collection_type type) { - Y_UNUSED(type); - need_item_separator_ = false; - } - - virtual void end_collection(collection_type type) { - need_item_separator_ = (type != collection_type::attributes); - } - - template <typename... Args> - ATTRIBUTE(noinline, cold) - void fail[[noreturn]](const char* msg, Args&&... args) { - auto formatted_message = format_string( - msg, - std::forward<Args>(args)...); - throw NException::TBadOutput( - formatted_message, - stream_.counter().info()); - } - }; + state_ = state::after_end; + } + + virtual void begin_node() { + if (need_item_separator_) { + write(NSymbol::item_separator); + } + } + + virtual void end_node() { + need_item_separator_ = true; + } + + virtual void begin_key() { + begin_node(); + } + + virtual void end_key() { + need_item_separator_ = false; + write(NSymbol::key_value_separator); + } + + virtual void begin_collection(collection_type type) { + Y_UNUSED(type); + need_item_separator_ = false; + } + + virtual void end_collection(collection_type type) { + need_item_separator_ = (type != collection_type::attributes); + } + + template <typename... Args> + ATTRIBUTE(noinline, cold) + void fail[[noreturn]](const char* msg, Args&&... args) { + auto formatted_message = format_string( + msg, + std::forward<Args>(args)...); + throw NException::TBadOutput( + formatted_message, + stream_.counter().info()); + } + }; class TBinaryWriterImpl final: public writer { - public: + public: TBinaryWriterImpl(NYsonPull::NOutput::IStream& stream, EStreamType mode) - : writer(stream, mode) - { - } + : writer(stream, mode) + { + } void OnScalarBoolean(bool value) override { update_state(EEventType::Scalar); - begin_node(); - write(value ? NSymbol::true_marker : NSymbol::false_marker); - end_node(); - } + begin_node(); + write(value ? NSymbol::true_marker : NSymbol::false_marker); + end_node(); + } void OnScalarInt64(i64 value) override { update_state(EEventType::Scalar); - begin_node(); - write(NSymbol::int64_marker); - write_varint(value); - end_node(); - } + begin_node(); + write(NSymbol::int64_marker); + write_varint(value); + end_node(); + } void OnScalarUInt64(ui64 value) override { update_state(EEventType::Scalar); - begin_node(); - write(NSymbol::uint64_marker); - write_varint(value); - end_node(); - } + begin_node(); + write(NSymbol::uint64_marker); + write_varint(value); + end_node(); + } void OnScalarFloat64(double value) override { update_state(EEventType::Scalar); - begin_node(); - write(NSymbol::double_marker); - write_raw(&value, sizeof value); - end_node(); - } + begin_node(); + write(NSymbol::double_marker); + write_raw(&value, sizeof value); + end_node(); + } void OnScalarString(TStringBuf value) override { update_state(EEventType::Scalar); - begin_node(); - write(NSymbol::string_marker); - write_varint(static_cast<i32>(value.size())); - write_raw(value.data(), value.size()); - end_node(); - } + begin_node(); + write(NSymbol::string_marker); + write_varint(static_cast<i32>(value.size())); + write_raw(value.data(), value.size()); + end_node(); + } void OnKey(TStringBuf name) override { update_state(EEventType::Key); - begin_key(); - write(NSymbol::string_marker); - write_varint(static_cast<i32>(name.size())); - write_raw(name.data(), name.size()); - end_key(); - } - }; + begin_key(); + write(NSymbol::string_marker); + write_varint(static_cast<i32>(name.size())); + write_raw(name.data(), name.size()); + end_key(); + } + }; class TTextWriterImpl: public writer { - public: + public: TTextWriterImpl(NYsonPull::NOutput::IStream& stream, EStreamType mode) - : writer(stream, mode) - { - } + : writer(stream, mode) + { + } void OnScalarBoolean(bool value) override { update_state(EEventType::Scalar); - begin_node(); + begin_node(); write(value ? percent_scalar::true_literal : percent_scalar::false_literal); - end_node(); - } + end_node(); + } void OnScalarInt64(i64 value) override { update_state(EEventType::Scalar); - char buf[32]; + char buf[32]; auto len = ::snprintf(buf, sizeof(buf), "%" PRIi64, value); - begin_node(); - write_raw(buf, len); - end_node(); - } + begin_node(); + write_raw(buf, len); + end_node(); + } void OnScalarUInt64(ui64 value) override { update_state(EEventType::Scalar); - char buf[32]; + char buf[32]; auto len = ::snprintf(buf, sizeof(buf), "%" PRIu64, value); - begin_node(); - write_raw(buf, len); - write('u'); - end_node(); - } + begin_node(); + write_raw(buf, len); + write('u'); + end_node(); + } void OnScalarFloat64(double value) override { update_state(EEventType::Scalar); @@ -469,98 +469,98 @@ namespace NYsonPull { write(percent_scalar::negative_inf_literal); } - end_node(); - } + end_node(); + } void OnScalarString(TStringBuf value) override { update_state(EEventType::Scalar); - begin_node(); - write_escaped_string(value); - end_node(); - } + begin_node(); + write_escaped_string(value); + end_node(); + } void OnKey(TStringBuf name) override { update_state(EEventType::Key); - begin_key(); - write_escaped_string(name); - end_key(); - } - - protected: - void begin_node() override { - if (need_item_separator()) { - write(NSymbol::item_separator); - write(' '); - } - } - - void end_node() override { - if (mode() != EStreamType::Node && depth() == 0) { - write(NSymbol::item_separator); - write('\n'); - need_item_separator(false); - } else { - writer::end_node(); - } - } - - void end_key() override { - write(' '); - writer::end_key(); - write(' '); - } - }; + begin_key(); + write_escaped_string(name); + end_key(); + } + + protected: + void begin_node() override { + if (need_item_separator()) { + write(NSymbol::item_separator); + write(' '); + } + } + + void end_node() override { + if (mode() != EStreamType::Node && depth() == 0) { + write(NSymbol::item_separator); + write('\n'); + need_item_separator(false); + } else { + writer::end_node(); + } + } + + void end_key() override { + write(' '); + writer::end_key(); + write(' '); + } + }; class TPrettyWriterImpl final: public TTextWriterImpl { - size_t indent_size_; + size_t indent_size_; - public: + public: TPrettyWriterImpl( - NYsonPull::NOutput::IStream& stream, - EStreamType mode, - size_t indent_size) + NYsonPull::NOutput::IStream& stream, + EStreamType mode, + size_t indent_size) : TTextWriterImpl(stream, mode) - , indent_size_{indent_size} { - } - - protected: - void begin_node() override { - if (need_item_separator()) { - write(NSymbol::item_separator); - newline(); - } - } - - void begin_collection(collection_type type) override { + , indent_size_{indent_size} { + } + + protected: + void begin_node() override { + if (need_item_separator()) { + write(NSymbol::item_separator); + newline(); + } + } + + void begin_collection(collection_type type) override { TTextWriterImpl::begin_collection(type); - newline(); - } + newline(); + } - void end_collection(collection_type type) override { + void end_collection(collection_type type) override { TTextWriterImpl::end_collection(type); - newline(); - } - - void newline() { - write('\n'); - indent(depth()); - } - - void indent(size_t count) { - for (size_t i = 0; i < count * indent_size_; ++i) { - write(' '); - } - } - }; - - template <typename T, typename... Args> - NYsonPull::TWriter make_writer( - THolder<NYsonPull::NOutput::IStream> stream, - Args&&... args) { - auto impl = MakeHolder<T>(*stream, std::forward<Args>(args)...); - return NYsonPull::TWriter(std::move(stream), std::move(impl)); + newline(); + } + + void newline() { + write('\n'); + indent(depth()); + } + + void indent(size_t count) { + for (size_t i = 0; i < count * indent_size_; ++i) { + write(' '); + } + } + }; + + template <typename T, typename... Args> + NYsonPull::TWriter make_writer( + THolder<NYsonPull::NOutput::IStream> stream, + Args&&... args) { + auto impl = MakeHolder<T>(*stream, std::forward<Args>(args)...); + return NYsonPull::TWriter(std::move(stream), std::move(impl)); } } -} +} diff --git a/library/cpp/yson_pull/detail/zigzag.h b/library/cpp/yson_pull/detail/zigzag.h index 4fca549f49..98fcac0e9f 100644 --- a/library/cpp/yson_pull/detail/zigzag.h +++ b/library/cpp/yson_pull/detail/zigzag.h @@ -2,23 +2,23 @@ #include "traits.h" -namespace NYsonPull { - namespace NDetail { - namespace NZigZag { - //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x| +namespace NYsonPull { + namespace NDetail { + namespace NZigZag { + //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x| template <typename TSigned> inline NTraits::to_unsigned<TSigned> encode(TSigned x) { using TUnsigned = NTraits::to_unsigned<TSigned>; constexpr auto rshift = sizeof(TSigned) * 8 - 1; return (static_cast<TUnsigned>(x) << 1) ^ static_cast<TUnsigned>(x >> rshift); - } + } template <typename TUnsigned> inline NTraits::to_signed<TUnsigned> decode(TUnsigned x) { using TSigned = NTraits::to_signed<TUnsigned>; return static_cast<TSigned>(x >> 1) ^ -static_cast<TSigned>(x & 1); - } - } - } // namespace NDetail -} + } + } + } // namespace NDetail +} |