diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/yson_pull/detail/lexer_base.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/yson_pull/detail/lexer_base.h')
-rw-r--r-- | library/cpp/yson_pull/detail/lexer_base.h | 518 |
1 files changed, 259 insertions, 259 deletions
diff --git a/library/cpp/yson_pull/detail/lexer_base.h b/library/cpp/yson_pull/detail/lexer_base.h index 572bdb3d18..54454f8c6f 100644 --- a/library/cpp/yson_pull/detail/lexer_base.h +++ b/library/cpp/yson_pull/detail/lexer_base.h @@ -12,209 +12,209 @@ #include <util/generic/vector.h> #include <util/string/cast.h> -namespace NYsonPull { - namespace NDetail { - template <bool EnableLinePositionInfo> - class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> { - using Base = byte_reader< - stream_counter<EnableLinePositionInfo>>; +namespace NYsonPull { + namespace NDetail { + template <bool EnableLinePositionInfo> + class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> { + using Base = byte_reader< + stream_counter<EnableLinePositionInfo>>; TVector<ui8> token_buffer_; - TMaybe<size_t> memory_limit_; - - public: - lexer_base( - NYsonPull::NInput::IStream& buffer, - TMaybe<size_t> memory_limit) - : Base(buffer) - , memory_limit_{memory_limit} { - } - - ATTRIBUTE(noinline, hot) - ui8 skip_space_and_get_byte() { - auto& buf = Base::stream().buffer(); + TMaybe<size_t> memory_limit_; + + public: + lexer_base( + NYsonPull::NInput::IStream& buffer, + TMaybe<size_t> memory_limit) + : Base(buffer) + , memory_limit_{memory_limit} { + } + + ATTRIBUTE(noinline, hot) + ui8 skip_space_and_get_byte() { + auto& buf = Base::stream().buffer(); if (Y_LIKELY(!buf.is_empty())) { - auto ch = *buf.pos(); + auto ch = *buf.pos(); if (Y_LIKELY(!is_space(ch))) { - return ch; - } - } - return skip_space_and_get_byte_fallback(); - } - - ATTRIBUTE(hot) - ui8 get_byte() { - auto& buf = Base::stream().buffer(); + return ch; + } + } + return skip_space_and_get_byte_fallback(); + } + + ATTRIBUTE(hot) + ui8 get_byte() { + auto& buf = Base::stream().buffer(); if (Y_LIKELY(!buf.is_empty())) { - return *buf.pos(); - } - return Base::get_byte(); + return *buf.pos(); + } + return Base::get_byte(); } - number read_numeric() { - token_buffer_.clear(); - auto type = number_type::int64; - while (true) { + number read_numeric() { + token_buffer_.clear(); + auto type = number_type::int64; + while (true) { auto ch = this->Base::template get_byte<true>(); - if (isdigit(ch) || ch == '+' || ch == '-') { - token_buffer_.push_back(ch); - } else if (ch == '.' || ch == 'e' || ch == 'E') { - token_buffer_.push_back(ch); - type = number_type::float64; - } else if (ch == 'u') { - token_buffer_.push_back(ch); - type = number_type::uint64; + if (isdigit(ch) || ch == '+' || ch == '-') { + token_buffer_.push_back(ch); + } else if (ch == '.' || ch == 'e' || ch == 'E') { + token_buffer_.push_back(ch); + type = number_type::float64; + } else if (ch == 'u') { + token_buffer_.push_back(ch); + type = number_type::uint64; } else if (Y_UNLIKELY(isalpha(ch))) { - COLD_BLOCK_BYVALUE - Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal"); - COLD_BLOCK_END - } else { - break; - } - check_memory_limit(); - Base::advance(1); - } - - auto str = token_buffer(); - try { - switch (type) { - case number_type::float64: - return FromString<double>(str); - case number_type::int64: - return FromString<i64>(str); - case number_type::uint64: - str.Chop(1); // 'u' suffix - return FromString<ui64>(str); - } + COLD_BLOCK_BYVALUE + Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal"); + COLD_BLOCK_END + } else { + break; + } + check_memory_limit(); + Base::advance(1); + } + + auto str = token_buffer(); + try { + switch (type) { + case number_type::float64: + return FromString<double>(str); + case number_type::int64: + return FromString<i64>(str); + case number_type::uint64: + str.Chop(1); // 'u' suffix + return FromString<ui64>(str); + } Y_UNREACHABLE(); - } catch (const std::exception& err) { - Base::fail(err.what()); - } + } catch (const std::exception& err) { + Base::fail(err.what()); + } } - TStringBuf read_quoted_string() { - auto count_trailing_slashes = [](ui8* begin, ui8* end) { - auto count = size_t{0}; - if (begin < end) { - for (auto p = end - 1; p >= begin && *p == '\\'; --p) { - ++count; - } - } - return count; - }; - - token_buffer_.clear(); - auto& buf = Base::stream().buffer(); - while (true) { + TStringBuf read_quoted_string() { + auto count_trailing_slashes = [](ui8* begin, ui8* end) { + auto count = size_t{0}; + if (begin < end) { + for (auto p = end - 1; p >= begin && *p == '\\'; --p) { + ++count; + } + } + return count; + }; + + token_buffer_.clear(); + auto& buf = Base::stream().buffer(); + while (true) { this->Base::template fill_buffer<false>(); - auto* quote = reinterpret_cast<const ui8*>( - ::memchr(buf.pos(), '"', buf.available())); - if (quote == nullptr) { - token_buffer_.insert( - token_buffer_.end(), - buf.pos(), - buf.end()); - Base::advance(buf.available()); - continue; - } - - token_buffer_.insert( - token_buffer_.end(), - buf.pos(), - quote); - Base::advance(quote - buf.pos() + 1); // +1 for the quote itself - - // We must count the number of '\' at the end of StringValue - // to check if it's not \" - int slash_count = count_trailing_slashes( - token_buffer_.data(), - token_buffer_.data() + token_buffer_.size()); - if (slash_count % 2 == 0) { - break; - } else { - token_buffer_.push_back('"'); - } - check_memory_limit(); + auto* quote = reinterpret_cast<const ui8*>( + ::memchr(buf.pos(), '"', buf.available())); + if (quote == nullptr) { + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + buf.end()); + Base::advance(buf.available()); + continue; + } + + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + quote); + Base::advance(quote - buf.pos() + 1); // +1 for the quote itself + + // We must count the number of '\' at the end of StringValue + // to check if it's not \" + int slash_count = count_trailing_slashes( + token_buffer_.data(), + token_buffer_.data() + token_buffer_.size()); + if (slash_count % 2 == 0) { + break; + } else { + token_buffer_.push_back('"'); + } + check_memory_limit(); } - - NCEscape::decode_inplace(token_buffer_); - return token_buffer(); + + NCEscape::decode_inplace(token_buffer_); + return token_buffer(); } - TStringBuf read_unquoted_string() { - token_buffer_.clear(); - while (true) { + TStringBuf read_unquoted_string() { + token_buffer_.clear(); + while (true) { auto ch = this->Base::template get_byte<true>(); - if (isalpha(ch) || isdigit(ch) || - ch == '_' || ch == '-' || ch == '%' || ch == '.') { - token_buffer_.push_back(ch); - } else { - break; - } - check_memory_limit(); - Base::advance(1); - } - return token_buffer(); + if (isalpha(ch) || isdigit(ch) || + ch == '_' || ch == '-' || ch == '%' || ch == '.') { + token_buffer_.push_back(ch); + } else { + break; + } + check_memory_limit(); + Base::advance(1); + } + return token_buffer(); } - ATTRIBUTE(noinline, hot) - TStringBuf read_binary_string() { - auto slength = NVarInt::read<i32>(*this); + ATTRIBUTE(noinline, hot) + TStringBuf read_binary_string() { + auto slength = NVarInt::read<i32>(*this); if (Y_UNLIKELY(slength < 0)) { - COLD_BLOCK_BYVALUE - Base::fail("Negative binary string literal length ", slength); - COLD_BLOCK_END - } - auto length = static_cast<ui32>(slength); + COLD_BLOCK_BYVALUE + Base::fail("Negative binary string literal length ", slength); + COLD_BLOCK_END + } + auto length = static_cast<ui32>(slength); - auto& buf = Base::stream().buffer(); + auto& buf = Base::stream().buffer(); if (Y_LIKELY(buf.available() >= length)) { - auto result = TStringBuf{ - reinterpret_cast<const char*>(buf.pos()), - length}; - Base::advance(length); - return result; - } else { // reading in Buffer - return read_binary_string_fallback(length); - } + auto result = TStringBuf{ + reinterpret_cast<const char*>(buf.pos()), + length}; + Base::advance(length); + return result; + } else { // reading in Buffer + return read_binary_string_fallback(length); + } } - ATTRIBUTE(noinline) - TStringBuf read_binary_string_fallback(size_t length) { - auto& buf = Base::stream().buffer(); - auto needToRead = length; - token_buffer_.clear(); - while (needToRead) { + ATTRIBUTE(noinline) + TStringBuf read_binary_string_fallback(size_t length) { + auto& buf = Base::stream().buffer(); + auto needToRead = length; + token_buffer_.clear(); + while (needToRead) { this->Base::template fill_buffer<false>(); - auto chunk_size = std::min(needToRead, buf.available()); - - token_buffer_.insert( - token_buffer_.end(), - buf.pos(), - buf.pos() + chunk_size); - check_memory_limit(); - needToRead -= chunk_size; - Base::advance(chunk_size); - } - return token_buffer(); + auto chunk_size = std::min(needToRead, buf.available()); + + token_buffer_.insert( + token_buffer_.end(), + buf.pos(), + buf.pos() + chunk_size); + check_memory_limit(); + needToRead -= chunk_size; + Base::advance(chunk_size); + } + return token_buffer(); } percent_scalar read_percent_scalar() { auto throw_incorrect_percent_scalar = [&]() { Base::fail("Incorrect %-literal prefix ", NCEscape::quote(token_buffer())); - }; + }; auto assert_literal = [&](TStringBuf literal) -> void { for (size_t i = 2; i < literal.size(); ++i) { token_buffer_.push_back(this->Base::template get_byte<false>()); - Base::advance(1); + Base::advance(1); if (Y_UNLIKELY(token_buffer_.back() != literal[i])) { throw_incorrect_percent_scalar(); - } - } - }; + } + } + }; - token_buffer_.clear(); + token_buffer_.clear(); token_buffer_.push_back(this->Base::template get_byte<false>()); Base::advance(1); @@ -237,107 +237,107 @@ namespace NYsonPull { default: throw_incorrect_percent_scalar(); } - + Y_UNREACHABLE(); } - i64 read_binary_int64() { - return NVarInt::read<i64>(*this); - } - - ui64 read_binary_uint64() { - return NVarInt::read<ui64>(*this); - } - - double read_binary_double() { - union { - double as_double; - ui8 as_bytes[sizeof(double)]; - } data; - static_assert(sizeof(data) == sizeof(double), "bad union size"); - - auto needToRead = sizeof(double); - - auto& buf = Base::stream().buffer(); - while (needToRead != 0) { - Base::fill_buffer(); - - auto chunk_size = std::min(needToRead, buf.available()); - if (chunk_size == 0) { - Base::fail("Error parsing binary double literal"); - } - std::copy( - buf.pos(), - buf.pos() + chunk_size, - data.as_bytes + (sizeof(double) - needToRead)); - needToRead -= chunk_size; - Base::advance(chunk_size); - } - return data.as_double; - } - - private: - static bool is_space(ui8 ch) { - static const ui8 lookupTable[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return lookupTable[ch]; + i64 read_binary_int64() { + return NVarInt::read<i64>(*this); + } + + ui64 read_binary_uint64() { + return NVarInt::read<ui64>(*this); + } + + double read_binary_double() { + union { + double as_double; + ui8 as_bytes[sizeof(double)]; + } data; + static_assert(sizeof(data) == sizeof(double), "bad union size"); + + auto needToRead = sizeof(double); + + auto& buf = Base::stream().buffer(); + while (needToRead != 0) { + Base::fill_buffer(); + + auto chunk_size = std::min(needToRead, buf.available()); + if (chunk_size == 0) { + Base::fail("Error parsing binary double literal"); + } + std::copy( + buf.pos(), + buf.pos() + chunk_size, + data.as_bytes + (sizeof(double) - needToRead)); + needToRead -= chunk_size; + Base::advance(chunk_size); + } + return data.as_double; } - ATTRIBUTE(noinline, cold) - ui8 skip_space_and_get_byte_fallback() { - auto& buf = Base::stream().buffer(); - while (true) { - // FIXME - if (buf.is_empty()) { - if (Base::stream().at_end()) { - return '\0'; - } - Base::fill_buffer(); - } else { - if (!is_space(*buf.pos())) { - break; - } - Base::advance(1); - } + private: + static bool is_space(ui8 ch) { + static const ui8 lookupTable[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return lookupTable[ch]; + } + + ATTRIBUTE(noinline, cold) + ui8 skip_space_and_get_byte_fallback() { + auto& buf = Base::stream().buffer(); + while (true) { + // FIXME + if (buf.is_empty()) { + if (Base::stream().at_end()) { + return '\0'; + } + Base::fill_buffer(); + } else { + if (!is_space(*buf.pos())) { + break; + } + Base::advance(1); + } } - return Base::get_byte(); - } - - void check_memory_limit() { + return Base::get_byte(); + } + + void check_memory_limit() { if (Y_UNLIKELY(memory_limit_ && token_buffer_.capacity() > *memory_limit_)) { - COLD_BLOCK_BYVALUE - Base::fail( - "Memory limit exceeded while parsing YSON stream: " - "allocated ", - token_buffer_.capacity(), - ", limit ", *memory_limit_); - COLD_BLOCK_END + COLD_BLOCK_BYVALUE + Base::fail( + "Memory limit exceeded while parsing YSON stream: " + "allocated ", + token_buffer_.capacity(), + ", limit ", *memory_limit_); + COLD_BLOCK_END } } - TStringBuf token_buffer() const { - auto* begin = reinterpret_cast<const char*>(token_buffer_.data()); - return {begin, token_buffer_.size()}; - } - }; + TStringBuf token_buffer() const { + auto* begin = reinterpret_cast<const char*>(token_buffer_.data()); + return {begin, token_buffer_.size()}; + } + }; } -} +} |