aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yson_pull/detail/lexer_base.h
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:15 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:15 +0300
commit72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
treeda2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/yson_pull/detail/lexer_base.h
parent778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
downloadydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/yson_pull/detail/lexer_base.h')
-rw-r--r--library/cpp/yson_pull/detail/lexer_base.h518
1 files changed, 259 insertions, 259 deletions
diff --git a/library/cpp/yson_pull/detail/lexer_base.h b/library/cpp/yson_pull/detail/lexer_base.h
index 572bdb3d18..54454f8c6f 100644
--- a/library/cpp/yson_pull/detail/lexer_base.h
+++ b/library/cpp/yson_pull/detail/lexer_base.h
@@ -12,209 +12,209 @@
#include <util/generic/vector.h>
#include <util/string/cast.h>
-namespace NYsonPull {
- namespace NDetail {
- template <bool EnableLinePositionInfo>
- class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> {
- using Base = byte_reader<
- stream_counter<EnableLinePositionInfo>>;
+namespace NYsonPull {
+ namespace NDetail {
+ template <bool EnableLinePositionInfo>
+ class lexer_base: public byte_reader<stream_counter<EnableLinePositionInfo>> {
+ using Base = byte_reader<
+ stream_counter<EnableLinePositionInfo>>;
TVector<ui8> token_buffer_;
- TMaybe<size_t> memory_limit_;
-
- public:
- lexer_base(
- NYsonPull::NInput::IStream& buffer,
- TMaybe<size_t> memory_limit)
- : Base(buffer)
- , memory_limit_{memory_limit} {
- }
-
- ATTRIBUTE(noinline, hot)
- ui8 skip_space_and_get_byte() {
- auto& buf = Base::stream().buffer();
+ TMaybe<size_t> memory_limit_;
+
+ public:
+ lexer_base(
+ NYsonPull::NInput::IStream& buffer,
+ TMaybe<size_t> memory_limit)
+ : Base(buffer)
+ , memory_limit_{memory_limit} {
+ }
+
+ ATTRIBUTE(noinline, hot)
+ ui8 skip_space_and_get_byte() {
+ auto& buf = Base::stream().buffer();
if (Y_LIKELY(!buf.is_empty())) {
- auto ch = *buf.pos();
+ auto ch = *buf.pos();
if (Y_LIKELY(!is_space(ch))) {
- return ch;
- }
- }
- return skip_space_and_get_byte_fallback();
- }
-
- ATTRIBUTE(hot)
- ui8 get_byte() {
- auto& buf = Base::stream().buffer();
+ return ch;
+ }
+ }
+ return skip_space_and_get_byte_fallback();
+ }
+
+ ATTRIBUTE(hot)
+ ui8 get_byte() {
+ auto& buf = Base::stream().buffer();
if (Y_LIKELY(!buf.is_empty())) {
- return *buf.pos();
- }
- return Base::get_byte();
+ return *buf.pos();
+ }
+ return Base::get_byte();
}
- number read_numeric() {
- token_buffer_.clear();
- auto type = number_type::int64;
- while (true) {
+ number read_numeric() {
+ token_buffer_.clear();
+ auto type = number_type::int64;
+ while (true) {
auto ch = this->Base::template get_byte<true>();
- if (isdigit(ch) || ch == '+' || ch == '-') {
- token_buffer_.push_back(ch);
- } else if (ch == '.' || ch == 'e' || ch == 'E') {
- token_buffer_.push_back(ch);
- type = number_type::float64;
- } else if (ch == 'u') {
- token_buffer_.push_back(ch);
- type = number_type::uint64;
+ if (isdigit(ch) || ch == '+' || ch == '-') {
+ token_buffer_.push_back(ch);
+ } else if (ch == '.' || ch == 'e' || ch == 'E') {
+ token_buffer_.push_back(ch);
+ type = number_type::float64;
+ } else if (ch == 'u') {
+ token_buffer_.push_back(ch);
+ type = number_type::uint64;
} else if (Y_UNLIKELY(isalpha(ch))) {
- COLD_BLOCK_BYVALUE
- Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal");
- COLD_BLOCK_END
- } else {
- break;
- }
- check_memory_limit();
- Base::advance(1);
- }
-
- auto str = token_buffer();
- try {
- switch (type) {
- case number_type::float64:
- return FromString<double>(str);
- case number_type::int64:
- return FromString<i64>(str);
- case number_type::uint64:
- str.Chop(1); // 'u' suffix
- return FromString<ui64>(str);
- }
+ COLD_BLOCK_BYVALUE
+ Base::fail("Unexpected ", NCEscape::quote(ch), " in numeric literal");
+ COLD_BLOCK_END
+ } else {
+ break;
+ }
+ check_memory_limit();
+ Base::advance(1);
+ }
+
+ auto str = token_buffer();
+ try {
+ switch (type) {
+ case number_type::float64:
+ return FromString<double>(str);
+ case number_type::int64:
+ return FromString<i64>(str);
+ case number_type::uint64:
+ str.Chop(1); // 'u' suffix
+ return FromString<ui64>(str);
+ }
Y_UNREACHABLE();
- } catch (const std::exception& err) {
- Base::fail(err.what());
- }
+ } catch (const std::exception& err) {
+ Base::fail(err.what());
+ }
}
- TStringBuf read_quoted_string() {
- auto count_trailing_slashes = [](ui8* begin, ui8* end) {
- auto count = size_t{0};
- if (begin < end) {
- for (auto p = end - 1; p >= begin && *p == '\\'; --p) {
- ++count;
- }
- }
- return count;
- };
-
- token_buffer_.clear();
- auto& buf = Base::stream().buffer();
- while (true) {
+ TStringBuf read_quoted_string() {
+ auto count_trailing_slashes = [](ui8* begin, ui8* end) {
+ auto count = size_t{0};
+ if (begin < end) {
+ for (auto p = end - 1; p >= begin && *p == '\\'; --p) {
+ ++count;
+ }
+ }
+ return count;
+ };
+
+ token_buffer_.clear();
+ auto& buf = Base::stream().buffer();
+ while (true) {
this->Base::template fill_buffer<false>();
- auto* quote = reinterpret_cast<const ui8*>(
- ::memchr(buf.pos(), '"', buf.available()));
- if (quote == nullptr) {
- token_buffer_.insert(
- token_buffer_.end(),
- buf.pos(),
- buf.end());
- Base::advance(buf.available());
- continue;
- }
-
- token_buffer_.insert(
- token_buffer_.end(),
- buf.pos(),
- quote);
- Base::advance(quote - buf.pos() + 1); // +1 for the quote itself
-
- // We must count the number of '\' at the end of StringValue
- // to check if it's not \"
- int slash_count = count_trailing_slashes(
- token_buffer_.data(),
- token_buffer_.data() + token_buffer_.size());
- if (slash_count % 2 == 0) {
- break;
- } else {
- token_buffer_.push_back('"');
- }
- check_memory_limit();
+ auto* quote = reinterpret_cast<const ui8*>(
+ ::memchr(buf.pos(), '"', buf.available()));
+ if (quote == nullptr) {
+ token_buffer_.insert(
+ token_buffer_.end(),
+ buf.pos(),
+ buf.end());
+ Base::advance(buf.available());
+ continue;
+ }
+
+ token_buffer_.insert(
+ token_buffer_.end(),
+ buf.pos(),
+ quote);
+ Base::advance(quote - buf.pos() + 1); // +1 for the quote itself
+
+ // We must count the number of '\' at the end of StringValue
+ // to check if it's not \"
+ int slash_count = count_trailing_slashes(
+ token_buffer_.data(),
+ token_buffer_.data() + token_buffer_.size());
+ if (slash_count % 2 == 0) {
+ break;
+ } else {
+ token_buffer_.push_back('"');
+ }
+ check_memory_limit();
}
-
- NCEscape::decode_inplace(token_buffer_);
- return token_buffer();
+
+ NCEscape::decode_inplace(token_buffer_);
+ return token_buffer();
}
- TStringBuf read_unquoted_string() {
- token_buffer_.clear();
- while (true) {
+ TStringBuf read_unquoted_string() {
+ token_buffer_.clear();
+ while (true) {
auto ch = this->Base::template get_byte<true>();
- if (isalpha(ch) || isdigit(ch) ||
- ch == '_' || ch == '-' || ch == '%' || ch == '.') {
- token_buffer_.push_back(ch);
- } else {
- break;
- }
- check_memory_limit();
- Base::advance(1);
- }
- return token_buffer();
+ if (isalpha(ch) || isdigit(ch) ||
+ ch == '_' || ch == '-' || ch == '%' || ch == '.') {
+ token_buffer_.push_back(ch);
+ } else {
+ break;
+ }
+ check_memory_limit();
+ Base::advance(1);
+ }
+ return token_buffer();
}
- ATTRIBUTE(noinline, hot)
- TStringBuf read_binary_string() {
- auto slength = NVarInt::read<i32>(*this);
+ ATTRIBUTE(noinline, hot)
+ TStringBuf read_binary_string() {
+ auto slength = NVarInt::read<i32>(*this);
if (Y_UNLIKELY(slength < 0)) {
- COLD_BLOCK_BYVALUE
- Base::fail("Negative binary string literal length ", slength);
- COLD_BLOCK_END
- }
- auto length = static_cast<ui32>(slength);
+ COLD_BLOCK_BYVALUE
+ Base::fail("Negative binary string literal length ", slength);
+ COLD_BLOCK_END
+ }
+ auto length = static_cast<ui32>(slength);
- auto& buf = Base::stream().buffer();
+ auto& buf = Base::stream().buffer();
if (Y_LIKELY(buf.available() >= length)) {
- auto result = TStringBuf{
- reinterpret_cast<const char*>(buf.pos()),
- length};
- Base::advance(length);
- return result;
- } else { // reading in Buffer
- return read_binary_string_fallback(length);
- }
+ auto result = TStringBuf{
+ reinterpret_cast<const char*>(buf.pos()),
+ length};
+ Base::advance(length);
+ return result;
+ } else { // reading in Buffer
+ return read_binary_string_fallback(length);
+ }
}
- ATTRIBUTE(noinline)
- TStringBuf read_binary_string_fallback(size_t length) {
- auto& buf = Base::stream().buffer();
- auto needToRead = length;
- token_buffer_.clear();
- while (needToRead) {
+ ATTRIBUTE(noinline)
+ TStringBuf read_binary_string_fallback(size_t length) {
+ auto& buf = Base::stream().buffer();
+ auto needToRead = length;
+ token_buffer_.clear();
+ while (needToRead) {
this->Base::template fill_buffer<false>();
- auto chunk_size = std::min(needToRead, buf.available());
-
- token_buffer_.insert(
- token_buffer_.end(),
- buf.pos(),
- buf.pos() + chunk_size);
- check_memory_limit();
- needToRead -= chunk_size;
- Base::advance(chunk_size);
- }
- return token_buffer();
+ auto chunk_size = std::min(needToRead, buf.available());
+
+ token_buffer_.insert(
+ token_buffer_.end(),
+ buf.pos(),
+ buf.pos() + chunk_size);
+ check_memory_limit();
+ needToRead -= chunk_size;
+ Base::advance(chunk_size);
+ }
+ return token_buffer();
}
percent_scalar read_percent_scalar() {
auto throw_incorrect_percent_scalar = [&]() {
Base::fail("Incorrect %-literal prefix ", NCEscape::quote(token_buffer()));
- };
+ };
auto assert_literal = [&](TStringBuf literal) -> void {
for (size_t i = 2; i < literal.size(); ++i) {
token_buffer_.push_back(this->Base::template get_byte<false>());
- Base::advance(1);
+ Base::advance(1);
if (Y_UNLIKELY(token_buffer_.back() != literal[i])) {
throw_incorrect_percent_scalar();
- }
- }
- };
+ }
+ }
+ };
- token_buffer_.clear();
+ token_buffer_.clear();
token_buffer_.push_back(this->Base::template get_byte<false>());
Base::advance(1);
@@ -237,107 +237,107 @@ namespace NYsonPull {
default:
throw_incorrect_percent_scalar();
}
-
+
Y_UNREACHABLE();
}
- i64 read_binary_int64() {
- return NVarInt::read<i64>(*this);
- }
-
- ui64 read_binary_uint64() {
- return NVarInt::read<ui64>(*this);
- }
-
- double read_binary_double() {
- union {
- double as_double;
- ui8 as_bytes[sizeof(double)];
- } data;
- static_assert(sizeof(data) == sizeof(double), "bad union size");
-
- auto needToRead = sizeof(double);
-
- auto& buf = Base::stream().buffer();
- while (needToRead != 0) {
- Base::fill_buffer();
-
- auto chunk_size = std::min(needToRead, buf.available());
- if (chunk_size == 0) {
- Base::fail("Error parsing binary double literal");
- }
- std::copy(
- buf.pos(),
- buf.pos() + chunk_size,
- data.as_bytes + (sizeof(double) - needToRead));
- needToRead -= chunk_size;
- Base::advance(chunk_size);
- }
- return data.as_double;
- }
-
- private:
- static bool is_space(ui8 ch) {
- static const ui8 lookupTable[] =
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
- return lookupTable[ch];
+ i64 read_binary_int64() {
+ return NVarInt::read<i64>(*this);
+ }
+
+ ui64 read_binary_uint64() {
+ return NVarInt::read<ui64>(*this);
+ }
+
+ double read_binary_double() {
+ union {
+ double as_double;
+ ui8 as_bytes[sizeof(double)];
+ } data;
+ static_assert(sizeof(data) == sizeof(double), "bad union size");
+
+ auto needToRead = sizeof(double);
+
+ auto& buf = Base::stream().buffer();
+ while (needToRead != 0) {
+ Base::fill_buffer();
+
+ auto chunk_size = std::min(needToRead, buf.available());
+ if (chunk_size == 0) {
+ Base::fail("Error parsing binary double literal");
+ }
+ std::copy(
+ buf.pos(),
+ buf.pos() + chunk_size,
+ data.as_bytes + (sizeof(double) - needToRead));
+ needToRead -= chunk_size;
+ Base::advance(chunk_size);
+ }
+ return data.as_double;
}
- ATTRIBUTE(noinline, cold)
- ui8 skip_space_and_get_byte_fallback() {
- auto& buf = Base::stream().buffer();
- while (true) {
- // FIXME
- if (buf.is_empty()) {
- if (Base::stream().at_end()) {
- return '\0';
- }
- Base::fill_buffer();
- } else {
- if (!is_space(*buf.pos())) {
- break;
- }
- Base::advance(1);
- }
+ private:
+ static bool is_space(ui8 ch) {
+ static const ui8 lookupTable[] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ return lookupTable[ch];
+ }
+
+ ATTRIBUTE(noinline, cold)
+ ui8 skip_space_and_get_byte_fallback() {
+ auto& buf = Base::stream().buffer();
+ while (true) {
+ // FIXME
+ if (buf.is_empty()) {
+ if (Base::stream().at_end()) {
+ return '\0';
+ }
+ Base::fill_buffer();
+ } else {
+ if (!is_space(*buf.pos())) {
+ break;
+ }
+ Base::advance(1);
+ }
}
- return Base::get_byte();
- }
-
- void check_memory_limit() {
+ return Base::get_byte();
+ }
+
+ void check_memory_limit() {
if (Y_UNLIKELY(memory_limit_ && token_buffer_.capacity() > *memory_limit_)) {
- COLD_BLOCK_BYVALUE
- Base::fail(
- "Memory limit exceeded while parsing YSON stream: "
- "allocated ",
- token_buffer_.capacity(),
- ", limit ", *memory_limit_);
- COLD_BLOCK_END
+ COLD_BLOCK_BYVALUE
+ Base::fail(
+ "Memory limit exceeded while parsing YSON stream: "
+ "allocated ",
+ token_buffer_.capacity(),
+ ", limit ", *memory_limit_);
+ COLD_BLOCK_END
}
}
- TStringBuf token_buffer() const {
- auto* begin = reinterpret_cast<const char*>(token_buffer_.data());
- return {begin, token_buffer_.size()};
- }
- };
+ TStringBuf token_buffer() const {
+ auto* begin = reinterpret_cast<const char*>(token_buffer_.data());
+ return {begin, token_buffer_.size()};
+ }
+ };
}
-}
+}