diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/yson/detail.h | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/yson/detail.h')
-rw-r--r-- | library/cpp/yson/detail.h | 1504 |
1 files changed, 752 insertions, 752 deletions
diff --git a/library/cpp/yson/detail.h b/library/cpp/yson/detail.h index 47f2d3fa3a..27f5e8ffff 100644 --- a/library/cpp/yson/detail.h +++ b/library/cpp/yson/detail.h @@ -11,796 +11,796 @@ #include <util/stream/input.h> namespace NYson { - namespace NDetail { - //////////////////////////////////////////////////////////////////////////////// - - //! Indicates the beginning of a list. - const char BeginListSymbol = '['; - //! Indicates the end of a list. - const char EndListSymbol = ']'; - - //! Indicates the beginning of a map. - const char BeginMapSymbol = '{'; - //! Indicates the end of a map. - const char EndMapSymbol = '}'; - - //! Indicates the beginning of an attribute map. - const char BeginAttributesSymbol = '<'; - //! Indicates the end of an attribute map. - const char EndAttributesSymbol = '>'; - - //! Separates items in lists. - const char ListItemSeparatorSymbol = ';'; - //! Separates items in maps, attributes. - const char KeyedItemSeparatorSymbol = ';'; - //! Separates keys from values in maps. - const char KeyValueSeparatorSymbol = '='; - - //! Indicates an entity. - const char EntitySymbol = '#'; - - //! Indicates end of stream. - const char EndSymbol = '\0'; - - //! Marks the beginning of a binary string literal. - const char StringMarker = '\x01'; - //! Marks the beginning of a binary i64 literal. - const char Int64Marker = '\x02'; - //! Marks the beginning of a binary double literal. - const char DoubleMarker = '\x03'; - //! Marks true and false values of boolean. - const char FalseMarker = '\x04'; - const char TrueMarker = '\x05'; - //! Marks the beginning of a binary ui64 literal. - const char Uint64Marker = '\x06'; - - //////////////////////////////////////////////////////////////////////////////// - - template <bool EnableLinePositionInfo> - class TPositionInfo; - - template <> - class TPositionInfo<true> { - private: - int Offset; - int Line; - int Column; - - public: - TPositionInfo() - : Offset(0) - , Line(1) - , Column(1) - { - } - - void OnRangeConsumed(const char* begin, const char* end) { - Offset += end - begin; - for (auto current = begin; current != end; ++current) { - ++Column; - if (*current == '\n') { //TODO: memchr - ++Line; - Column = 1; - } - } - } - }; - - template <> - class TPositionInfo<false> { - private: - int Offset; - - public: - TPositionInfo() - : Offset(0) - { - } - - void OnRangeConsumed(const char* begin, const char* end) { - Offset += end - begin; - } - }; - - template <class TBlockStream, class TPositionBase> - class TCharStream - : public TBlockStream, - public TPositionBase { - public: - TCharStream(const TBlockStream& blockStream) - : TBlockStream(blockStream) - { - } - - bool IsEmpty() const { - return TBlockStream::Begin() == TBlockStream::End(); - } - - template <bool AllowFinish> - void Refresh() { - while (IsEmpty() && !TBlockStream::IsFinished()) { - TBlockStream::RefreshBlock(); - } - if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) { - ythrow TYsonException() << "Premature end of yson stream"; - } - } - - void Refresh() { - return Refresh<false>(); - } - - template <bool AllowFinish> - char GetChar() { - Refresh<AllowFinish>(); - return !IsEmpty() ? *TBlockStream::Begin() : '\0'; - } - - char GetChar() { - return GetChar<false>(); - } - - void Advance(size_t bytes) { - TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes); - TBlockStream::Advance(bytes); - } - - size_t Length() const { - return TBlockStream::End() - TBlockStream::Begin(); - } - }; - - template <class TBaseStream> - class TCodedStream - : public TBaseStream { - private: - static const int MaxVarintBytes = 10; - static const int MaxVarint32Bytes = 5; - - const ui8* BeginByte() const { - return reinterpret_cast<const ui8*>(TBaseStream::Begin()); - } - - const ui8* EndByte() const { - return reinterpret_cast<const ui8*>(TBaseStream::End()); - } - - // Following functions is an adaptation Protobuf code from coded_stream.cc - bool ReadVarint32FromArray(ui32* value) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - const ui8* ptr = BeginByte(); - ui32 b; - ui32 result; - - b = *(ptr++); - result = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= b << 28; - if (!(b & 0x80)) - goto done; - - // If the input is larger than 32 bits, we still need to read it all - // and discard the high-order bits. - - for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) { - b = *(ptr++); - if (!(b & 0x80)) - goto done; - } - - // We have overrun the maximum size of a Varint (10 bytes). Assume - // the data is corrupt. - return false; - - done: - TBaseStream::Advance(ptr - BeginByte()); - *value = result; - return true; - } - - bool ReadVarint32Fallback(ui32* value) { - if (BeginByte() + MaxVarint32Bytes <= EndByte() || - // Optimization: If the Varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. - (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) - { - return ReadVarint32FromArray(value); - } else { - // Really slow case: we will incur the cost of an extra function call here, - // but moving this out of line reduces the size of this function, which - // improves the common case. In micro benchmarks, this is worth about 10-15% - return ReadVarint32Slow(value); - } - } - - bool ReadVarint32Slow(ui32* value) { - ui64 result; - // Directly invoke ReadVarint64Fallback, since we already tried to optimize - // for one-byte Varints. - if (ReadVarint64Fallback(&result)) { - *value = static_cast<ui32>(result); - return true; - } else { - return false; - } - } - - bool ReadVarint64Slow(ui64* value) { - // Slow path: This read might cross the end of the buffer, so we - // need to check and refresh the buffer if and when it does. - - ui64 result = 0; - int count = 0; - ui32 b; - - do { - if (count == MaxVarintBytes) { - return false; - } - while (BeginByte() == EndByte()) { - TBaseStream::Refresh(); - } - b = *BeginByte(); - result |= static_cast<ui64>(b & 0x7F) << (7 * count); - TBaseStream::Advance(1); - ++count; - } while (b & 0x80); - - *value = result; - return true; - } - - bool ReadVarint64Fallback(ui64* value) { - if (BeginByte() + MaxVarintBytes <= EndByte() || - // Optimization: If the Varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. - (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) - { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - - const ui8* ptr = BeginByte(); - ui32 b; - - // Splitting into 32-bit pieces gives better performance on 32-bit - // processors. - ui32 part0 = 0, part1 = 0, part2 = 0; - - b = *(ptr++); - part0 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part2 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part2 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - - // We have overrun the maximum size of a Varint (10 bytes). The data - // must be corrupt. - return false; - - done: - TBaseStream::Advance(ptr - BeginByte()); - *value = (static_cast<ui64>(part0)) | - (static_cast<ui64>(part1) << 28) | - (static_cast<ui64>(part2) << 56); - return true; - } else { - return ReadVarint64Slow(value); - } - } - - public: - TCodedStream(const TBaseStream& baseStream) - : TBaseStream(baseStream) - { - } - - bool ReadVarint64(ui64* value) { - if (BeginByte() < EndByte() && *BeginByte() < 0x80) { - *value = *BeginByte(); - TBaseStream::Advance(1); - return true; - } else { - return ReadVarint64Fallback(value); - } - } - - bool ReadVarint32(ui32* value) { - if (BeginByte() < EndByte() && *BeginByte() < 0x80) { - *value = *BeginByte(); - TBaseStream::Advance(1); - return true; - } else { - return ReadVarint32Fallback(value); - } - } - }; - - enum ENumericResult { - Int64 = 0, - Uint64 = 1, - Double = 2 - }; - - template <class TBlockStream, bool EnableLinePositionInfo> - class TLexerBase - : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> { - private: - using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>; - TVector<char> Buffer_; - TMaybe<ui64> MemoryLimit_; - - void CheckMemoryLimit() { - if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) { + namespace NDetail { + //////////////////////////////////////////////////////////////////////////////// + + //! Indicates the beginning of a list. + const char BeginListSymbol = '['; + //! Indicates the end of a list. + const char EndListSymbol = ']'; + + //! Indicates the beginning of a map. + const char BeginMapSymbol = '{'; + //! Indicates the end of a map. + const char EndMapSymbol = '}'; + + //! Indicates the beginning of an attribute map. + const char BeginAttributesSymbol = '<'; + //! Indicates the end of an attribute map. + const char EndAttributesSymbol = '>'; + + //! Separates items in lists. + const char ListItemSeparatorSymbol = ';'; + //! Separates items in maps, attributes. + const char KeyedItemSeparatorSymbol = ';'; + //! Separates keys from values in maps. + const char KeyValueSeparatorSymbol = '='; + + //! Indicates an entity. + const char EntitySymbol = '#'; + + //! Indicates end of stream. + const char EndSymbol = '\0'; + + //! Marks the beginning of a binary string literal. + const char StringMarker = '\x01'; + //! Marks the beginning of a binary i64 literal. + const char Int64Marker = '\x02'; + //! Marks the beginning of a binary double literal. + const char DoubleMarker = '\x03'; + //! Marks true and false values of boolean. + const char FalseMarker = '\x04'; + const char TrueMarker = '\x05'; + //! Marks the beginning of a binary ui64 literal. + const char Uint64Marker = '\x06'; + + //////////////////////////////////////////////////////////////////////////////// + + template <bool EnableLinePositionInfo> + class TPositionInfo; + + template <> + class TPositionInfo<true> { + private: + int Offset; + int Line; + int Column; + + public: + TPositionInfo() + : Offset(0) + , Line(1) + , Column(1) + { + } + + void OnRangeConsumed(const char* begin, const char* end) { + Offset += end - begin; + for (auto current = begin; current != end; ++current) { + ++Column; + if (*current == '\n') { //TODO: memchr + ++Line; + Column = 1; + } + } + } + }; + + template <> + class TPositionInfo<false> { + private: + int Offset; + + public: + TPositionInfo() + : Offset(0) + { + } + + void OnRangeConsumed(const char* begin, const char* end) { + Offset += end - begin; + } + }; + + template <class TBlockStream, class TPositionBase> + class TCharStream + : public TBlockStream, + public TPositionBase { + public: + TCharStream(const TBlockStream& blockStream) + : TBlockStream(blockStream) + { + } + + bool IsEmpty() const { + return TBlockStream::Begin() == TBlockStream::End(); + } + + template <bool AllowFinish> + void Refresh() { + while (IsEmpty() && !TBlockStream::IsFinished()) { + TBlockStream::RefreshBlock(); + } + if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) { + ythrow TYsonException() << "Premature end of yson stream"; + } + } + + void Refresh() { + return Refresh<false>(); + } + + template <bool AllowFinish> + char GetChar() { + Refresh<AllowFinish>(); + return !IsEmpty() ? *TBlockStream::Begin() : '\0'; + } + + char GetChar() { + return GetChar<false>(); + } + + void Advance(size_t bytes) { + TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes); + TBlockStream::Advance(bytes); + } + + size_t Length() const { + return TBlockStream::End() - TBlockStream::Begin(); + } + }; + + template <class TBaseStream> + class TCodedStream + : public TBaseStream { + private: + static const int MaxVarintBytes = 10; + static const int MaxVarint32Bytes = 5; + + const ui8* BeginByte() const { + return reinterpret_cast<const ui8*>(TBaseStream::Begin()); + } + + const ui8* EndByte() const { + return reinterpret_cast<const ui8*>(TBaseStream::End()); + } + + // Following functions is an adaptation Protobuf code from coded_stream.cc + bool ReadVarint32FromArray(ui32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + const ui8* ptr = BeginByte(); + ui32 b; + ui32 result; + + b = *(ptr++); + result = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= b << 28; + if (!(b & 0x80)) + goto done; + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + + for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) { + b = *(ptr++); + if (!(b & 0x80)) + goto done; + } + + // We have overrun the maximum size of a Varint (10 bytes). Assume + // the data is corrupt. + return false; + + done: + TBaseStream::Advance(ptr - BeginByte()); + *value = result; + return true; + } + + bool ReadVarint32Fallback(ui32* value) { + if (BeginByte() + MaxVarint32Bytes <= EndByte() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) + { + return ReadVarint32FromArray(value); + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarint32Slow(value); + } + } + + bool ReadVarint32Slow(ui32* value) { + ui64 result; + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte Varints. + if (ReadVarint64Fallback(&result)) { + *value = static_cast<ui32>(result); + return true; + } else { + return false; + } + } + + bool ReadVarint64Slow(ui64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. + + ui64 result = 0; + int count = 0; + ui32 b; + + do { + if (count == MaxVarintBytes) { + return false; + } + while (BeginByte() == EndByte()) { + TBaseStream::Refresh(); + } + b = *BeginByte(); + result |= static_cast<ui64>(b & 0x7F) << (7 * count); + TBaseStream::Advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; + } + + bool ReadVarint64Fallback(ui64* value) { + if (BeginByte() + MaxVarintBytes <= EndByte() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) + { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + + const ui8* ptr = BeginByte(); + ui32 b; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + ui32 part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); + part0 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + + // We have overrun the maximum size of a Varint (10 bytes). The data + // must be corrupt. + return false; + + done: + TBaseStream::Advance(ptr - BeginByte()); + *value = (static_cast<ui64>(part0)) | + (static_cast<ui64>(part1) << 28) | + (static_cast<ui64>(part2) << 56); + return true; + } else { + return ReadVarint64Slow(value); + } + } + + public: + TCodedStream(const TBaseStream& baseStream) + : TBaseStream(baseStream) + { + } + + bool ReadVarint64(ui64* value) { + if (BeginByte() < EndByte() && *BeginByte() < 0x80) { + *value = *BeginByte(); + TBaseStream::Advance(1); + return true; + } else { + return ReadVarint64Fallback(value); + } + } + + bool ReadVarint32(ui32* value) { + if (BeginByte() < EndByte() && *BeginByte() < 0x80) { + *value = *BeginByte(); + TBaseStream::Advance(1); + return true; + } else { + return ReadVarint32Fallback(value); + } + } + }; + + enum ENumericResult { + Int64 = 0, + Uint64 = 1, + Double = 2 + }; + + template <class TBlockStream, bool EnableLinePositionInfo> + class TLexerBase + : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> { + private: + using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>; + TVector<char> Buffer_; + TMaybe<ui64> MemoryLimit_; + + void CheckMemoryLimit() { + if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) { ythrow TYsonException() << "Memory limit exceeded while parsing YSON stream: allocated " << Buffer_.capacity() << ", limit " << (*MemoryLimit_); - } - } - - public: - TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) - : TBaseStream(blockStream) - , MemoryLimit_(memoryLimit) - { - } - - protected: - /// Lexer routines - - template <bool AllowFinish> - ENumericResult ReadNumeric(TStringBuf* value) { - Buffer_.clear(); - ENumericResult result = ENumericResult::Int64; - while (true) { - char ch = TBaseStream::template GetChar<AllowFinish>(); - if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-' - Buffer_.push_back(ch); - } else if (ch == '.' || ch == 'e' || ch == 'E') { - Buffer_.push_back(ch); - result = ENumericResult::Double; - } else if (ch == 'u') { - Buffer_.push_back(ch); - result = ENumericResult::Uint64; - } else if (isalpha(ch)) { + } + } + + public: + TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) + : TBaseStream(blockStream) + , MemoryLimit_(memoryLimit) + { + } + + protected: + /// Lexer routines + + template <bool AllowFinish> + ENumericResult ReadNumeric(TStringBuf* value) { + Buffer_.clear(); + ENumericResult result = ENumericResult::Int64; + while (true) { + char ch = TBaseStream::template GetChar<AllowFinish>(); + if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-' + Buffer_.push_back(ch); + } else if (ch == '.' || ch == 'e' || ch == 'E') { + Buffer_.push_back(ch); + result = ENumericResult::Double; + } else if (ch == 'u') { + Buffer_.push_back(ch); + result = ENumericResult::Uint64; + } else if (isalpha(ch)) { ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal"; - } else { - break; - } - CheckMemoryLimit(); - TBaseStream::Advance(1); - } + } else { + break; + } + CheckMemoryLimit(); + TBaseStream::Advance(1); + } - *value = TStringBuf(Buffer_.data(), Buffer_.size()); - return result; + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + return result; } - template <bool AllowFinish> - double ReadNanOrInf() { + template <bool AllowFinish> + double ReadNanOrInf() { static const TStringBuf nanString = "nan"; static const TStringBuf infString = "inf"; static const TStringBuf plusInfString = "+inf"; static const TStringBuf minusInfString = "-inf"; - TStringBuf expectedString; - double expectedValue; - char ch = TBaseStream::template GetChar<AllowFinish>(); - switch (ch) { - case '+': - expectedString = plusInfString; - expectedValue = std::numeric_limits<double>::infinity(); - break; - case '-': - expectedString = minusInfString; - expectedValue = -std::numeric_limits<double>::infinity(); - break; - case 'i': - expectedString = infString; - expectedValue = std::numeric_limits<double>::infinity(); - break; - case 'n': - expectedString = nanString; - expectedValue = std::numeric_limits<double>::quiet_NaN(); - break; - default: - ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'"; - } + TStringBuf expectedString; + double expectedValue; + char ch = TBaseStream::template GetChar<AllowFinish>(); + switch (ch) { + case '+': + expectedString = plusInfString; + expectedValue = std::numeric_limits<double>::infinity(); + break; + case '-': + expectedString = minusInfString; + expectedValue = -std::numeric_limits<double>::infinity(); + break; + case 'i': + expectedString = infString; + expectedValue = std::numeric_limits<double>::infinity(); + break; + case 'n': + expectedString = nanString; + expectedValue = std::numeric_limits<double>::quiet_NaN(); + break; + default: + ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'"; + } for (size_t i = 0; i < expectedString.size(); ++i) { - if (expectedString[i] != ch) { - ythrow TYsonException() - << "Incorrect %-literal prefix " - << "'" << expectedString.SubStr(0, i) << ch << "'," - << "expected " << expectedString; - } - TBaseStream::Advance(1); - ch = TBaseStream::template GetChar<AllowFinish>(); - } - - return expectedValue; - } - - void ReadQuotedString(TStringBuf* value) { - Buffer_.clear(); - while (true) { - if (TBaseStream::IsEmpty()) { - TBaseStream::Refresh(); - } - char ch = *TBaseStream::Begin(); - TBaseStream::Advance(1); - if (ch != '"') { - Buffer_.push_back(ch); - } else { - // We must count the number of '\' at the end of StringValue - // to check if it's not \" - int slashCount = 0; - int length = Buffer_.size(); - while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') { - ++slashCount; - } - if (slashCount % 2 == 0) { - break; - } else { - Buffer_.push_back(ch); - } - } - CheckMemoryLimit(); - } - - auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size()); - Buffer_.clear(); - Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size()); - CheckMemoryLimit(); - *value = TStringBuf(Buffer_.data(), Buffer_.size()); - } - - template <bool AllowFinish> - void ReadUnquotedString(TStringBuf* value) { - Buffer_.clear(); - while (true) { - char ch = TBaseStream::template GetChar<AllowFinish>(); - if (isalpha(ch) || isdigit(ch) || - ch == '_' || ch == '-' || ch == '%' || ch == '.') { - Buffer_.push_back(ch); - } else { - break; - } - CheckMemoryLimit(); - TBaseStream::Advance(1); + if (expectedString[i] != ch) { + ythrow TYsonException() + << "Incorrect %-literal prefix " + << "'" << expectedString.SubStr(0, i) << ch << "'," + << "expected " << expectedString; + } + TBaseStream::Advance(1); + ch = TBaseStream::template GetChar<AllowFinish>(); + } + + return expectedValue; + } + + void ReadQuotedString(TStringBuf* value) { + Buffer_.clear(); + while (true) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + } + char ch = *TBaseStream::Begin(); + TBaseStream::Advance(1); + if (ch != '"') { + Buffer_.push_back(ch); + } else { + // We must count the number of '\' at the end of StringValue + // to check if it's not \" + int slashCount = 0; + int length = Buffer_.size(); + while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') { + ++slashCount; + } + if (slashCount % 2 == 0) { + break; + } else { + Buffer_.push_back(ch); + } + } + CheckMemoryLimit(); + } + + auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size()); + Buffer_.clear(); + Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size()); + CheckMemoryLimit(); + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + + template <bool AllowFinish> + void ReadUnquotedString(TStringBuf* value) { + Buffer_.clear(); + while (true) { + char ch = TBaseStream::template GetChar<AllowFinish>(); + if (isalpha(ch) || isdigit(ch) || + ch == '_' || ch == '-' || ch == '%' || ch == '.') { + Buffer_.push_back(ch); + } else { + break; + } + CheckMemoryLimit(); + TBaseStream::Advance(1); } - *value = TStringBuf(Buffer_.data(), Buffer_.size()); + *value = TStringBuf(Buffer_.data(), Buffer_.size()); } - void ReadUnquotedString(TStringBuf* value) { - return ReadUnquotedString<false>(value); + void ReadUnquotedString(TStringBuf* value) { + return ReadUnquotedString<false>(value); } - void ReadBinaryString(TStringBuf* value) { - ui32 ulength = 0; - if (!TBaseStream::ReadVarint32(&ulength)) { - ythrow TYsonException() << "Error parsing varint value"; - } + void ReadBinaryString(TStringBuf* value) { + ui32 ulength = 0; + if (!TBaseStream::ReadVarint32(&ulength)) { + ythrow TYsonException() << "Error parsing varint value"; + } - i32 length = ZigZagDecode32(ulength); - if (length < 0) { + i32 length = ZigZagDecode32(ulength); + if (length < 0) { ythrow TYsonException() << "Negative binary string literal length " << length; - } - - if (TBaseStream::Begin() + length <= TBaseStream::End()) { - *value = TStringBuf(TBaseStream::Begin(), length); - TBaseStream::Advance(length); - } else { // reading in Buffer - size_t needToRead = length; - Buffer_.clear(); - while (needToRead) { - if (TBaseStream::IsEmpty()) { - TBaseStream::Refresh(); - continue; - } - size_t readingBytes = Min(needToRead, TBaseStream::Length()); - - Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes); - CheckMemoryLimit(); - needToRead -= readingBytes; - TBaseStream::Advance(readingBytes); - } - *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + + if (TBaseStream::Begin() + length <= TBaseStream::End()) { + *value = TStringBuf(TBaseStream::Begin(), length); + TBaseStream::Advance(length); + } else { // reading in Buffer + size_t needToRead = length; + Buffer_.clear(); + while (needToRead) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + continue; + } + size_t readingBytes = Min(needToRead, TBaseStream::Length()); + + Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes); + CheckMemoryLimit(); + needToRead -= readingBytes; + TBaseStream::Advance(readingBytes); + } + *value = TStringBuf(Buffer_.data(), Buffer_.size()); } } - template <bool AllowFinish> - bool ReadBoolean() { - Buffer_.clear(); + template <bool AllowFinish> + bool ReadBoolean() { + Buffer_.clear(); static TStringBuf trueString = "true"; static TStringBuf falseString = "false"; - auto throwIncorrectBoolean = [&]() { + auto throwIncorrectBoolean = [&]() { ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size()); - }; + }; Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); TBaseStream::Advance(1); - if (Buffer_[0] == trueString[0]) { + if (Buffer_[0] == trueString[0]) { for (size_t i = 1; i < trueString.size(); ++i) { - Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); - TBaseStream::Advance(1); - if (Buffer_.back() != trueString[i]) { - throwIncorrectBoolean(); - } - } - return true; - } else if (Buffer_[0] == falseString[0]) { + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_.back() != trueString[i]) { + throwIncorrectBoolean(); + } + } + return true; + } else if (Buffer_[0] == falseString[0]) { for (size_t i = 1; i < falseString.size(); ++i) { - Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); - TBaseStream::Advance(1); - if (Buffer_.back() != falseString[i]) { - throwIncorrectBoolean(); - } - } - return false; - } else { + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_.back() != falseString[i]) { + throwIncorrectBoolean(); + } + } + return false; + } else { throwIncorrectBoolean(); } - - Y_FAIL("unreachable"); - ; - } - - void ReadBinaryInt64(i64* result) { - ui64 uvalue; - if (!TBaseStream::ReadVarint64(&uvalue)) { - ythrow TYsonException() << "Error parsing varint value"; + + Y_FAIL("unreachable"); + ; + } + + void ReadBinaryInt64(i64* result) { + ui64 uvalue; + if (!TBaseStream::ReadVarint64(&uvalue)) { + ythrow TYsonException() << "Error parsing varint value"; + } + *result = ZigZagDecode64(uvalue); + } + + void ReadBinaryUint64(ui64* result) { + ui64 uvalue; + if (!TBaseStream::ReadVarint64(&uvalue)) { + ythrow TYsonException() << "Error parsing varint value"; } - *result = ZigZagDecode64(uvalue); - } - - void ReadBinaryUint64(ui64* result) { - ui64 uvalue; - if (!TBaseStream::ReadVarint64(&uvalue)) { - ythrow TYsonException() << "Error parsing varint value"; - } - *result = uvalue; - } - - void ReadBinaryDouble(double* value) { - size_t needToRead = sizeof(double); - - while (needToRead != 0) { - if (TBaseStream::IsEmpty()) { - TBaseStream::Refresh(); - continue; - } - - size_t chunkSize = Min(needToRead, TBaseStream::Length()); - if (chunkSize == 0) { - ythrow TYsonException() << "Error parsing binary double literal"; - } - std::copy( - TBaseStream::Begin(), - TBaseStream::Begin() + chunkSize, - reinterpret_cast<char*>(value) + (sizeof(double) - needToRead)); - needToRead -= chunkSize; - TBaseStream::Advance(chunkSize); - } - } - - /// Helpers - void SkipCharToken(char symbol) { - char ch = SkipSpaceAndGetChar(); - if (ch != symbol) { + *result = uvalue; + } + + void ReadBinaryDouble(double* value) { + size_t needToRead = sizeof(double); + + while (needToRead != 0) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + continue; + } + + size_t chunkSize = Min(needToRead, TBaseStream::Length()); + if (chunkSize == 0) { + ythrow TYsonException() << "Error parsing binary double literal"; + } + std::copy( + TBaseStream::Begin(), + TBaseStream::Begin() + chunkSize, + reinterpret_cast<char*>(value) + (sizeof(double) - needToRead)); + needToRead -= chunkSize; + TBaseStream::Advance(chunkSize); + } + } + + /// Helpers + void SkipCharToken(char symbol) { + char ch = SkipSpaceAndGetChar(); + if (ch != symbol) { ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'"; - } - - TBaseStream::Advance(1); - } - - static bool IsSpaceFast(char ch) { - static const ui8 lookupTable[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return lookupTable[static_cast<ui8>(ch)]; - } - - template <bool AllowFinish> - char SkipSpaceAndGetChar() { - if (!TBaseStream::IsEmpty()) { - char ch = *TBaseStream::Begin(); - if (!IsSpaceFast(ch)) { - return ch; - } - } - return SkipSpaceAndGetCharFallback<AllowFinish>(); - } - - char SkipSpaceAndGetChar() { - return SkipSpaceAndGetChar<false>(); - } - - template <bool AllowFinish> - char SkipSpaceAndGetCharFallback() { - while (true) { - if (TBaseStream::IsEmpty()) { - if (TBaseStream::IsFinished()) { - return '\0'; - } - TBaseStream::template Refresh<AllowFinish>(); - continue; - } - if (!IsSpaceFast(*TBaseStream::Begin())) { - break; - } - TBaseStream::Advance(1); } - return TBaseStream::template GetChar<AllowFinish>(); - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - } - - //////////////////////////////////////////////////////////////////////////////// - - class TStringReader { - private: - const char* BeginPtr; - const char* EndPtr; - - public: - TStringReader() - : BeginPtr(nullptr) - , EndPtr(nullptr) - { - } - - TStringReader(const char* begin, const char* end) - : BeginPtr(begin) - , EndPtr(end) - { - } - - const char* Begin() const { - return BeginPtr; - } - - const char* End() const { - return EndPtr; - } - - void RefreshBlock() { - Y_FAIL("unreachable"); - } - - void Advance(size_t bytes) { - BeginPtr += bytes; - } - - bool IsFinished() const { - return true; - } - - void SetBuffer(const char* begin, const char* end) { - BeginPtr = begin; - EndPtr = end; - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - class TStreamReader { - public: - TStreamReader( - IInputStream* stream, - char* buffer, - size_t bufferSize) - : Stream(stream) - , Buffer(buffer) - , BufferSize(bufferSize) - { - BeginPtr = EndPtr = Buffer; - FinishFlag = false; - } - - const char* Begin() const { - return BeginPtr; - } - - const char* End() const { - return EndPtr; - } - - void RefreshBlock() { - size_t bytes = Stream->Read(Buffer, BufferSize); - BeginPtr = Buffer; - EndPtr = Buffer + bytes; - FinishFlag = (bytes == 0); - } - - void Advance(size_t bytes) { - BeginPtr += bytes; - } - - bool IsFinished() const { - return FinishFlag; - } - - private: - IInputStream* Stream; - char* Buffer; - size_t BufferSize; - - const char* BeginPtr; - const char* EndPtr; - bool FinishFlag; - }; - - //////////////////////////////////////////////////////////////////////////////// + + TBaseStream::Advance(1); + } + + static bool IsSpaceFast(char ch) { + static const ui8 lookupTable[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return lookupTable[static_cast<ui8>(ch)]; + } + + template <bool AllowFinish> + char SkipSpaceAndGetChar() { + if (!TBaseStream::IsEmpty()) { + char ch = *TBaseStream::Begin(); + if (!IsSpaceFast(ch)) { + return ch; + } + } + return SkipSpaceAndGetCharFallback<AllowFinish>(); + } + + char SkipSpaceAndGetChar() { + return SkipSpaceAndGetChar<false>(); + } + + template <bool AllowFinish> + char SkipSpaceAndGetCharFallback() { + while (true) { + if (TBaseStream::IsEmpty()) { + if (TBaseStream::IsFinished()) { + return '\0'; + } + TBaseStream::template Refresh<AllowFinish>(); + continue; + } + if (!IsSpaceFast(*TBaseStream::Begin())) { + break; + } + TBaseStream::Advance(1); + } + return TBaseStream::template GetChar<AllowFinish>(); + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + } + + //////////////////////////////////////////////////////////////////////////////// + + class TStringReader { + private: + const char* BeginPtr; + const char* EndPtr; + + public: + TStringReader() + : BeginPtr(nullptr) + , EndPtr(nullptr) + { + } + + TStringReader(const char* begin, const char* end) + : BeginPtr(begin) + , EndPtr(end) + { + } + + const char* Begin() const { + return BeginPtr; + } + + const char* End() const { + return EndPtr; + } + + void RefreshBlock() { + Y_FAIL("unreachable"); + } + + void Advance(size_t bytes) { + BeginPtr += bytes; + } + + bool IsFinished() const { + return true; + } + + void SetBuffer(const char* begin, const char* end) { + BeginPtr = begin; + EndPtr = end; + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + class TStreamReader { + public: + TStreamReader( + IInputStream* stream, + char* buffer, + size_t bufferSize) + : Stream(stream) + , Buffer(buffer) + , BufferSize(bufferSize) + { + BeginPtr = EndPtr = Buffer; + FinishFlag = false; + } + + const char* Begin() const { + return BeginPtr; + } + + const char* End() const { + return EndPtr; + } + + void RefreshBlock() { + size_t bytes = Stream->Read(Buffer, BufferSize); + BeginPtr = Buffer; + EndPtr = Buffer + bytes; + FinishFlag = (bytes == 0); + } + + void Advance(size_t bytes) { + BeginPtr += bytes; + } + + bool IsFinished() const { + return FinishFlag; + } + + private: + IInputStream* Stream; + char* Buffer; + size_t BufferSize; + + const char* BeginPtr; + const char* EndPtr; + bool FinishFlag; + }; + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson |