diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/yson | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/yson')
-rw-r--r-- | library/cpp/yson/consumer.cpp | 8 | ||||
-rw-r--r-- | library/cpp/yson/consumer.h | 4 | ||||
-rw-r--r-- | library/cpp/yson/detail.h | 1504 | ||||
-rw-r--r-- | library/cpp/yson/format.h | 24 | ||||
-rw-r--r-- | library/cpp/yson/json/json_writer.cpp | 320 | ||||
-rw-r--r-- | library/cpp/yson/json/json_writer.h | 102 | ||||
-rw-r--r-- | library/cpp/yson/json/yson2json_adapter.h | 2 | ||||
-rw-r--r-- | library/cpp/yson/lexer.cpp | 58 | ||||
-rw-r--r-- | library/cpp/yson/lexer.h | 22 | ||||
-rw-r--r-- | library/cpp/yson/lexer_detail.h | 492 | ||||
-rw-r--r-- | library/cpp/yson/parser.cpp | 244 | ||||
-rw-r--r-- | library/cpp/yson/parser.h | 86 | ||||
-rw-r--r-- | library/cpp/yson/parser_detail.h | 646 | ||||
-rw-r--r-- | library/cpp/yson/public.h | 8 | ||||
-rw-r--r-- | library/cpp/yson/token.cpp | 418 | ||||
-rw-r--r-- | library/cpp/yson/token.h | 168 | ||||
-rw-r--r-- | library/cpp/yson/tokenizer.cpp | 66 | ||||
-rw-r--r-- | library/cpp/yson/tokenizer.h | 32 | ||||
-rw-r--r-- | library/cpp/yson/varint.cpp | 102 | ||||
-rw-r--r-- | library/cpp/yson/varint.h | 22 | ||||
-rw-r--r-- | library/cpp/yson/writer.cpp | 510 | ||||
-rw-r--r-- | library/cpp/yson/writer.h | 104 | ||||
-rw-r--r-- | library/cpp/yson/zigzag.h | 38 |
23 files changed, 2490 insertions, 2490 deletions
diff --git a/library/cpp/yson/consumer.cpp b/library/cpp/yson/consumer.cpp index 6f9f975bb6..40ae452978 100644 --- a/library/cpp/yson/consumer.cpp +++ b/library/cpp/yson/consumer.cpp @@ -4,12 +4,12 @@ namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// void TYsonConsumerBase::OnRaw(TStringBuf str, NYT::NYson::EYsonType type) { ParseYsonStringBuffer(str, this, type); - } + } + + //////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////// - } // namespace NYson diff --git a/library/cpp/yson/consumer.h b/library/cpp/yson/consumer.h index 52d160c6cc..d5a9d66335 100644 --- a/library/cpp/yson/consumer.h +++ b/library/cpp/yson/consumer.h @@ -6,8 +6,8 @@ #include <util/system/defaults.h> namespace NYson { - struct TYsonConsumerBase + struct TYsonConsumerBase : public virtual NYT::NYson::IYsonConsumer { void OnRaw(TStringBuf ysonNode, NYT::NYson::EYsonType type) override; - }; + }; } // namespace NYson diff --git a/library/cpp/yson/detail.h b/library/cpp/yson/detail.h index 47f2d3fa3a..27f5e8ffff 100644 --- a/library/cpp/yson/detail.h +++ b/library/cpp/yson/detail.h @@ -11,796 +11,796 @@ #include <util/stream/input.h> namespace NYson { - namespace NDetail { - //////////////////////////////////////////////////////////////////////////////// - - //! Indicates the beginning of a list. - const char BeginListSymbol = '['; - //! Indicates the end of a list. - const char EndListSymbol = ']'; - - //! Indicates the beginning of a map. - const char BeginMapSymbol = '{'; - //! Indicates the end of a map. - const char EndMapSymbol = '}'; - - //! Indicates the beginning of an attribute map. - const char BeginAttributesSymbol = '<'; - //! Indicates the end of an attribute map. - const char EndAttributesSymbol = '>'; - - //! Separates items in lists. - const char ListItemSeparatorSymbol = ';'; - //! Separates items in maps, attributes. - const char KeyedItemSeparatorSymbol = ';'; - //! Separates keys from values in maps. - const char KeyValueSeparatorSymbol = '='; - - //! Indicates an entity. - const char EntitySymbol = '#'; - - //! Indicates end of stream. - const char EndSymbol = '\0'; - - //! Marks the beginning of a binary string literal. - const char StringMarker = '\x01'; - //! Marks the beginning of a binary i64 literal. - const char Int64Marker = '\x02'; - //! Marks the beginning of a binary double literal. - const char DoubleMarker = '\x03'; - //! Marks true and false values of boolean. - const char FalseMarker = '\x04'; - const char TrueMarker = '\x05'; - //! Marks the beginning of a binary ui64 literal. - const char Uint64Marker = '\x06'; - - //////////////////////////////////////////////////////////////////////////////// - - template <bool EnableLinePositionInfo> - class TPositionInfo; - - template <> - class TPositionInfo<true> { - private: - int Offset; - int Line; - int Column; - - public: - TPositionInfo() - : Offset(0) - , Line(1) - , Column(1) - { - } - - void OnRangeConsumed(const char* begin, const char* end) { - Offset += end - begin; - for (auto current = begin; current != end; ++current) { - ++Column; - if (*current == '\n') { //TODO: memchr - ++Line; - Column = 1; - } - } - } - }; - - template <> - class TPositionInfo<false> { - private: - int Offset; - - public: - TPositionInfo() - : Offset(0) - { - } - - void OnRangeConsumed(const char* begin, const char* end) { - Offset += end - begin; - } - }; - - template <class TBlockStream, class TPositionBase> - class TCharStream - : public TBlockStream, - public TPositionBase { - public: - TCharStream(const TBlockStream& blockStream) - : TBlockStream(blockStream) - { - } - - bool IsEmpty() const { - return TBlockStream::Begin() == TBlockStream::End(); - } - - template <bool AllowFinish> - void Refresh() { - while (IsEmpty() && !TBlockStream::IsFinished()) { - TBlockStream::RefreshBlock(); - } - if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) { - ythrow TYsonException() << "Premature end of yson stream"; - } - } - - void Refresh() { - return Refresh<false>(); - } - - template <bool AllowFinish> - char GetChar() { - Refresh<AllowFinish>(); - return !IsEmpty() ? *TBlockStream::Begin() : '\0'; - } - - char GetChar() { - return GetChar<false>(); - } - - void Advance(size_t bytes) { - TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes); - TBlockStream::Advance(bytes); - } - - size_t Length() const { - return TBlockStream::End() - TBlockStream::Begin(); - } - }; - - template <class TBaseStream> - class TCodedStream - : public TBaseStream { - private: - static const int MaxVarintBytes = 10; - static const int MaxVarint32Bytes = 5; - - const ui8* BeginByte() const { - return reinterpret_cast<const ui8*>(TBaseStream::Begin()); - } - - const ui8* EndByte() const { - return reinterpret_cast<const ui8*>(TBaseStream::End()); - } - - // Following functions is an adaptation Protobuf code from coded_stream.cc - bool ReadVarint32FromArray(ui32* value) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - const ui8* ptr = BeginByte(); - ui32 b; - ui32 result; - - b = *(ptr++); - result = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - result |= b << 28; - if (!(b & 0x80)) - goto done; - - // If the input is larger than 32 bits, we still need to read it all - // and discard the high-order bits. - - for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) { - b = *(ptr++); - if (!(b & 0x80)) - goto done; - } - - // We have overrun the maximum size of a Varint (10 bytes). Assume - // the data is corrupt. - return false; - - done: - TBaseStream::Advance(ptr - BeginByte()); - *value = result; - return true; - } - - bool ReadVarint32Fallback(ui32* value) { - if (BeginByte() + MaxVarint32Bytes <= EndByte() || - // Optimization: If the Varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. - (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) - { - return ReadVarint32FromArray(value); - } else { - // Really slow case: we will incur the cost of an extra function call here, - // but moving this out of line reduces the size of this function, which - // improves the common case. In micro benchmarks, this is worth about 10-15% - return ReadVarint32Slow(value); - } - } - - bool ReadVarint32Slow(ui32* value) { - ui64 result; - // Directly invoke ReadVarint64Fallback, since we already tried to optimize - // for one-byte Varints. - if (ReadVarint64Fallback(&result)) { - *value = static_cast<ui32>(result); - return true; - } else { - return false; - } - } - - bool ReadVarint64Slow(ui64* value) { - // Slow path: This read might cross the end of the buffer, so we - // need to check and refresh the buffer if and when it does. - - ui64 result = 0; - int count = 0; - ui32 b; - - do { - if (count == MaxVarintBytes) { - return false; - } - while (BeginByte() == EndByte()) { - TBaseStream::Refresh(); - } - b = *BeginByte(); - result |= static_cast<ui64>(b & 0x7F) << (7 * count); - TBaseStream::Advance(1); - ++count; - } while (b & 0x80); - - *value = result; - return true; - } - - bool ReadVarint64Fallback(ui64* value) { - if (BeginByte() + MaxVarintBytes <= EndByte() || - // Optimization: If the Varint ends at exactly the end of the buffer, - // we can detect that and still use the fast path. - (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) - { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - - const ui8* ptr = BeginByte(); - ui32 b; - - // Splitting into 32-bit pieces gives better performance on 32-bit - // processors. - ui32 part0 = 0, part1 = 0, part2 = 0; - - b = *(ptr++); - part0 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part0 |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 14; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part1 |= (b & 0x7F) << 21; - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part2 = (b & 0x7F); - if (!(b & 0x80)) - goto done; - b = *(ptr++); - part2 |= (b & 0x7F) << 7; - if (!(b & 0x80)) - goto done; - - // We have overrun the maximum size of a Varint (10 bytes). The data - // must be corrupt. - return false; - - done: - TBaseStream::Advance(ptr - BeginByte()); - *value = (static_cast<ui64>(part0)) | - (static_cast<ui64>(part1) << 28) | - (static_cast<ui64>(part2) << 56); - return true; - } else { - return ReadVarint64Slow(value); - } - } - - public: - TCodedStream(const TBaseStream& baseStream) - : TBaseStream(baseStream) - { - } - - bool ReadVarint64(ui64* value) { - if (BeginByte() < EndByte() && *BeginByte() < 0x80) { - *value = *BeginByte(); - TBaseStream::Advance(1); - return true; - } else { - return ReadVarint64Fallback(value); - } - } - - bool ReadVarint32(ui32* value) { - if (BeginByte() < EndByte() && *BeginByte() < 0x80) { - *value = *BeginByte(); - TBaseStream::Advance(1); - return true; - } else { - return ReadVarint32Fallback(value); - } - } - }; - - enum ENumericResult { - Int64 = 0, - Uint64 = 1, - Double = 2 - }; - - template <class TBlockStream, bool EnableLinePositionInfo> - class TLexerBase - : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> { - private: - using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>; - TVector<char> Buffer_; - TMaybe<ui64> MemoryLimit_; - - void CheckMemoryLimit() { - if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) { + namespace NDetail { + //////////////////////////////////////////////////////////////////////////////// + + //! Indicates the beginning of a list. + const char BeginListSymbol = '['; + //! Indicates the end of a list. + const char EndListSymbol = ']'; + + //! Indicates the beginning of a map. + const char BeginMapSymbol = '{'; + //! Indicates the end of a map. + const char EndMapSymbol = '}'; + + //! Indicates the beginning of an attribute map. + const char BeginAttributesSymbol = '<'; + //! Indicates the end of an attribute map. + const char EndAttributesSymbol = '>'; + + //! Separates items in lists. + const char ListItemSeparatorSymbol = ';'; + //! Separates items in maps, attributes. + const char KeyedItemSeparatorSymbol = ';'; + //! Separates keys from values in maps. + const char KeyValueSeparatorSymbol = '='; + + //! Indicates an entity. + const char EntitySymbol = '#'; + + //! Indicates end of stream. + const char EndSymbol = '\0'; + + //! Marks the beginning of a binary string literal. + const char StringMarker = '\x01'; + //! Marks the beginning of a binary i64 literal. + const char Int64Marker = '\x02'; + //! Marks the beginning of a binary double literal. + const char DoubleMarker = '\x03'; + //! Marks true and false values of boolean. + const char FalseMarker = '\x04'; + const char TrueMarker = '\x05'; + //! Marks the beginning of a binary ui64 literal. + const char Uint64Marker = '\x06'; + + //////////////////////////////////////////////////////////////////////////////// + + template <bool EnableLinePositionInfo> + class TPositionInfo; + + template <> + class TPositionInfo<true> { + private: + int Offset; + int Line; + int Column; + + public: + TPositionInfo() + : Offset(0) + , Line(1) + , Column(1) + { + } + + void OnRangeConsumed(const char* begin, const char* end) { + Offset += end - begin; + for (auto current = begin; current != end; ++current) { + ++Column; + if (*current == '\n') { //TODO: memchr + ++Line; + Column = 1; + } + } + } + }; + + template <> + class TPositionInfo<false> { + private: + int Offset; + + public: + TPositionInfo() + : Offset(0) + { + } + + void OnRangeConsumed(const char* begin, const char* end) { + Offset += end - begin; + } + }; + + template <class TBlockStream, class TPositionBase> + class TCharStream + : public TBlockStream, + public TPositionBase { + public: + TCharStream(const TBlockStream& blockStream) + : TBlockStream(blockStream) + { + } + + bool IsEmpty() const { + return TBlockStream::Begin() == TBlockStream::End(); + } + + template <bool AllowFinish> + void Refresh() { + while (IsEmpty() && !TBlockStream::IsFinished()) { + TBlockStream::RefreshBlock(); + } + if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) { + ythrow TYsonException() << "Premature end of yson stream"; + } + } + + void Refresh() { + return Refresh<false>(); + } + + template <bool AllowFinish> + char GetChar() { + Refresh<AllowFinish>(); + return !IsEmpty() ? *TBlockStream::Begin() : '\0'; + } + + char GetChar() { + return GetChar<false>(); + } + + void Advance(size_t bytes) { + TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes); + TBlockStream::Advance(bytes); + } + + size_t Length() const { + return TBlockStream::End() - TBlockStream::Begin(); + } + }; + + template <class TBaseStream> + class TCodedStream + : public TBaseStream { + private: + static const int MaxVarintBytes = 10; + static const int MaxVarint32Bytes = 5; + + const ui8* BeginByte() const { + return reinterpret_cast<const ui8*>(TBaseStream::Begin()); + } + + const ui8* EndByte() const { + return reinterpret_cast<const ui8*>(TBaseStream::End()); + } + + // Following functions is an adaptation Protobuf code from coded_stream.cc + bool ReadVarint32FromArray(ui32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + const ui8* ptr = BeginByte(); + ui32 b; + ui32 result; + + b = *(ptr++); + result = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= b << 28; + if (!(b & 0x80)) + goto done; + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + + for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) { + b = *(ptr++); + if (!(b & 0x80)) + goto done; + } + + // We have overrun the maximum size of a Varint (10 bytes). Assume + // the data is corrupt. + return false; + + done: + TBaseStream::Advance(ptr - BeginByte()); + *value = result; + return true; + } + + bool ReadVarint32Fallback(ui32* value) { + if (BeginByte() + MaxVarint32Bytes <= EndByte() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) + { + return ReadVarint32FromArray(value); + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarint32Slow(value); + } + } + + bool ReadVarint32Slow(ui32* value) { + ui64 result; + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte Varints. + if (ReadVarint64Fallback(&result)) { + *value = static_cast<ui32>(result); + return true; + } else { + return false; + } + } + + bool ReadVarint64Slow(ui64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. + + ui64 result = 0; + int count = 0; + ui32 b; + + do { + if (count == MaxVarintBytes) { + return false; + } + while (BeginByte() == EndByte()) { + TBaseStream::Refresh(); + } + b = *BeginByte(); + result |= static_cast<ui64>(b & 0x7F) << (7 * count); + TBaseStream::Advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; + } + + bool ReadVarint64Fallback(ui64* value) { + if (BeginByte() + MaxVarintBytes <= EndByte() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) + { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + + const ui8* ptr = BeginByte(); + ui32 b; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + ui32 part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); + part0 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + + // We have overrun the maximum size of a Varint (10 bytes). The data + // must be corrupt. + return false; + + done: + TBaseStream::Advance(ptr - BeginByte()); + *value = (static_cast<ui64>(part0)) | + (static_cast<ui64>(part1) << 28) | + (static_cast<ui64>(part2) << 56); + return true; + } else { + return ReadVarint64Slow(value); + } + } + + public: + TCodedStream(const TBaseStream& baseStream) + : TBaseStream(baseStream) + { + } + + bool ReadVarint64(ui64* value) { + if (BeginByte() < EndByte() && *BeginByte() < 0x80) { + *value = *BeginByte(); + TBaseStream::Advance(1); + return true; + } else { + return ReadVarint64Fallback(value); + } + } + + bool ReadVarint32(ui32* value) { + if (BeginByte() < EndByte() && *BeginByte() < 0x80) { + *value = *BeginByte(); + TBaseStream::Advance(1); + return true; + } else { + return ReadVarint32Fallback(value); + } + } + }; + + enum ENumericResult { + Int64 = 0, + Uint64 = 1, + Double = 2 + }; + + template <class TBlockStream, bool EnableLinePositionInfo> + class TLexerBase + : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> { + private: + using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>; + TVector<char> Buffer_; + TMaybe<ui64> MemoryLimit_; + + void CheckMemoryLimit() { + if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) { ythrow TYsonException() << "Memory limit exceeded while parsing YSON stream: allocated " << Buffer_.capacity() << ", limit " << (*MemoryLimit_); - } - } - - public: - TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) - : TBaseStream(blockStream) - , MemoryLimit_(memoryLimit) - { - } - - protected: - /// Lexer routines - - template <bool AllowFinish> - ENumericResult ReadNumeric(TStringBuf* value) { - Buffer_.clear(); - ENumericResult result = ENumericResult::Int64; - while (true) { - char ch = TBaseStream::template GetChar<AllowFinish>(); - if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-' - Buffer_.push_back(ch); - } else if (ch == '.' || ch == 'e' || ch == 'E') { - Buffer_.push_back(ch); - result = ENumericResult::Double; - } else if (ch == 'u') { - Buffer_.push_back(ch); - result = ENumericResult::Uint64; - } else if (isalpha(ch)) { + } + } + + public: + TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) + : TBaseStream(blockStream) + , MemoryLimit_(memoryLimit) + { + } + + protected: + /// Lexer routines + + template <bool AllowFinish> + ENumericResult ReadNumeric(TStringBuf* value) { + Buffer_.clear(); + ENumericResult result = ENumericResult::Int64; + while (true) { + char ch = TBaseStream::template GetChar<AllowFinish>(); + if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-' + Buffer_.push_back(ch); + } else if (ch == '.' || ch == 'e' || ch == 'E') { + Buffer_.push_back(ch); + result = ENumericResult::Double; + } else if (ch == 'u') { + Buffer_.push_back(ch); + result = ENumericResult::Uint64; + } else if (isalpha(ch)) { ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal"; - } else { - break; - } - CheckMemoryLimit(); - TBaseStream::Advance(1); - } + } else { + break; + } + CheckMemoryLimit(); + TBaseStream::Advance(1); + } - *value = TStringBuf(Buffer_.data(), Buffer_.size()); - return result; + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + return result; } - template <bool AllowFinish> - double ReadNanOrInf() { + template <bool AllowFinish> + double ReadNanOrInf() { static const TStringBuf nanString = "nan"; static const TStringBuf infString = "inf"; static const TStringBuf plusInfString = "+inf"; static const TStringBuf minusInfString = "-inf"; - TStringBuf expectedString; - double expectedValue; - char ch = TBaseStream::template GetChar<AllowFinish>(); - switch (ch) { - case '+': - expectedString = plusInfString; - expectedValue = std::numeric_limits<double>::infinity(); - break; - case '-': - expectedString = minusInfString; - expectedValue = -std::numeric_limits<double>::infinity(); - break; - case 'i': - expectedString = infString; - expectedValue = std::numeric_limits<double>::infinity(); - break; - case 'n': - expectedString = nanString; - expectedValue = std::numeric_limits<double>::quiet_NaN(); - break; - default: - ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'"; - } + TStringBuf expectedString; + double expectedValue; + char ch = TBaseStream::template GetChar<AllowFinish>(); + switch (ch) { + case '+': + expectedString = plusInfString; + expectedValue = std::numeric_limits<double>::infinity(); + break; + case '-': + expectedString = minusInfString; + expectedValue = -std::numeric_limits<double>::infinity(); + break; + case 'i': + expectedString = infString; + expectedValue = std::numeric_limits<double>::infinity(); + break; + case 'n': + expectedString = nanString; + expectedValue = std::numeric_limits<double>::quiet_NaN(); + break; + default: + ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'"; + } for (size_t i = 0; i < expectedString.size(); ++i) { - if (expectedString[i] != ch) { - ythrow TYsonException() - << "Incorrect %-literal prefix " - << "'" << expectedString.SubStr(0, i) << ch << "'," - << "expected " << expectedString; - } - TBaseStream::Advance(1); - ch = TBaseStream::template GetChar<AllowFinish>(); - } - - return expectedValue; - } - - void ReadQuotedString(TStringBuf* value) { - Buffer_.clear(); - while (true) { - if (TBaseStream::IsEmpty()) { - TBaseStream::Refresh(); - } - char ch = *TBaseStream::Begin(); - TBaseStream::Advance(1); - if (ch != '"') { - Buffer_.push_back(ch); - } else { - // We must count the number of '\' at the end of StringValue - // to check if it's not \" - int slashCount = 0; - int length = Buffer_.size(); - while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') { - ++slashCount; - } - if (slashCount % 2 == 0) { - break; - } else { - Buffer_.push_back(ch); - } - } - CheckMemoryLimit(); - } - - auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size()); - Buffer_.clear(); - Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size()); - CheckMemoryLimit(); - *value = TStringBuf(Buffer_.data(), Buffer_.size()); - } - - template <bool AllowFinish> - void ReadUnquotedString(TStringBuf* value) { - Buffer_.clear(); - while (true) { - char ch = TBaseStream::template GetChar<AllowFinish>(); - if (isalpha(ch) || isdigit(ch) || - ch == '_' || ch == '-' || ch == '%' || ch == '.') { - Buffer_.push_back(ch); - } else { - break; - } - CheckMemoryLimit(); - TBaseStream::Advance(1); + if (expectedString[i] != ch) { + ythrow TYsonException() + << "Incorrect %-literal prefix " + << "'" << expectedString.SubStr(0, i) << ch << "'," + << "expected " << expectedString; + } + TBaseStream::Advance(1); + ch = TBaseStream::template GetChar<AllowFinish>(); + } + + return expectedValue; + } + + void ReadQuotedString(TStringBuf* value) { + Buffer_.clear(); + while (true) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + } + char ch = *TBaseStream::Begin(); + TBaseStream::Advance(1); + if (ch != '"') { + Buffer_.push_back(ch); + } else { + // We must count the number of '\' at the end of StringValue + // to check if it's not \" + int slashCount = 0; + int length = Buffer_.size(); + while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') { + ++slashCount; + } + if (slashCount % 2 == 0) { + break; + } else { + Buffer_.push_back(ch); + } + } + CheckMemoryLimit(); + } + + auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size()); + Buffer_.clear(); + Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size()); + CheckMemoryLimit(); + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + + template <bool AllowFinish> + void ReadUnquotedString(TStringBuf* value) { + Buffer_.clear(); + while (true) { + char ch = TBaseStream::template GetChar<AllowFinish>(); + if (isalpha(ch) || isdigit(ch) || + ch == '_' || ch == '-' || ch == '%' || ch == '.') { + Buffer_.push_back(ch); + } else { + break; + } + CheckMemoryLimit(); + TBaseStream::Advance(1); } - *value = TStringBuf(Buffer_.data(), Buffer_.size()); + *value = TStringBuf(Buffer_.data(), Buffer_.size()); } - void ReadUnquotedString(TStringBuf* value) { - return ReadUnquotedString<false>(value); + void ReadUnquotedString(TStringBuf* value) { + return ReadUnquotedString<false>(value); } - void ReadBinaryString(TStringBuf* value) { - ui32 ulength = 0; - if (!TBaseStream::ReadVarint32(&ulength)) { - ythrow TYsonException() << "Error parsing varint value"; - } + void ReadBinaryString(TStringBuf* value) { + ui32 ulength = 0; + if (!TBaseStream::ReadVarint32(&ulength)) { + ythrow TYsonException() << "Error parsing varint value"; + } - i32 length = ZigZagDecode32(ulength); - if (length < 0) { + i32 length = ZigZagDecode32(ulength); + if (length < 0) { ythrow TYsonException() << "Negative binary string literal length " << length; - } - - if (TBaseStream::Begin() + length <= TBaseStream::End()) { - *value = TStringBuf(TBaseStream::Begin(), length); - TBaseStream::Advance(length); - } else { // reading in Buffer - size_t needToRead = length; - Buffer_.clear(); - while (needToRead) { - if (TBaseStream::IsEmpty()) { - TBaseStream::Refresh(); - continue; - } - size_t readingBytes = Min(needToRead, TBaseStream::Length()); - - Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes); - CheckMemoryLimit(); - needToRead -= readingBytes; - TBaseStream::Advance(readingBytes); - } - *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + + if (TBaseStream::Begin() + length <= TBaseStream::End()) { + *value = TStringBuf(TBaseStream::Begin(), length); + TBaseStream::Advance(length); + } else { // reading in Buffer + size_t needToRead = length; + Buffer_.clear(); + while (needToRead) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + continue; + } + size_t readingBytes = Min(needToRead, TBaseStream::Length()); + + Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes); + CheckMemoryLimit(); + needToRead -= readingBytes; + TBaseStream::Advance(readingBytes); + } + *value = TStringBuf(Buffer_.data(), Buffer_.size()); } } - template <bool AllowFinish> - bool ReadBoolean() { - Buffer_.clear(); + template <bool AllowFinish> + bool ReadBoolean() { + Buffer_.clear(); static TStringBuf trueString = "true"; static TStringBuf falseString = "false"; - auto throwIncorrectBoolean = [&]() { + auto throwIncorrectBoolean = [&]() { ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size()); - }; + }; Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); TBaseStream::Advance(1); - if (Buffer_[0] == trueString[0]) { + if (Buffer_[0] == trueString[0]) { for (size_t i = 1; i < trueString.size(); ++i) { - Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); - TBaseStream::Advance(1); - if (Buffer_.back() != trueString[i]) { - throwIncorrectBoolean(); - } - } - return true; - } else if (Buffer_[0] == falseString[0]) { + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_.back() != trueString[i]) { + throwIncorrectBoolean(); + } + } + return true; + } else if (Buffer_[0] == falseString[0]) { for (size_t i = 1; i < falseString.size(); ++i) { - Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); - TBaseStream::Advance(1); - if (Buffer_.back() != falseString[i]) { - throwIncorrectBoolean(); - } - } - return false; - } else { + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_.back() != falseString[i]) { + throwIncorrectBoolean(); + } + } + return false; + } else { throwIncorrectBoolean(); } - - Y_FAIL("unreachable"); - ; - } - - void ReadBinaryInt64(i64* result) { - ui64 uvalue; - if (!TBaseStream::ReadVarint64(&uvalue)) { - ythrow TYsonException() << "Error parsing varint value"; + + Y_FAIL("unreachable"); + ; + } + + void ReadBinaryInt64(i64* result) { + ui64 uvalue; + if (!TBaseStream::ReadVarint64(&uvalue)) { + ythrow TYsonException() << "Error parsing varint value"; + } + *result = ZigZagDecode64(uvalue); + } + + void ReadBinaryUint64(ui64* result) { + ui64 uvalue; + if (!TBaseStream::ReadVarint64(&uvalue)) { + ythrow TYsonException() << "Error parsing varint value"; } - *result = ZigZagDecode64(uvalue); - } - - void ReadBinaryUint64(ui64* result) { - ui64 uvalue; - if (!TBaseStream::ReadVarint64(&uvalue)) { - ythrow TYsonException() << "Error parsing varint value"; - } - *result = uvalue; - } - - void ReadBinaryDouble(double* value) { - size_t needToRead = sizeof(double); - - while (needToRead != 0) { - if (TBaseStream::IsEmpty()) { - TBaseStream::Refresh(); - continue; - } - - size_t chunkSize = Min(needToRead, TBaseStream::Length()); - if (chunkSize == 0) { - ythrow TYsonException() << "Error parsing binary double literal"; - } - std::copy( - TBaseStream::Begin(), - TBaseStream::Begin() + chunkSize, - reinterpret_cast<char*>(value) + (sizeof(double) - needToRead)); - needToRead -= chunkSize; - TBaseStream::Advance(chunkSize); - } - } - - /// Helpers - void SkipCharToken(char symbol) { - char ch = SkipSpaceAndGetChar(); - if (ch != symbol) { + *result = uvalue; + } + + void ReadBinaryDouble(double* value) { + size_t needToRead = sizeof(double); + + while (needToRead != 0) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + continue; + } + + size_t chunkSize = Min(needToRead, TBaseStream::Length()); + if (chunkSize == 0) { + ythrow TYsonException() << "Error parsing binary double literal"; + } + std::copy( + TBaseStream::Begin(), + TBaseStream::Begin() + chunkSize, + reinterpret_cast<char*>(value) + (sizeof(double) - needToRead)); + needToRead -= chunkSize; + TBaseStream::Advance(chunkSize); + } + } + + /// Helpers + void SkipCharToken(char symbol) { + char ch = SkipSpaceAndGetChar(); + if (ch != symbol) { ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'"; - } - - TBaseStream::Advance(1); - } - - static bool IsSpaceFast(char ch) { - static const ui8 lookupTable[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - return lookupTable[static_cast<ui8>(ch)]; - } - - template <bool AllowFinish> - char SkipSpaceAndGetChar() { - if (!TBaseStream::IsEmpty()) { - char ch = *TBaseStream::Begin(); - if (!IsSpaceFast(ch)) { - return ch; - } - } - return SkipSpaceAndGetCharFallback<AllowFinish>(); - } - - char SkipSpaceAndGetChar() { - return SkipSpaceAndGetChar<false>(); - } - - template <bool AllowFinish> - char SkipSpaceAndGetCharFallback() { - while (true) { - if (TBaseStream::IsEmpty()) { - if (TBaseStream::IsFinished()) { - return '\0'; - } - TBaseStream::template Refresh<AllowFinish>(); - continue; - } - if (!IsSpaceFast(*TBaseStream::Begin())) { - break; - } - TBaseStream::Advance(1); } - return TBaseStream::template GetChar<AllowFinish>(); - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - } - - //////////////////////////////////////////////////////////////////////////////// - - class TStringReader { - private: - const char* BeginPtr; - const char* EndPtr; - - public: - TStringReader() - : BeginPtr(nullptr) - , EndPtr(nullptr) - { - } - - TStringReader(const char* begin, const char* end) - : BeginPtr(begin) - , EndPtr(end) - { - } - - const char* Begin() const { - return BeginPtr; - } - - const char* End() const { - return EndPtr; - } - - void RefreshBlock() { - Y_FAIL("unreachable"); - } - - void Advance(size_t bytes) { - BeginPtr += bytes; - } - - bool IsFinished() const { - return true; - } - - void SetBuffer(const char* begin, const char* end) { - BeginPtr = begin; - EndPtr = end; - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - class TStreamReader { - public: - TStreamReader( - IInputStream* stream, - char* buffer, - size_t bufferSize) - : Stream(stream) - , Buffer(buffer) - , BufferSize(bufferSize) - { - BeginPtr = EndPtr = Buffer; - FinishFlag = false; - } - - const char* Begin() const { - return BeginPtr; - } - - const char* End() const { - return EndPtr; - } - - void RefreshBlock() { - size_t bytes = Stream->Read(Buffer, BufferSize); - BeginPtr = Buffer; - EndPtr = Buffer + bytes; - FinishFlag = (bytes == 0); - } - - void Advance(size_t bytes) { - BeginPtr += bytes; - } - - bool IsFinished() const { - return FinishFlag; - } - - private: - IInputStream* Stream; - char* Buffer; - size_t BufferSize; - - const char* BeginPtr; - const char* EndPtr; - bool FinishFlag; - }; - - //////////////////////////////////////////////////////////////////////////////// + + TBaseStream::Advance(1); + } + + static bool IsSpaceFast(char ch) { + static const ui8 lookupTable[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return lookupTable[static_cast<ui8>(ch)]; + } + + template <bool AllowFinish> + char SkipSpaceAndGetChar() { + if (!TBaseStream::IsEmpty()) { + char ch = *TBaseStream::Begin(); + if (!IsSpaceFast(ch)) { + return ch; + } + } + return SkipSpaceAndGetCharFallback<AllowFinish>(); + } + + char SkipSpaceAndGetChar() { + return SkipSpaceAndGetChar<false>(); + } + + template <bool AllowFinish> + char SkipSpaceAndGetCharFallback() { + while (true) { + if (TBaseStream::IsEmpty()) { + if (TBaseStream::IsFinished()) { + return '\0'; + } + TBaseStream::template Refresh<AllowFinish>(); + continue; + } + if (!IsSpaceFast(*TBaseStream::Begin())) { + break; + } + TBaseStream::Advance(1); + } + return TBaseStream::template GetChar<AllowFinish>(); + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + } + + //////////////////////////////////////////////////////////////////////////////// + + class TStringReader { + private: + const char* BeginPtr; + const char* EndPtr; + + public: + TStringReader() + : BeginPtr(nullptr) + , EndPtr(nullptr) + { + } + + TStringReader(const char* begin, const char* end) + : BeginPtr(begin) + , EndPtr(end) + { + } + + const char* Begin() const { + return BeginPtr; + } + + const char* End() const { + return EndPtr; + } + + void RefreshBlock() { + Y_FAIL("unreachable"); + } + + void Advance(size_t bytes) { + BeginPtr += bytes; + } + + bool IsFinished() const { + return true; + } + + void SetBuffer(const char* begin, const char* end) { + BeginPtr = begin; + EndPtr = end; + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + class TStreamReader { + public: + TStreamReader( + IInputStream* stream, + char* buffer, + size_t bufferSize) + : Stream(stream) + , Buffer(buffer) + , BufferSize(bufferSize) + { + BeginPtr = EndPtr = Buffer; + FinishFlag = false; + } + + const char* Begin() const { + return BeginPtr; + } + + const char* End() const { + return EndPtr; + } + + void RefreshBlock() { + size_t bytes = Stream->Read(Buffer, BufferSize); + BeginPtr = Buffer; + EndPtr = Buffer + bytes; + FinishFlag = (bytes == 0); + } + + void Advance(size_t bytes) { + BeginPtr += bytes; + } + + bool IsFinished() const { + return FinishFlag; + } + + private: + IInputStream* Stream; + char* Buffer; + size_t BufferSize; + + const char* BeginPtr; + const char* EndPtr; + bool FinishFlag; + }; + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/format.h b/library/cpp/yson/format.h index 77abfba24f..2ff6dc9f6e 100644 --- a/library/cpp/yson/format.h +++ b/library/cpp/yson/format.h @@ -3,23 +3,23 @@ #include "token.h" namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - const ETokenType BeginListToken = LeftBracket; - const ETokenType EndListToken = RightBracket; + const ETokenType BeginListToken = LeftBracket; + const ETokenType EndListToken = RightBracket; - const ETokenType BeginMapToken = LeftBrace; - const ETokenType EndMapToken = RightBrace; + const ETokenType BeginMapToken = LeftBrace; + const ETokenType EndMapToken = RightBrace; - const ETokenType BeginAttributesToken = LeftAngle; - const ETokenType EndAttributesToken = RightAngle; + const ETokenType BeginAttributesToken = LeftAngle; + const ETokenType EndAttributesToken = RightAngle; - const ETokenType ListItemSeparatorToken = Semicolon; - const ETokenType KeyedItemSeparatorToken = Semicolon; - const ETokenType KeyValueSeparatorToken = Equals; + const ETokenType ListItemSeparatorToken = Semicolon; + const ETokenType KeyedItemSeparatorToken = Semicolon; + const ETokenType KeyValueSeparatorToken = Equals; - const ETokenType EntityToken = Hash; + const ETokenType EntityToken = Hash; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/json/json_writer.cpp b/library/cpp/yson/json/json_writer.cpp index f65144bd67..87481256ec 100644 --- a/library/cpp/yson/json/json_writer.cpp +++ b/library/cpp/yson/json/json_writer.cpp @@ -3,20 +3,20 @@ #include <library/cpp/json/json_writer.h> namespace NYT { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - static bool IsSpecialJsonKey(const TStringBuf& key) { - return key.size() > 0 && key[0] == '$'; - } + static bool IsSpecialJsonKey(const TStringBuf& key) { + return key.size() > 0 && key[0] == '$'; + } - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - TJsonWriter::TJsonWriter( - IOutputStream* output, + TJsonWriter::TJsonWriter( + IOutputStream* output, ::NYson::EYsonType type, - EJsonFormat format, - EJsonAttributesMode attributesMode, - ESerializedBoolFormat booleanFormat) + EJsonFormat format, + EJsonAttributesMode attributesMode, + ESerializedBoolFormat booleanFormat) : TJsonWriter( output, NJson::TJsonWriterConfig{}.SetFormatOutput(format == JF_PRETTY), @@ -32,189 +32,189 @@ namespace NYT { ::NYson::EYsonType type, EJsonAttributesMode attributesMode, ESerializedBoolFormat booleanFormat) - : Output(output) - , Type(type) - , AttributesMode(attributesMode) - , BooleanFormat(booleanFormat) - , Depth(0) - { + : Output(output) + , Type(type) + , AttributesMode(attributesMode) + , BooleanFormat(booleanFormat) + , Depth(0) + { if (Type == ::NYson::EYsonType::MapFragment) { ythrow ::NYson::TYsonException() << ("Map fragments are not supported by Json"); - } + } - UnderlyingJsonWriter.Reset(new NJson::TJsonWriter( - output, + UnderlyingJsonWriter.Reset(new NJson::TJsonWriter( + output, config)); - JsonWriter = UnderlyingJsonWriter.Get(); - HasAttributes = false; - InAttributesBalance = 0; - } - - void TJsonWriter::EnterNode() { - if (AttributesMode == JAM_NEVER) { - HasAttributes = false; - } else if (AttributesMode == JAM_ON_DEMAND) { - // Do nothing - } else if (AttributesMode == JAM_ALWAYS) { - if (!HasAttributes) { - JsonWriter->OpenMap(); - JsonWriter->Write("$attributes"); - JsonWriter->OpenMap(); - JsonWriter->CloseMap(); - } - HasAttributes = true; - } - HasUnfoldedStructureStack.push_back(HasAttributes); - - if (HasAttributes) { - JsonWriter->Write("$value"); - HasAttributes = false; - } - - Depth += 1; - } - - void TJsonWriter::LeaveNode() { - Y_ASSERT(!HasUnfoldedStructureStack.empty()); - if (HasUnfoldedStructureStack.back()) { - // Close map of the {$attributes, $value} - JsonWriter->CloseMap(); - } - HasUnfoldedStructureStack.pop_back(); - - Depth -= 1; - + JsonWriter = UnderlyingJsonWriter.Get(); + HasAttributes = false; + InAttributesBalance = 0; + } + + void TJsonWriter::EnterNode() { + if (AttributesMode == JAM_NEVER) { + HasAttributes = false; + } else if (AttributesMode == JAM_ON_DEMAND) { + // Do nothing + } else if (AttributesMode == JAM_ALWAYS) { + if (!HasAttributes) { + JsonWriter->OpenMap(); + JsonWriter->Write("$attributes"); + JsonWriter->OpenMap(); + JsonWriter->CloseMap(); + } + HasAttributes = true; + } + HasUnfoldedStructureStack.push_back(HasAttributes); + + if (HasAttributes) { + JsonWriter->Write("$value"); + HasAttributes = false; + } + + Depth += 1; + } + + void TJsonWriter::LeaveNode() { + Y_ASSERT(!HasUnfoldedStructureStack.empty()); + if (HasUnfoldedStructureStack.back()) { + // Close map of the {$attributes, $value} + JsonWriter->CloseMap(); + } + HasUnfoldedStructureStack.pop_back(); + + Depth -= 1; + if (Depth == 0 && Type == ::NYson::EYsonType::ListFragment && InAttributesBalance == 0) { - JsonWriter->Flush(); - Output->Write("\n"); - } + JsonWriter->Flush(); + Output->Write("\n"); + } } - bool TJsonWriter::IsWriteAllowed() { - if (AttributesMode == JAM_NEVER) { - return InAttributesBalance == 0; - } - return true; + bool TJsonWriter::IsWriteAllowed() { + if (AttributesMode == JAM_NEVER) { + return InAttributesBalance == 0; + } + return true; } void TJsonWriter::OnStringScalar(TStringBuf value) { - if (IsWriteAllowed()) { - EnterNode(); - WriteStringScalar(value); - LeaveNode(); - } - } - - void TJsonWriter::OnInt64Scalar(i64 value) { - if (IsWriteAllowed()) { - EnterNode(); - JsonWriter->Write(value); - LeaveNode(); - } - } - - void TJsonWriter::OnUint64Scalar(ui64 value) { - if (IsWriteAllowed()) { - EnterNode(); - JsonWriter->Write(value); - LeaveNode(); - } - } - - void TJsonWriter::OnDoubleScalar(double value) { - if (IsWriteAllowed()) { - EnterNode(); - JsonWriter->Write(value); - LeaveNode(); - } - } - - void TJsonWriter::OnBooleanScalar(bool value) { - if (IsWriteAllowed()) { - if (BooleanFormat == SBF_STRING) { - OnStringScalar(value ? "true" : "false"); - } else { - EnterNode(); - JsonWriter->Write(value); - LeaveNode(); - } - } - } - - void TJsonWriter::OnEntity() { - if (IsWriteAllowed()) { + if (IsWriteAllowed()) { + EnterNode(); + WriteStringScalar(value); + LeaveNode(); + } + } + + void TJsonWriter::OnInt64Scalar(i64 value) { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } + + void TJsonWriter::OnUint64Scalar(ui64 value) { + if (IsWriteAllowed()) { EnterNode(); - JsonWriter->WriteNull(); + JsonWriter->Write(value); LeaveNode(); } } - void TJsonWriter::OnBeginList() { - if (IsWriteAllowed()) { - EnterNode(); - JsonWriter->OpenArray(); - } + void TJsonWriter::OnDoubleScalar(double value) { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } + + void TJsonWriter::OnBooleanScalar(bool value) { + if (IsWriteAllowed()) { + if (BooleanFormat == SBF_STRING) { + OnStringScalar(value ? "true" : "false"); + } else { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } } - void TJsonWriter::OnListItem() { + void TJsonWriter::OnEntity() { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->WriteNull(); + LeaveNode(); + } } - void TJsonWriter::OnEndList() { - if (IsWriteAllowed()) { - JsonWriter->CloseArray(); - LeaveNode(); - } - } + void TJsonWriter::OnBeginList() { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->OpenArray(); + } + } - void TJsonWriter::OnBeginMap() { - if (IsWriteAllowed()) { - EnterNode(); - JsonWriter->OpenMap(); - } + void TJsonWriter::OnListItem() { + } + + void TJsonWriter::OnEndList() { + if (IsWriteAllowed()) { + JsonWriter->CloseArray(); + LeaveNode(); + } + } + + void TJsonWriter::OnBeginMap() { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->OpenMap(); + } } void TJsonWriter::OnKeyedItem(TStringBuf name) { - if (IsWriteAllowed()) { - if (IsSpecialJsonKey(name)) { - WriteStringScalar(TString("$") + name); - } else { - WriteStringScalar(name); - } - } + if (IsWriteAllowed()) { + if (IsSpecialJsonKey(name)) { + WriteStringScalar(TString("$") + name); + } else { + WriteStringScalar(name); + } + } + } + + void TJsonWriter::OnEndMap() { + if (IsWriteAllowed()) { + JsonWriter->CloseMap(); + LeaveNode(); + } } - void TJsonWriter::OnEndMap() { - if (IsWriteAllowed()) { - JsonWriter->CloseMap(); - LeaveNode(); + void TJsonWriter::OnBeginAttributes() { + InAttributesBalance += 1; + if (AttributesMode != JAM_NEVER) { + JsonWriter->OpenMap(); + JsonWriter->Write("$attributes"); + JsonWriter->OpenMap(); } } - void TJsonWriter::OnBeginAttributes() { - InAttributesBalance += 1; - if (AttributesMode != JAM_NEVER) { - JsonWriter->OpenMap(); - JsonWriter->Write("$attributes"); - JsonWriter->OpenMap(); - } + void TJsonWriter::OnEndAttributes() { + InAttributesBalance -= 1; + if (AttributesMode != JAM_NEVER) { + HasAttributes = true; + JsonWriter->CloseMap(); + } } - void TJsonWriter::OnEndAttributes() { - InAttributesBalance -= 1; - if (AttributesMode != JAM_NEVER) { - HasAttributes = true; - JsonWriter->CloseMap(); - } + void TJsonWriter::WriteStringScalar(const TStringBuf& value) { + JsonWriter->Write(value); } - void TJsonWriter::WriteStringScalar(const TStringBuf& value) { - JsonWriter->Write(value); + void TJsonWriter::Flush() { + JsonWriter->Flush(); } - void TJsonWriter::Flush() { - JsonWriter->Flush(); - } + //////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////// - } diff --git a/library/cpp/yson/json/json_writer.h b/library/cpp/yson/json/json_writer.h index 8dca12fe0e..d84ac0de53 100644 --- a/library/cpp/yson/json/json_writer.h +++ b/library/cpp/yson/json/json_writer.h @@ -8,33 +8,33 @@ #include <util/generic/vector.h> namespace NYT { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - enum EJsonFormat { - JF_TEXT, - JF_PRETTY - }; + enum EJsonFormat { + JF_TEXT, + JF_PRETTY + }; - enum EJsonAttributesMode { - JAM_NEVER, - JAM_ON_DEMAND, - JAM_ALWAYS - }; + enum EJsonAttributesMode { + JAM_NEVER, + JAM_ON_DEMAND, + JAM_ALWAYS + }; - enum ESerializedBoolFormat { - SBF_BOOLEAN, - SBF_STRING - }; + enum ESerializedBoolFormat { + SBF_BOOLEAN, + SBF_STRING + }; - class TJsonWriter + class TJsonWriter : public ::NYson::TYsonConsumerBase { - public: - TJsonWriter( - IOutputStream* output, + public: + TJsonWriter( + IOutputStream* output, ::NYson::EYsonType type = ::NYson::EYsonType::Node, - EJsonFormat format = JF_TEXT, - EJsonAttributesMode attributesMode = JAM_ON_DEMAND, - ESerializedBoolFormat booleanFormat = SBF_STRING); + EJsonFormat format = JF_TEXT, + EJsonAttributesMode attributesMode = JAM_ON_DEMAND, + ESerializedBoolFormat booleanFormat = SBF_STRING); TJsonWriter( IOutputStream* output, @@ -43,47 +43,47 @@ namespace NYT { EJsonAttributesMode attributesMode = JAM_ON_DEMAND, ESerializedBoolFormat booleanFormat = SBF_STRING); - void Flush(); + void Flush(); void OnStringScalar(TStringBuf value) override; - void OnInt64Scalar(i64 value) override; - void OnUint64Scalar(ui64 value) override; - void OnDoubleScalar(double value) override; - void OnBooleanScalar(bool value) override; + void OnInt64Scalar(i64 value) override; + void OnUint64Scalar(ui64 value) override; + void OnDoubleScalar(double value) override; + void OnBooleanScalar(bool value) override; - void OnEntity() override; + void OnEntity() override; - void OnBeginList() override; - void OnListItem() override; - void OnEndList() override; + void OnBeginList() override; + void OnListItem() override; + void OnEndList() override; - void OnBeginMap() override; + void OnBeginMap() override; void OnKeyedItem(TStringBuf key) override; - void OnEndMap() override; + void OnEndMap() override; - void OnBeginAttributes() override; - void OnEndAttributes() override; + void OnBeginAttributes() override; + void OnEndAttributes() override; - private: - THolder<NJson::TJsonWriter> UnderlyingJsonWriter; - NJson::TJsonWriter* JsonWriter; - IOutputStream* Output; + private: + THolder<NJson::TJsonWriter> UnderlyingJsonWriter; + NJson::TJsonWriter* JsonWriter; + IOutputStream* Output; ::NYson::EYsonType Type; - EJsonAttributesMode AttributesMode; - ESerializedBoolFormat BooleanFormat; + EJsonAttributesMode AttributesMode; + ESerializedBoolFormat BooleanFormat; - void WriteStringScalar(const TStringBuf& value); + void WriteStringScalar(const TStringBuf& value); - void EnterNode(); - void LeaveNode(); - bool IsWriteAllowed(); + void EnterNode(); + void LeaveNode(); + bool IsWriteAllowed(); - TVector<bool> HasUnfoldedStructureStack; - int InAttributesBalance; - bool HasAttributes; - int Depth; - }; + TVector<bool> HasUnfoldedStructureStack; + int InAttributesBalance; + bool HasAttributes; + int Depth; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// -} +} diff --git a/library/cpp/yson/json/yson2json_adapter.h b/library/cpp/yson/json/yson2json_adapter.h index 2a069c1f08..da1bf5ba70 100644 --- a/library/cpp/yson/json/yson2json_adapter.h +++ b/library/cpp/yson/json/yson2json_adapter.h @@ -50,4 +50,4 @@ namespace NYT { ::NYson::TYsonConsumerBase* Impl_; TState State_; }; -} +} diff --git a/library/cpp/yson/lexer.cpp b/library/cpp/yson/lexer.cpp index da0021cc4f..5eae94273b 100644 --- a/library/cpp/yson/lexer.cpp +++ b/library/cpp/yson/lexer.cpp @@ -5,39 +5,39 @@ #include <util/generic/ptr.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - class TStatelessLexer::TImpl { - private: - THolder<TStatelessYsonLexerImplBase> Impl; - - public: - TImpl(bool enableLinePositionInfo = false) - : Impl(enableLinePositionInfo - ? static_cast<TStatelessYsonLexerImplBase*>(new TStatelesYsonLexerImpl<true>()) - : static_cast<TStatelessYsonLexerImplBase*>(new TStatelesYsonLexerImpl<false>())) - { - } - - size_t GetToken(const TStringBuf& data, TToken* token) { - return Impl->GetToken(data, token); - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - TStatelessLexer::TStatelessLexer() - : Impl(new TImpl()) + //////////////////////////////////////////////////////////////////////////////// + + class TStatelessLexer::TImpl { + private: + THolder<TStatelessYsonLexerImplBase> Impl; + + public: + TImpl(bool enableLinePositionInfo = false) + : Impl(enableLinePositionInfo + ? static_cast<TStatelessYsonLexerImplBase*>(new TStatelesYsonLexerImpl<true>()) + : static_cast<TStatelessYsonLexerImplBase*>(new TStatelesYsonLexerImpl<false>())) + { + } + + size_t GetToken(const TStringBuf& data, TToken* token) { + return Impl->GetToken(data, token); + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + TStatelessLexer::TStatelessLexer() + : Impl(new TImpl()) { } - TStatelessLexer::~TStatelessLexer() { - } + TStatelessLexer::~TStatelessLexer() { + } - size_t TStatelessLexer::GetToken(const TStringBuf& data, TToken* token) { - return Impl->GetToken(data, token); - } + size_t TStatelessLexer::GetToken(const TStringBuf& data, TToken* token) { + return Impl->GetToken(data, token); + } - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/lexer.h b/library/cpp/yson/lexer.h index 3855192ba7..d9d701874d 100644 --- a/library/cpp/yson/lexer.h +++ b/library/cpp/yson/lexer.h @@ -6,21 +6,21 @@ #include <util/generic/ptr.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TStatelessLexer { - public: - TStatelessLexer(); + class TStatelessLexer { + public: + TStatelessLexer(); - ~TStatelessLexer(); + ~TStatelessLexer(); - size_t GetToken(const TStringBuf& data, TToken* token); + size_t GetToken(const TStringBuf& data, TToken* token); - private: - class TImpl; - THolder<TImpl> Impl; - }; + private: + class TImpl; + THolder<TImpl> Impl; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/lexer_detail.h b/library/cpp/yson/lexer_detail.h index be61289cb1..0bba30acdd 100644 --- a/library/cpp/yson/lexer_detail.h +++ b/library/cpp/yson/lexer_detail.h @@ -4,58 +4,58 @@ #include "token.h" namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - namespace NDetail { - /*! \internal */ - //////////////////////////////////////////////////////////////////////////////// - - // EReadStartCase tree representation: - // Root = xb - // BinaryStringOrOtherSpecialToken = x0b - // BinaryString = 00b - // OtherSpecialToken = 10b - // Other = x1b - // BinaryScalar = xx01b - // BinaryInt64 = 0001b - // BinaryDouble = 0101b - // BinaryFalse = 1001b - // BinaryTrue = 1101b - // Other = xxx11b - // Quote = 00011b - // DigitOrMinus = 00111b - // String = 01011b - // Space = 01111b - // Plus = 10011b - // None = 10111b - // Percent = 11011b + //////////////////////////////////////////////////////////////////////////////// + + namespace NDetail { + /*! \internal */ + //////////////////////////////////////////////////////////////////////////////// + + // EReadStartCase tree representation: + // Root = xb + // BinaryStringOrOtherSpecialToken = x0b + // BinaryString = 00b + // OtherSpecialToken = 10b + // Other = x1b + // BinaryScalar = xx01b + // BinaryInt64 = 0001b + // BinaryDouble = 0101b + // BinaryFalse = 1001b + // BinaryTrue = 1101b + // Other = xxx11b + // Quote = 00011b + // DigitOrMinus = 00111b + // String = 01011b + // Space = 01111b + // Plus = 10011b + // None = 10111b + // Percent = 11011b enum EReadStartCase : unsigned { - BinaryString = 0, // = 00b - OtherSpecialToken = 2, // = 10b - - BinaryInt64 = 1, // = 001b - BinaryDouble = 5, // = 101b - BinaryFalse = 9, // = 1001b - BinaryTrue = 13, // = 1101b - BinaryUint64 = 17, // = 10001b - - Quote = 3, // = 00011b - DigitOrMinus = 7, // = 00111b - String = 11, // = 01011b - Space = 15, // = 01111b - Plus = 19, // = 10011b - None = 23, // = 10111b - Percent = 27 // = 11011b - }; - - template <class TBlockStream, bool EnableLinePositionInfo> - class TLexer - : public TLexerBase<TBlockStream, EnableLinePositionInfo> { - private: - using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; - - static EReadStartCase GetStartState(char ch) { + BinaryString = 0, // = 00b + OtherSpecialToken = 2, // = 10b + + BinaryInt64 = 1, // = 001b + BinaryDouble = 5, // = 101b + BinaryFalse = 9, // = 1001b + BinaryTrue = 13, // = 1101b + BinaryUint64 = 17, // = 10001b + + Quote = 3, // = 00011b + DigitOrMinus = 7, // = 00111b + String = 11, // = 01011b + Space = 15, // = 01111b + Plus = 19, // = 10011b + None = 23, // = 10111b + Percent = 27 // = 11011b + }; + + template <class TBlockStream, bool EnableLinePositionInfo> + class TLexer + : public TLexerBase<TBlockStream, EnableLinePositionInfo> { + private: + using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; + + static EReadStartCase GetStartState(char ch) { #define NN EReadStartCase::None #define BS EReadStartCase::BinaryString #define BI EReadStartCase::BinaryInt64 @@ -71,68 +71,68 @@ namespace NYson { #define PC EReadStartCase::Percent #define TT(name) (EReadStartCase(static_cast<ui8>(ETokenType::name) << 2) | EReadStartCase::OtherSpecialToken) - static const ui8 lookupTable[] = - { - NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - - // 32 - SP, // ' ' - NN, // '!' - QU, // '"' - TT(Hash), // '#' - NN, // '$' - PC, // '%' - NN, // '&' - NN, // "'" - TT(LeftParenthesis), // '(' - TT(RightParenthesis), // ')' - NN, // '*' - PL, // '+' - TT(Comma), // ',' - DM, // '-' - NN, // '.' - NN, // '/' - - // 48 - DM, DM, DM, DM, DM, DM, DM, DM, DM, DM, // '0' - '9' - TT(Colon), // ':' - TT(Semicolon), // ';' - TT(LeftAngle), // '<' - TT(Equals), // '=' - TT(RightAngle), // '>' - NN, // '?' - - // 64 - NN, // '@' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' - TT(LeftBracket), // '[' - NN, // '\' - TT(RightBracket), // ']' - NN, // '^' - ST, // '_' - - // 96 - NN, // '`' - - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' - TT(LeftBrace), // '{' - NN, // '|' - TT(RightBrace), // '}' - NN, // '~' - NN, // '^?' non-printable - // 128 - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; + static const ui8 lookupTable[] = + { + NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + // 32 + SP, // ' ' + NN, // '!' + QU, // '"' + TT(Hash), // '#' + NN, // '$' + PC, // '%' + NN, // '&' + NN, // "'" + TT(LeftParenthesis), // '(' + TT(RightParenthesis), // ')' + NN, // '*' + PL, // '+' + TT(Comma), // ',' + DM, // '-' + NN, // '.' + NN, // '/' + + // 48 + DM, DM, DM, DM, DM, DM, DM, DM, DM, DM, // '0' - '9' + TT(Colon), // ':' + TT(Semicolon), // ';' + TT(LeftAngle), // '<' + TT(Equals), // '=' + TT(RightAngle), // '>' + NN, // '?' + + // 64 + NN, // '@' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' + TT(LeftBracket), // '[' + NN, // '\' + TT(RightBracket), // ']' + NN, // '^' + ST, // '_' + + // 96 + NN, // '`' + + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' + TT(LeftBrace), // '{' + NN, // '|' + TT(RightBrace), // '}' + NN, // '~' + NN, // '^?' non-printable + // 128 + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; #undef NN #undef BS @@ -144,153 +144,153 @@ namespace NYson { #undef PL #undef QU #undef TT - return static_cast<EReadStartCase>(lookupTable[static_cast<ui8>(ch)]); - } - - public: - TLexer(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) - : TBase(blockStream, memoryLimit) - { - } - - void GetToken(TToken* token) { - char ch1 = TBase::SkipSpaceAndGetChar(); - auto state = GetStartState(ch1); - auto stateBits = static_cast<unsigned>(state); - - if (ch1 == '\0') { - *token = TToken::EndOfStream; - return; - } - - if (stateBits & 1) { // Other = x1b - if (stateBits & 1 << 1) { // Other = xxx11b - if (state == EReadStartCase::Quote) { - TStringBuf value; - TBase::Advance(1); - TBase::ReadQuotedString(&value); - *token = TToken(value); - } else if (state == EReadStartCase::DigitOrMinus) { - ReadNumeric<true>(token); - } else if (state == EReadStartCase::Plus) { - TBase::Advance(1); - - char ch2 = TBase::template GetChar<true>(); - - if (!isdigit(ch2)) { - *token = TToken(ETokenType::Plus); - } else { - ReadNumeric<true>(token); - } - } else if (state == EReadStartCase::String) { - TStringBuf value; - TBase::template ReadUnquotedString<true>(&value); - *token = TToken(value); - } else if (state == EReadStartCase::Percent) { - TBase::Advance(1); - char ch3 = TBase::template GetChar<true>(); - if (ch3 == 't' || ch3 == 'f') { - *token = TToken(TBase::template ReadBoolean<true>()); - } else { - *token = TToken(TBase::template ReadNanOrInf<true>()); - } - } else { // None - Y_ASSERT(state == EReadStartCase::None); + return static_cast<EReadStartCase>(lookupTable[static_cast<ui8>(ch)]); + } + + public: + TLexer(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) + : TBase(blockStream, memoryLimit) + { + } + + void GetToken(TToken* token) { + char ch1 = TBase::SkipSpaceAndGetChar(); + auto state = GetStartState(ch1); + auto stateBits = static_cast<unsigned>(state); + + if (ch1 == '\0') { + *token = TToken::EndOfStream; + return; + } + + if (stateBits & 1) { // Other = x1b + if (stateBits & 1 << 1) { // Other = xxx11b + if (state == EReadStartCase::Quote) { + TStringBuf value; + TBase::Advance(1); + TBase::ReadQuotedString(&value); + *token = TToken(value); + } else if (state == EReadStartCase::DigitOrMinus) { + ReadNumeric<true>(token); + } else if (state == EReadStartCase::Plus) { + TBase::Advance(1); + + char ch2 = TBase::template GetChar<true>(); + + if (!isdigit(ch2)) { + *token = TToken(ETokenType::Plus); + } else { + ReadNumeric<true>(token); + } + } else if (state == EReadStartCase::String) { + TStringBuf value; + TBase::template ReadUnquotedString<true>(&value); + *token = TToken(value); + } else if (state == EReadStartCase::Percent) { + TBase::Advance(1); + char ch3 = TBase::template GetChar<true>(); + if (ch3 == 't' || ch3 == 'f') { + *token = TToken(TBase::template ReadBoolean<true>()); + } else { + *token = TToken(TBase::template ReadNanOrInf<true>()); + } + } else { // None + Y_ASSERT(state == EReadStartCase::None); ythrow TYsonException() << "Unexpected " << ch1; - } - } else { // BinaryScalar = x01b - TBase::Advance(1); - if (state == EReadStartCase::BinaryDouble) { - double value; - TBase::ReadBinaryDouble(&value); - *token = TToken(value); - } else if (state == EReadStartCase::BinaryInt64) { - i64 value; - TBase::ReadBinaryInt64(&value); - *token = TToken(value); - } else if (state == EReadStartCase::BinaryUint64) { - ui64 value; - TBase::ReadBinaryUint64(&value); - *token = TToken(value); - } else if (state == EReadStartCase::BinaryFalse) { - *token = TToken(false); - } else if (state == EReadStartCase::BinaryTrue) { - *token = TToken(true); - } else { - Y_FAIL("unreachable"); - } + } + } else { // BinaryScalar = x01b + TBase::Advance(1); + if (state == EReadStartCase::BinaryDouble) { + double value; + TBase::ReadBinaryDouble(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryInt64) { + i64 value; + TBase::ReadBinaryInt64(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryUint64) { + ui64 value; + TBase::ReadBinaryUint64(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryFalse) { + *token = TToken(false); + } else if (state == EReadStartCase::BinaryTrue) { + *token = TToken(true); + } else { + Y_FAIL("unreachable"); + } } - } else { // BinaryStringOrOtherSpecialToken = x0b + } else { // BinaryStringOrOtherSpecialToken = x0b TBase::Advance(1); - if (stateBits & 1 << 1) { // OtherSpecialToken = 10b - Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::OtherSpecialToken)); - *token = TToken(ETokenType(stateBits >> 2)); - } else { // BinaryString = 00b - Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::BinaryString)); - TStringBuf value; - TBase::ReadBinaryString(&value); - *token = TToken(value); + if (stateBits & 1 << 1) { // OtherSpecialToken = 10b + Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::OtherSpecialToken)); + *token = TToken(ETokenType(stateBits >> 2)); + } else { // BinaryString = 00b + Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::BinaryString)); + TStringBuf value; + TBase::ReadBinaryString(&value); + *token = TToken(value); } } } - template <bool AllowFinish> - void ReadNumeric(TToken* token) { - TStringBuf valueBuffer; - ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); + template <bool AllowFinish> + void ReadNumeric(TToken* token) { + TStringBuf valueBuffer; + ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); - if (numericResult == ENumericResult::Double) { - try { - *token = TToken(FromString<double>(valueBuffer)); - } catch (yexception&) { + if (numericResult == ENumericResult::Double) { + try { + *token = TToken(FromString<double>(valueBuffer)); + } catch (yexception&) { ythrow TYsonException() << "Error parsing double literal " << valueBuffer; - } - } else if (numericResult == ENumericResult::Int64) { - try { - *token = TToken(FromString<i64>(valueBuffer)); - } catch (yexception&) { + } + } else if (numericResult == ENumericResult::Int64) { + try { + *token = TToken(FromString<i64>(valueBuffer)); + } catch (yexception&) { ythrow TYsonException() << "Error parsing int64 literal " << valueBuffer; - } - } else if (numericResult == ENumericResult::Uint64) { - try { - *token = TToken(FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1))); - } catch (yexception&) { + } + } else if (numericResult == ENumericResult::Uint64) { + try { + *token = TToken(FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1))); + } catch (yexception&) { ythrow TYsonException() << "Error parsing uint64 literal " << valueBuffer; - } - } + } + } } - }; - //////////////////////////////////////////////////////////////////////////////// - /*! \endinternal */ + }; + //////////////////////////////////////////////////////////////////////////////// + /*! \endinternal */ } - class TStatelessYsonLexerImplBase { - public: - virtual size_t GetToken(const TStringBuf& data, TToken* token) = 0; - - virtual ~TStatelessYsonLexerImplBase() { - } - }; - - template <bool EnableLinePositionInfo> - class TStatelesYsonLexerImpl: public TStatelessYsonLexerImplBase { - private: - using TLexer = NDetail::TLexer<TStringReader, EnableLinePositionInfo>; - TLexer Lexer; - - public: - TStatelesYsonLexerImpl() - : Lexer(TStringReader(), Nothing()) - { - } - - size_t GetToken(const TStringBuf& data, TToken* token) override { - Lexer.SetBuffer(data.begin(), data.end()); - Lexer.GetToken(token); - return Lexer.Begin() - data.begin(); - } - }; - - //////////////////////////////////////////////////////////////////////////////// + class TStatelessYsonLexerImplBase { + public: + virtual size_t GetToken(const TStringBuf& data, TToken* token) = 0; + + virtual ~TStatelessYsonLexerImplBase() { + } + }; + + template <bool EnableLinePositionInfo> + class TStatelesYsonLexerImpl: public TStatelessYsonLexerImplBase { + private: + using TLexer = NDetail::TLexer<TStringReader, EnableLinePositionInfo>; + TLexer Lexer; + + public: + TStatelesYsonLexerImpl() + : Lexer(TStringReader(), Nothing()) + { + } + + size_t GetToken(const TStringBuf& data, TToken* token) override { + Lexer.SetBuffer(data.begin(), data.end()); + Lexer.GetToken(token); + return Lexer.Begin() - data.begin(); + } + }; + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/parser.cpp b/library/cpp/yson/parser.cpp index 45f8d87014..783f9b9047 100644 --- a/library/cpp/yson/parser.cpp +++ b/library/cpp/yson/parser.cpp @@ -7,173 +7,173 @@ #include <util/generic/buffer.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TYsonParser::TImpl { - public: - TImpl( + class TYsonParser::TImpl { + public: + TImpl( NYT::NYson::IYsonConsumer* consumer, - IInputStream* stream, - EYsonType type, - bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit = Nothing()) - : Consumer_(consumer) - , Stream_(stream) - , Type_(type) - , EnableLinePositionInfo_(enableLinePositionInfo) - , MemoryLimit_(memoryLimit) - { - } - - void Parse() { - TBuffer buffer(64 << 10); + IInputStream* stream, + EYsonType type, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit = Nothing()) + : Consumer_(consumer) + , Stream_(stream) + , Type_(type) + , EnableLinePositionInfo_(enableLinePositionInfo) + , MemoryLimit_(memoryLimit) + { + } + + void Parse() { + TBuffer buffer(64 << 10); ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStreamReader>( - TStreamReader(Stream_, buffer.Data(), buffer.Capacity()), - Consumer_, - Type_, - EnableLinePositionInfo_, - MemoryLimit_); - } - - private: + TStreamReader(Stream_, buffer.Data(), buffer.Capacity()), + Consumer_, + Type_, + EnableLinePositionInfo_, + MemoryLimit_); + } + + private: NYT::NYson::IYsonConsumer* Consumer_; - IInputStream* Stream_; - EYsonType Type_; - bool EnableLinePositionInfo_; - TMaybe<ui64> MemoryLimit_; - }; - - //////////////////////////////////////////////////////////////////////////////// - - TYsonParser::TYsonParser( + IInputStream* Stream_; + EYsonType Type_; + bool EnableLinePositionInfo_; + TMaybe<ui64> MemoryLimit_; + }; + + //////////////////////////////////////////////////////////////////////////////// + + TYsonParser::TYsonParser( NYT::NYson::IYsonConsumer* consumer, IInputStream* stream, EYsonType type, bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit) - : Impl(new TImpl(consumer, stream, type, enableLinePositionInfo, memoryLimit)) + TMaybe<ui64> memoryLimit) + : Impl(new TImpl(consumer, stream, type, enableLinePositionInfo, memoryLimit)) { } - TYsonParser::~TYsonParser() { - } + TYsonParser::~TYsonParser() { + } - void TYsonParser::Parse() { - Impl->Parse(); - } + void TYsonParser::Parse() { + Impl->Parse(); + } - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TStatelessYsonParser::TImpl { - private: - THolder<TStatelessYsonParserImplBase> Impl; + class TStatelessYsonParser::TImpl { + private: + THolder<TStatelessYsonParserImplBase> Impl; - public: - TImpl( + public: + TImpl( NYT::NYson::IYsonConsumer* consumer, - bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit) - : Impl( - enableLinePositionInfo + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) + : Impl( + enableLinePositionInfo ? static_cast<TStatelessYsonParserImplBase*>(new TStatelessYsonParserImpl<NYT::NYson::IYsonConsumer, true>(consumer, memoryLimit)) : static_cast<TStatelessYsonParserImplBase*>(new TStatelessYsonParserImpl<NYT::NYson::IYsonConsumer, false>(consumer, memoryLimit))) - { - } + { + } void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) { - Impl->Parse(data, type); - } - }; + Impl->Parse(data, type); + } + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - TStatelessYsonParser::TStatelessYsonParser( + TStatelessYsonParser::TStatelessYsonParser( NYT::NYson::IYsonConsumer* consumer, bool enableLinePositionInfo, TMaybe<ui64> memoryLimit) - : Impl(new TImpl(consumer, enableLinePositionInfo, memoryLimit)) - { - } - - TStatelessYsonParser::~TStatelessYsonParser() { - } - - void TStatelessYsonParser::Parse(const TStringBuf& data, EYsonType type) { + : Impl(new TImpl(consumer, enableLinePositionInfo, memoryLimit)) + { + } + + TStatelessYsonParser::~TStatelessYsonParser() { + } + + void TStatelessYsonParser::Parse(const TStringBuf& data, EYsonType type) { Impl->Parse(data, type); } - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - void ParseYsonStringBuffer( - const TStringBuf& buffer, + void ParseYsonStringBuffer( + const TStringBuf& buffer, NYT::NYson::IYsonConsumer* consumer, - EYsonType type, - bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit) { + EYsonType type, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) { ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStringReader>( - TStringReader(buffer.begin(), buffer.end()), - consumer, - type, - enableLinePositionInfo, - memoryLimit); - } - - //////////////////////////////////////////////////////////////////////////////// - - class TYsonListParser::TImpl { - public: - TImpl( + TStringReader(buffer.begin(), buffer.end()), + consumer, + type, + enableLinePositionInfo, + memoryLimit); + } + + //////////////////////////////////////////////////////////////////////////////// + + class TYsonListParser::TImpl { + public: + TImpl( NYT::NYson::IYsonConsumer* consumer, - IInputStream* stream, - bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit = Nothing()) - : Consumer_(consumer) - , Stream_(stream) - , EnableLinePositionInfo_(enableLinePositionInfo) - , MemoryLimit_(memoryLimit) - , Buffer_(64 << 10) - , Reader_(Stream_, Buffer_.Data(), Buffer_.Capacity()) - { - } - - bool Parse() { - if (!Impl_) { - Impl_.Reset( - EnableLinePositionInfo_ + IInputStream* stream, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit = Nothing()) + : Consumer_(consumer) + , Stream_(stream) + , EnableLinePositionInfo_(enableLinePositionInfo) + , MemoryLimit_(memoryLimit) + , Buffer_(64 << 10) + , Reader_(Stream_, Buffer_.Data(), Buffer_.Capacity()) + { + } + + bool Parse() { + if (!Impl_) { + Impl_.Reset( + EnableLinePositionInfo_ ? static_cast<TYsonListParserImplBase*>(new TYsonListParserImpl<NYT::NYson::IYsonConsumer, TStreamReader, true>(Reader_, Consumer_, MemoryLimit_)) : static_cast<TYsonListParserImplBase*>(new TYsonListParserImpl<NYT::NYson::IYsonConsumer, TStreamReader, false>(Reader_, Consumer_, MemoryLimit_))); - } - return Impl_->Parse(); - } + } + return Impl_->Parse(); + } - private: + private: NYT::NYson::IYsonConsumer* Consumer_; - IInputStream* Stream_; - bool EnableLinePositionInfo_; - TMaybe<ui64> MemoryLimit_; - TBuffer Buffer_; - TStreamReader Reader_; - THolder<TYsonListParserImplBase> Impl_; - }; + IInputStream* Stream_; + bool EnableLinePositionInfo_; + TMaybe<ui64> MemoryLimit_; + TBuffer Buffer_; + TStreamReader Reader_; + THolder<TYsonListParserImplBase> Impl_; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - TYsonListParser::TYsonListParser( + TYsonListParser::TYsonListParser( NYT::NYson::IYsonConsumer* consumer, IInputStream* stream, bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit) - : Impl(new TImpl(consumer, stream, enableLinePositionInfo, memoryLimit)) + TMaybe<ui64> memoryLimit) + : Impl(new TImpl(consumer, stream, enableLinePositionInfo, memoryLimit)) { } - TYsonListParser::~TYsonListParser() { - } + TYsonListParser::~TYsonListParser() { + } - bool TYsonListParser::Parse() { - return Impl->Parse(); - } + bool TYsonListParser::Parse() { + return Impl->Parse(); + } - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/parser.h b/library/cpp/yson/parser.h index f935840b98..dce35a8cd4 100644 --- a/library/cpp/yson/parser.h +++ b/library/cpp/yson/parser.h @@ -12,72 +12,72 @@ struct IYsonConsumer; } // namespace NYT::NYson namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TYsonParser { - public: - TYsonParser( + class TYsonParser { + public: + TYsonParser( NYT::NYson::IYsonConsumer* consumer, - IInputStream* stream, + IInputStream* stream, EYsonType type = ::NYson::EYsonType::Node, - bool enableLinePositionInfo = false, - TMaybe<ui64> memoryLimit = Nothing()); + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); - ~TYsonParser(); + ~TYsonParser(); - void Parse(); + void Parse(); - private: - class TImpl; - THolder<TImpl> Impl; - }; + private: + class TImpl; + THolder<TImpl> Impl; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TStatelessYsonParser { - public: - TStatelessYsonParser( + class TStatelessYsonParser { + public: + TStatelessYsonParser( NYT::NYson::IYsonConsumer* consumer, - bool enableLinePositionInfo = false, - TMaybe<ui64> memoryLimit = Nothing()); + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); - ~TStatelessYsonParser(); + ~TStatelessYsonParser(); void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node); - private: - class TImpl; - THolder<TImpl> Impl; - }; + private: + class TImpl; + THolder<TImpl> Impl; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TYsonListParser { - public: - TYsonListParser( + class TYsonListParser { + public: + TYsonListParser( NYT::NYson::IYsonConsumer* consumer, - IInputStream* stream, - bool enableLinePositionInfo = false, - TMaybe<ui64> memoryLimit = Nothing()); + IInputStream* stream, + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); - ~TYsonListParser(); + ~TYsonListParser(); - bool Parse(); // Returns false, if there is no more list items + bool Parse(); // Returns false, if there is no more list items - private: - class TImpl; - THolder<TImpl> Impl; - }; + private: + class TImpl; + THolder<TImpl> Impl; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - void ParseYsonStringBuffer( - const TStringBuf& buffer, + void ParseYsonStringBuffer( + const TStringBuf& buffer, NYT::NYson::IYsonConsumer* consumer, EYsonType type = ::NYson::EYsonType::Node, - bool enableLinePositionInfo = false, - TMaybe<ui64> memoryLimit = Nothing()); + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/parser_detail.h b/library/cpp/yson/parser_detail.h index a4cfdc71da..44223caf12 100644 --- a/library/cpp/yson/parser_detail.h +++ b/library/cpp/yson/parser_detail.h @@ -3,379 +3,379 @@ #include "detail.h" namespace NYson { - namespace NDetail { - //////////////////////////////////////////////////////////////////////////////// - - template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> - class TParser - : public TLexerBase<TBlockStream, EnableLinePositionInfo> { - private: - using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; - TConsumer* Consumer; - - public: - TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) - : TBase(blockStream, memoryLimit) - , Consumer(consumer) - { - } - - void DoParse(EYsonType ysonType) { - switch (ysonType) { + namespace NDetail { + //////////////////////////////////////////////////////////////////////////////// + + template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> + class TParser + : public TLexerBase<TBlockStream, EnableLinePositionInfo> { + private: + using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; + TConsumer* Consumer; + + public: + TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) + : TBase(blockStream, memoryLimit) + , Consumer(consumer) + { + } + + void DoParse(EYsonType ysonType) { + switch (ysonType) { case ::NYson::EYsonType::Node: - ParseNode<true>(); - break; + ParseNode<true>(); + break; case ::NYson::EYsonType::ListFragment: - ParseListFragment<true>(EndSymbol); - break; + ParseListFragment<true>(EndSymbol); + break; case ::NYson::EYsonType::MapFragment: - ParseMapFragment<true>(EndSymbol); - break; + ParseMapFragment<true>(EndSymbol); + break; - default: - Y_FAIL("unreachable"); - } + default: + Y_FAIL("unreachable"); + } - while (!(TBase::IsFinished() && TBase::IsEmpty())) { - if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) { + while (!(TBase::IsFinished() && TBase::IsEmpty())) { + if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) { ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found"; - } else if (!TBase::IsEmpty()) { - TBase::Advance(1); - } - } - } - - bool DoParseListFragment(bool first) { - bool ret = first ? first : ParseListSeparator<true>(EndSymbol); - return ret && ParseListItem<true>(EndSymbol); + } else if (!TBase::IsEmpty()) { + TBase::Advance(1); + } + } + } + + bool DoParseListFragment(bool first) { + bool ret = first ? first : ParseListSeparator<true>(EndSymbol); + return ret && ParseListItem<true>(EndSymbol); + } + + void ParseAttributes() { + Consumer->OnBeginAttributes(); + ParseMapFragment(EndAttributesSymbol); + TBase::SkipCharToken(EndAttributesSymbol); + Consumer->OnEndAttributes(); + } + + void ParseMap() { + Consumer->OnBeginMap(); + ParseMapFragment(EndMapSymbol); + TBase::SkipCharToken(EndMapSymbol); + Consumer->OnEndMap(); + } + + void ParseList() { + Consumer->OnBeginList(); + ParseListFragment(EndListSymbol); + TBase::SkipCharToken(EndListSymbol); + Consumer->OnEndList(); } - void ParseAttributes() { - Consumer->OnBeginAttributes(); - ParseMapFragment(EndAttributesSymbol); - TBase::SkipCharToken(EndAttributesSymbol); - Consumer->OnEndAttributes(); - } - - void ParseMap() { - Consumer->OnBeginMap(); - ParseMapFragment(EndMapSymbol); - TBase::SkipCharToken(EndMapSymbol); - Consumer->OnEndMap(); - } - - void ParseList() { - Consumer->OnBeginList(); - ParseListFragment(EndListSymbol); - TBase::SkipCharToken(EndListSymbol); - Consumer->OnEndList(); - } - - template <bool AllowFinish> - void ParseNode() { - return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar()); - } - - template <bool AllowFinish> - void ParseNode(char ch) { - if (ch == BeginAttributesSymbol) { - TBase::Advance(1); - ParseAttributes(); - ch = TBase::SkipSpaceAndGetChar(); - } - - switch (ch) { - case BeginMapSymbol: - TBase::Advance(1); - ParseMap(); - break; - - case BeginListSymbol: - TBase::Advance(1); - ParseList(); - break; - - case '"': { - TBase::Advance(1); - TStringBuf value; - TBase::ReadQuotedString(&value); - Consumer->OnStringScalar(value); - break; - } - case StringMarker: { - TBase::Advance(1); - TStringBuf value; - TBase::ReadBinaryString(&value); - Consumer->OnStringScalar(value); - break; - } - case Int64Marker: { - TBase::Advance(1); - i64 value; - TBase::ReadBinaryInt64(&value); - Consumer->OnInt64Scalar(value); - break; - } - case Uint64Marker: { - TBase::Advance(1); - ui64 value; - TBase::ReadBinaryUint64(&value); - Consumer->OnUint64Scalar(value); - break; - } - case DoubleMarker: { - TBase::Advance(1); - double value; - TBase::ReadBinaryDouble(&value); - Consumer->OnDoubleScalar(value); - break; - } - case FalseMarker: { - TBase::Advance(1); - Consumer->OnBooleanScalar(false); - break; - } - case TrueMarker: { - TBase::Advance(1); - Consumer->OnBooleanScalar(true); - break; - } - case EntitySymbol: - TBase::Advance(1); - Consumer->OnEntity(); - break; - - default: { - if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state - ReadNumeric<AllowFinish>(); - } else if (isalpha((unsigned char)ch) || ch == '_') { - TStringBuf value; - TBase::template ReadUnquotedString<AllowFinish>(&value); - Consumer->OnStringScalar(value); - } else if (ch == '%') { - TBase::Advance(1); - ch = TBase::template GetChar<AllowFinish>(); - if (ch == 't' || ch == 'f') { - Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>()); - } else { - Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>()); - } - } else { + template <bool AllowFinish> + void ParseNode() { + return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar()); + } + + template <bool AllowFinish> + void ParseNode(char ch) { + if (ch == BeginAttributesSymbol) { + TBase::Advance(1); + ParseAttributes(); + ch = TBase::SkipSpaceAndGetChar(); + } + + switch (ch) { + case BeginMapSymbol: + TBase::Advance(1); + ParseMap(); + break; + + case BeginListSymbol: + TBase::Advance(1); + ParseList(); + break; + + case '"': { + TBase::Advance(1); + TStringBuf value; + TBase::ReadQuotedString(&value); + Consumer->OnStringScalar(value); + break; + } + case StringMarker: { + TBase::Advance(1); + TStringBuf value; + TBase::ReadBinaryString(&value); + Consumer->OnStringScalar(value); + break; + } + case Int64Marker: { + TBase::Advance(1); + i64 value; + TBase::ReadBinaryInt64(&value); + Consumer->OnInt64Scalar(value); + break; + } + case Uint64Marker: { + TBase::Advance(1); + ui64 value; + TBase::ReadBinaryUint64(&value); + Consumer->OnUint64Scalar(value); + break; + } + case DoubleMarker: { + TBase::Advance(1); + double value; + TBase::ReadBinaryDouble(&value); + Consumer->OnDoubleScalar(value); + break; + } + case FalseMarker: { + TBase::Advance(1); + Consumer->OnBooleanScalar(false); + break; + } + case TrueMarker: { + TBase::Advance(1); + Consumer->OnBooleanScalar(true); + break; + } + case EntitySymbol: + TBase::Advance(1); + Consumer->OnEntity(); + break; + + default: { + if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state + ReadNumeric<AllowFinish>(); + } else if (isalpha((unsigned char)ch) || ch == '_') { + TStringBuf value; + TBase::template ReadUnquotedString<AllowFinish>(&value); + Consumer->OnStringScalar(value); + } else if (ch == '%') { + TBase::Advance(1); + ch = TBase::template GetChar<AllowFinish>(); + if (ch == 't' || ch == 'f') { + Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>()); + } else { + Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>()); + } + } else { ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node"; - } - } - } + } + } + } } - - void ParseKey() { - return ParseKey(TBase::SkipSpaceAndGetChar()); + + void ParseKey() { + return ParseKey(TBase::SkipSpaceAndGetChar()); } - - void ParseKey(char ch) { - switch (ch) { - case '"': { - TBase::Advance(1); - TStringBuf value; - TBase::ReadQuotedString(&value); - Consumer->OnKeyedItem(value); - break; - } - case StringMarker: { - TBase::Advance(1); - TStringBuf value; - TBase::ReadBinaryString(&value); - Consumer->OnKeyedItem(value); - break; - } - default: { - if (isalpha(ch) || ch == '_') { - TStringBuf value; - TBase::ReadUnquotedString(&value); - Consumer->OnKeyedItem(value); - } else { + + void ParseKey(char ch) { + switch (ch) { + case '"': { + TBase::Advance(1); + TStringBuf value; + TBase::ReadQuotedString(&value); + Consumer->OnKeyedItem(value); + break; + } + case StringMarker: { + TBase::Advance(1); + TStringBuf value; + TBase::ReadBinaryString(&value); + Consumer->OnKeyedItem(value); + break; + } + default: { + if (isalpha(ch) || ch == '_') { + TStringBuf value; + TBase::ReadUnquotedString(&value); + Consumer->OnKeyedItem(value); + } else { ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key"; - } - } - } + } + } + } } - template <bool AllowFinish> - void ParseMapFragment(char endSymbol) { - char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - while (ch != endSymbol) { - ParseKey(ch); - ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch == KeyValueSeparatorSymbol) { - TBase::Advance(1); + template <bool AllowFinish> + void ParseMapFragment(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + while (ch != endSymbol) { + ParseKey(ch); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == KeyValueSeparatorSymbol) { + TBase::Advance(1); } else { ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found"; } - ParseNode<AllowFinish>(); - ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch == KeyedItemSeparatorSymbol) { - TBase::Advance(1); - ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - } else if (ch != endSymbol) { + ParseNode<AllowFinish>(); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == KeyedItemSeparatorSymbol) { + TBase::Advance(1); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + } else if (ch != endSymbol) { ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol << "' or '" << endSymbol << "' but '" << ch << "' found"; - } + } } } - void ParseMapFragment(char endSymbol) { - ParseMapFragment<false>(endSymbol); - } - - template <bool AllowFinish> - bool ParseListItem(char endSymbol) { - char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch != endSymbol) { - Consumer->OnListItem(); - ParseNode<AllowFinish>(ch); - return true; - } - return false; + void ParseMapFragment(char endSymbol) { + ParseMapFragment<false>(endSymbol); } - - template <bool AllowFinish> - bool ParseListSeparator(char endSymbol) { - char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch == ListItemSeparatorSymbol) { - TBase::Advance(1); - return true; - } else if (ch != endSymbol) { + + template <bool AllowFinish> + bool ParseListItem(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch != endSymbol) { + Consumer->OnListItem(); + ParseNode<AllowFinish>(ch); + return true; + } + return false; + } + + template <bool AllowFinish> + bool ParseListSeparator(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == ListItemSeparatorSymbol) { + TBase::Advance(1); + return true; + } else if (ch != endSymbol) { ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol << "' or '" << endSymbol << "' but '" << ch << "' found"; - } - return false; + } + return false; } - - template <bool AllowFinish> - void ParseListFragment(char endSymbol) { - while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) { + + template <bool AllowFinish> + void ParseListFragment(char endSymbol) { + while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) { } } - void ParseListFragment(char endSymbol) { - ParseListFragment<false>(endSymbol); + void ParseListFragment(char endSymbol) { + ParseListFragment<false>(endSymbol); } - - template <bool AllowFinish> - void ReadNumeric() { - TStringBuf valueBuffer; - ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); - - if (numericResult == ENumericResult::Double) { - double value; - try { - value = FromString<double>(valueBuffer); - } catch (yexception& e) { - // This exception is wrapped in parser. + + template <bool AllowFinish> + void ReadNumeric() { + TStringBuf valueBuffer; + ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); + + if (numericResult == ENumericResult::Double) { + double value; + try { + value = FromString<double>(valueBuffer); + } catch (yexception& e) { + // This exception is wrapped in parser. ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e; - } - Consumer->OnDoubleScalar(value); - } else if (numericResult == ENumericResult::Int64) { - i64 value; - try { - value = FromString<i64>(valueBuffer); - } catch (yexception& e) { - // This exception is wrapped in parser. + } + Consumer->OnDoubleScalar(value); + } else if (numericResult == ENumericResult::Int64) { + i64 value; + try { + value = FromString<i64>(valueBuffer); + } catch (yexception& e) { + // This exception is wrapped in parser. ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e; - } - Consumer->OnInt64Scalar(value); - } else if (numericResult == ENumericResult::Uint64) { - ui64 value; - try { - value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1)); - } catch (yexception& e) { - // This exception is wrapped in parser. + } + Consumer->OnInt64Scalar(value); + } else if (numericResult == ENumericResult::Uint64) { + ui64 value; + try { + value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1)); + } catch (yexception& e) { + // This exception is wrapped in parser. ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e; - } - Consumer->OnUint64Scalar(value); - } + } + Consumer->OnUint64Scalar(value); + } } - }; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } - template <class TConsumer, class TBlockStream> - void ParseYsonStreamImpl( - const TBlockStream& blockStream, + template <class TConsumer, class TBlockStream> + void ParseYsonStreamImpl( + const TBlockStream& blockStream, NYT::NYson::IYsonConsumer* consumer, - EYsonType parsingMode, - bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit) { - if (enableLinePositionInfo) { - using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>; - TImpl impl(blockStream, consumer, memoryLimit); - impl.DoParse(parsingMode); - } else { - using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>; - TImpl impl(blockStream, consumer, memoryLimit); - impl.DoParse(parsingMode); + EYsonType parsingMode, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) { + if (enableLinePositionInfo) { + using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>; + TImpl impl(blockStream, consumer, memoryLimit); + impl.DoParse(parsingMode); + } else { + using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>; + TImpl impl(blockStream, consumer, memoryLimit); + impl.DoParse(parsingMode); } } - class TStatelessYsonParserImplBase { - public: + class TStatelessYsonParserImplBase { + public: virtual void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) = 0; - virtual ~TStatelessYsonParserImplBase() { + virtual ~TStatelessYsonParserImplBase() { + } + }; + + template <class TConsumer, bool EnableLinePositionInfo> + class TStatelessYsonParserImpl + : public TStatelessYsonParserImplBase { + private: + using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>; + TParser Parser; + + public: + TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit) + : Parser(TStringReader(), consumer, memoryLimit) + { } - }; - template <class TConsumer, bool EnableLinePositionInfo> - class TStatelessYsonParserImpl - : public TStatelessYsonParserImplBase { - private: - using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>; - TParser Parser; + void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override { + Parser.SetBuffer(data.begin(), data.end()); + Parser.DoParse(type); + } + }; - public: - TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit) - : Parser(TStringReader(), consumer, memoryLimit) - { - } + class TYsonListParserImplBase { + public: + virtual bool Parse() = 0; - void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override { - Parser.SetBuffer(data.begin(), data.end()); - Parser.DoParse(type); + virtual ~TYsonListParserImplBase() { + } + }; + + template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> + class TYsonListParserImpl + : public TYsonListParserImplBase { + private: + using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>; + TParser Parser; + bool First = true; + + public: + TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) + : Parser(blockStream, consumer, memoryLimit) + { } - }; - - class TYsonListParserImplBase { - public: - virtual bool Parse() = 0; - - virtual ~TYsonListParserImplBase() { - } - }; - - template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> - class TYsonListParserImpl - : public TYsonListParserImplBase { - private: - using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>; - TParser Parser; - bool First = true; - - public: - TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) - : Parser(blockStream, consumer, memoryLimit) - { - } - - bool Parse() override { - bool ret = Parser.DoParseListFragment(First); - First = false; - return ret; - } - }; - - //////////////////////////////////////////////////////////////////////////////// + + bool Parse() override { + bool ret = Parser.DoParseListFragment(First); + First = false; + return ret; + } + }; + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/public.h b/library/cpp/yson/public.h index c03e53d41f..1ed793592b 100644 --- a/library/cpp/yson/public.h +++ b/library/cpp/yson/public.h @@ -8,7 +8,7 @@ namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// using NYT::NYson::EYsonFormat; using NYT::NYson::EYsonType; @@ -22,9 +22,9 @@ namespace NYson { class TStatelessYsonParser; class TYsonListParser; - class TYsonException - : public yexception {}; + class TYsonException + : public yexception {}; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/token.cpp b/library/cpp/yson/token.cpp index 13c6f170bb..c8584c8c2e 100644 --- a/library/cpp/yson/token.cpp +++ b/library/cpp/yson/token.cpp @@ -4,233 +4,233 @@ #include <util/string/printf.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - ETokenType CharToTokenType(char ch) { - switch (ch) { - case ';': - return ETokenType::Semicolon; - case '=': - return ETokenType::Equals; - case '{': - return ETokenType::LeftBrace; - case '}': - return ETokenType::RightBrace; - case '#': - return ETokenType::Hash; - case '[': - return ETokenType::LeftBracket; - case ']': - return ETokenType::RightBracket; - case '<': - return ETokenType::LeftAngle; - case '>': - return ETokenType::RightAngle; - case '(': - return ETokenType::LeftParenthesis; - case ')': - return ETokenType::RightParenthesis; - case '+': - return ETokenType::Plus; - case ':': - return ETokenType::Colon; - case ',': - return ETokenType::Comma; - default: - return ETokenType::EndOfStream; - } - } - - char TokenTypeToChar(ETokenType type) { - switch (type) { - case ETokenType::Semicolon: - return ';'; - case ETokenType::Equals: - return '='; - case ETokenType::Hash: - return '#'; - case ETokenType::LeftBracket: - return '['; - case ETokenType::RightBracket: - return ']'; - case ETokenType::LeftBrace: - return '{'; - case ETokenType::RightBrace: - return '}'; - case ETokenType::LeftAngle: - return '<'; - case ETokenType::RightAngle: - return '>'; - case ETokenType::LeftParenthesis: - return '('; - case ETokenType::RightParenthesis: - return ')'; - case ETokenType::Plus: - return '+'; - case ETokenType::Colon: - return ':'; - case ETokenType::Comma: - return ','; - default: - Y_FAIL("unreachable"); - } - } - - TString TokenTypeToString(ETokenType type) { + //////////////////////////////////////////////////////////////////////////////// + + ETokenType CharToTokenType(char ch) { + switch (ch) { + case ';': + return ETokenType::Semicolon; + case '=': + return ETokenType::Equals; + case '{': + return ETokenType::LeftBrace; + case '}': + return ETokenType::RightBrace; + case '#': + return ETokenType::Hash; + case '[': + return ETokenType::LeftBracket; + case ']': + return ETokenType::RightBracket; + case '<': + return ETokenType::LeftAngle; + case '>': + return ETokenType::RightAngle; + case '(': + return ETokenType::LeftParenthesis; + case ')': + return ETokenType::RightParenthesis; + case '+': + return ETokenType::Plus; + case ':': + return ETokenType::Colon; + case ',': + return ETokenType::Comma; + default: + return ETokenType::EndOfStream; + } + } + + char TokenTypeToChar(ETokenType type) { + switch (type) { + case ETokenType::Semicolon: + return ';'; + case ETokenType::Equals: + return '='; + case ETokenType::Hash: + return '#'; + case ETokenType::LeftBracket: + return '['; + case ETokenType::RightBracket: + return ']'; + case ETokenType::LeftBrace: + return '{'; + case ETokenType::RightBrace: + return '}'; + case ETokenType::LeftAngle: + return '<'; + case ETokenType::RightAngle: + return '>'; + case ETokenType::LeftParenthesis: + return '('; + case ETokenType::RightParenthesis: + return ')'; + case ETokenType::Plus: + return '+'; + case ETokenType::Colon: + return ':'; + case ETokenType::Comma: + return ','; + default: + Y_FAIL("unreachable"); + } + } + + TString TokenTypeToString(ETokenType type) { return TString(1, TokenTypeToChar(type)); } - //////////////////////////////////////////////////////////////////////////////// - - const TToken TToken::EndOfStream; - - TToken::TToken() - : Type_(ETokenType::EndOfStream) - , Int64Value(0) - , Uint64Value(0) - , DoubleValue(0.0) - , BooleanValue(false) - { - } - - TToken::TToken(ETokenType type) - : Type_(type) - , Int64Value(0) - , Uint64Value(0) - , DoubleValue(0.0) - , BooleanValue(false) - { - switch (type) { - case ETokenType::String: - case ETokenType::Int64: - case ETokenType::Uint64: - case ETokenType::Double: - case ETokenType::Boolean: - Y_FAIL("unreachable"); - default: - break; - } - } - - TToken::TToken(const TStringBuf& stringValue) - : Type_(ETokenType::String) - , StringValue(stringValue) - , Int64Value(0) - , Uint64Value(0) - , DoubleValue(0.0) - , BooleanValue(false) - { - } - - TToken::TToken(i64 int64Value) - : Type_(ETokenType::Int64) - , Int64Value(int64Value) - , Uint64Value(0) - , DoubleValue(0.0) - { - } - - TToken::TToken(ui64 uint64Value) - : Type_(ETokenType::Uint64) - , Int64Value(0) - , Uint64Value(uint64Value) - , DoubleValue(0.0) - , BooleanValue(false) - { - } - - TToken::TToken(double doubleValue) - : Type_(ETokenType::Double) - , Int64Value(0) - , Uint64Value(0) - , DoubleValue(doubleValue) - , BooleanValue(false) - { - } - - TToken::TToken(bool booleanValue) - : Type_(ETokenType::Boolean) - , Int64Value(0) - , DoubleValue(0.0) - , BooleanValue(booleanValue) - { - } - - bool TToken::IsEmpty() const { - return Type_ == ETokenType::EndOfStream; - } - - const TStringBuf& TToken::GetStringValue() const { - CheckType(ETokenType::String); - return StringValue; - } - - i64 TToken::GetInt64Value() const { - CheckType(ETokenType::Int64); - return Int64Value; - } - - ui64 TToken::GetUint64Value() const { - CheckType(ETokenType::Uint64); - return Uint64Value; - } - - double TToken::GetDoubleValue() const { - CheckType(ETokenType::Double); - return DoubleValue; - } - - bool TToken::GetBooleanValue() const { - CheckType(ETokenType::Boolean); - return BooleanValue; - } - - void TToken::CheckType(ETokenType expectedType) const { - if (Type_ != expectedType) { - if (Type_ == ETokenType::EndOfStream) { + //////////////////////////////////////////////////////////////////////////////// + + const TToken TToken::EndOfStream; + + TToken::TToken() + : Type_(ETokenType::EndOfStream) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) + { + } + + TToken::TToken(ETokenType type) + : Type_(type) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) + { + switch (type) { + case ETokenType::String: + case ETokenType::Int64: + case ETokenType::Uint64: + case ETokenType::Double: + case ETokenType::Boolean: + Y_FAIL("unreachable"); + default: + break; + } + } + + TToken::TToken(const TStringBuf& stringValue) + : Type_(ETokenType::String) + , StringValue(stringValue) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) + { + } + + TToken::TToken(i64 int64Value) + : Type_(ETokenType::Int64) + , Int64Value(int64Value) + , Uint64Value(0) + , DoubleValue(0.0) + { + } + + TToken::TToken(ui64 uint64Value) + : Type_(ETokenType::Uint64) + , Int64Value(0) + , Uint64Value(uint64Value) + , DoubleValue(0.0) + , BooleanValue(false) + { + } + + TToken::TToken(double doubleValue) + : Type_(ETokenType::Double) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(doubleValue) + , BooleanValue(false) + { + } + + TToken::TToken(bool booleanValue) + : Type_(ETokenType::Boolean) + , Int64Value(0) + , DoubleValue(0.0) + , BooleanValue(booleanValue) + { + } + + bool TToken::IsEmpty() const { + return Type_ == ETokenType::EndOfStream; + } + + const TStringBuf& TToken::GetStringValue() const { + CheckType(ETokenType::String); + return StringValue; + } + + i64 TToken::GetInt64Value() const { + CheckType(ETokenType::Int64); + return Int64Value; + } + + ui64 TToken::GetUint64Value() const { + CheckType(ETokenType::Uint64); + return Uint64Value; + } + + double TToken::GetDoubleValue() const { + CheckType(ETokenType::Double); + return DoubleValue; + } + + bool TToken::GetBooleanValue() const { + CheckType(ETokenType::Boolean); + return BooleanValue; + } + + void TToken::CheckType(ETokenType expectedType) const { + if (Type_ != expectedType) { + if (Type_ == ETokenType::EndOfStream) { ythrow TYsonException() << "Unexpected end of stream (ExpectedType: " << TokenTypeToString(expectedType) << ")"; - } else { + } else { ythrow TYsonException() << "Unexpected token (Token: '" << ToString(*this) << "', Type: " << TokenTypeToString(Type_) << ", ExpectedType: " << TokenTypeToString(expectedType) << ")"; - } + } } } - void TToken::Reset() { - Type_ = ETokenType::EndOfStream; - Int64Value = 0; - Uint64Value = 0; - DoubleValue = 0.0; - StringValue = TStringBuf(); - BooleanValue = false; - } + void TToken::Reset() { + Type_ = ETokenType::EndOfStream; + Int64Value = 0; + Uint64Value = 0; + DoubleValue = 0.0; + StringValue = TStringBuf(); + BooleanValue = false; + } - TString ToString(const TToken& token) { - switch (token.GetType()) { - case ETokenType::EndOfStream: - return TString(); + TString ToString(const TToken& token) { + switch (token.GetType()) { + case ETokenType::EndOfStream: + return TString(); - case ETokenType::String: - return TString(token.GetStringValue()); + case ETokenType::String: + return TString(token.GetStringValue()); - case ETokenType::Int64: - return ::ToString(token.GetInt64Value()); + case ETokenType::Int64: + return ::ToString(token.GetInt64Value()); - case ETokenType::Uint64: - return ::ToString(token.GetUint64Value()); + case ETokenType::Uint64: + return ::ToString(token.GetUint64Value()); - case ETokenType::Double: - return ::ToString(token.GetDoubleValue()); + case ETokenType::Double: + return ::ToString(token.GetDoubleValue()); - case ETokenType::Boolean: - return token.GetBooleanValue() ? "true" : "false"; + case ETokenType::Boolean: + return token.GetBooleanValue() ? "true" : "false"; - default: - return TokenTypeToString(token.GetType()); - } + default: + return TokenTypeToString(token.GetType()); + } } - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/token.h b/library/cpp/yson/token.h index a0a3975eff..7283e56950 100644 --- a/library/cpp/yson/token.h +++ b/library/cpp/yson/token.h @@ -5,89 +5,89 @@ #include <util/generic/strbuf.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - enum ETokenType { - EndOfStream, - - String, - Int64, - Uint64, - Double, - Boolean, - - // Special values: - // YSON - Semicolon, // ; - Equals, // = - Hash, // # - LeftBracket, // [ - RightBracket, // ] - LeftBrace, // { - RightBrace, // } - LeftAngle, // < - RightAngle, // > - - // Table ranges - LeftParenthesis, // ( - RightParenthesis, // ) - Plus, // + - Colon, // : - Comma, // , - }; - - //////////////////////////////////////////////////////////////////////////////// - - ETokenType CharToTokenType(char ch); - char TokenTypeToChar(ETokenType type); - TString TokenTypeToString(ETokenType type); - - //////////////////////////////////////////////////////////////////////////////// - - class TLexerImpl; - - //////////////////////////////////////////////////////////////////////////////// - - class TToken { - public: - static const TToken EndOfStream; - - TToken(); - TToken(ETokenType type); - explicit TToken(const TStringBuf& stringValue); - explicit TToken(i64 int64Value); - explicit TToken(ui64 int64Value); - explicit TToken(double doubleValue); - explicit TToken(bool booleanValue); - - ETokenType GetType() const { - return Type_; - } - - bool IsEmpty() const; - const TStringBuf& GetStringValue() const; - i64 GetInt64Value() const; - ui64 GetUint64Value() const; - double GetDoubleValue() const; - bool GetBooleanValue() const; - - void CheckType(ETokenType expectedType) const; - void Reset(); - - private: - friend class TLexerImpl; - - ETokenType Type_; - - TStringBuf StringValue; - i64 Int64Value; - ui64 Uint64Value; - double DoubleValue; - bool BooleanValue; - }; - - TString ToString(const TToken& token); - - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// + + enum ETokenType { + EndOfStream, + + String, + Int64, + Uint64, + Double, + Boolean, + + // Special values: + // YSON + Semicolon, // ; + Equals, // = + Hash, // # + LeftBracket, // [ + RightBracket, // ] + LeftBrace, // { + RightBrace, // } + LeftAngle, // < + RightAngle, // > + + // Table ranges + LeftParenthesis, // ( + RightParenthesis, // ) + Plus, // + + Colon, // : + Comma, // , + }; + + //////////////////////////////////////////////////////////////////////////////// + + ETokenType CharToTokenType(char ch); + char TokenTypeToChar(ETokenType type); + TString TokenTypeToString(ETokenType type); + + //////////////////////////////////////////////////////////////////////////////// + + class TLexerImpl; + + //////////////////////////////////////////////////////////////////////////////// + + class TToken { + public: + static const TToken EndOfStream; + + TToken(); + TToken(ETokenType type); + explicit TToken(const TStringBuf& stringValue); + explicit TToken(i64 int64Value); + explicit TToken(ui64 int64Value); + explicit TToken(double doubleValue); + explicit TToken(bool booleanValue); + + ETokenType GetType() const { + return Type_; + } + + bool IsEmpty() const; + const TStringBuf& GetStringValue() const; + i64 GetInt64Value() const; + ui64 GetUint64Value() const; + double GetDoubleValue() const; + bool GetBooleanValue() const; + + void CheckType(ETokenType expectedType) const; + void Reset(); + + private: + friend class TLexerImpl; + + ETokenType Type_; + + TStringBuf StringValue; + i64 Int64Value; + ui64 Uint64Value; + double DoubleValue; + bool BooleanValue; + }; + + TString ToString(const TToken& token); + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/tokenizer.cpp b/library/cpp/yson/tokenizer.cpp index e0296e8ea7..06760170d4 100644 --- a/library/cpp/yson/tokenizer.cpp +++ b/library/cpp/yson/tokenizer.cpp @@ -1,37 +1,37 @@ #include "tokenizer.h" namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - TTokenizer::TTokenizer(const TStringBuf& input) - : Input(input) - , Parsed(0) - { - } - - bool TTokenizer::ParseNext() { - Input = Input.Tail(Parsed); - Token.Reset(); - Parsed = Lexer.GetToken(Input, &Token); - return !CurrentToken().IsEmpty(); - } - - const TToken& TTokenizer::CurrentToken() const { - return Token; - } - - ETokenType TTokenizer::GetCurrentType() const { - return CurrentToken().GetType(); - } - - TStringBuf TTokenizer::GetCurrentSuffix() const { - return Input.Tail(Parsed); - } - - const TStringBuf& TTokenizer::CurrentInput() const { - return Input; - } - - //////////////////////////////////////////////////////////////////////////////// - + //////////////////////////////////////////////////////////////////////////////// + + TTokenizer::TTokenizer(const TStringBuf& input) + : Input(input) + , Parsed(0) + { + } + + bool TTokenizer::ParseNext() { + Input = Input.Tail(Parsed); + Token.Reset(); + Parsed = Lexer.GetToken(Input, &Token); + return !CurrentToken().IsEmpty(); + } + + const TToken& TTokenizer::CurrentToken() const { + return Token; + } + + ETokenType TTokenizer::GetCurrentType() const { + return CurrentToken().GetType(); + } + + TStringBuf TTokenizer::GetCurrentSuffix() const { + return Input.Tail(Parsed); + } + + const TStringBuf& TTokenizer::CurrentInput() const { + return Input; + } + + //////////////////////////////////////////////////////////////////////////////// + } // namespace NYson diff --git a/library/cpp/yson/tokenizer.h b/library/cpp/yson/tokenizer.h index c4449f590c..0576aace95 100644 --- a/library/cpp/yson/tokenizer.h +++ b/library/cpp/yson/tokenizer.h @@ -4,25 +4,25 @@ #include "lexer.h" namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - class TTokenizer { - public: - explicit TTokenizer(const TStringBuf& input); + class TTokenizer { + public: + explicit TTokenizer(const TStringBuf& input); - bool ParseNext(); - const TToken& CurrentToken() const; - ETokenType GetCurrentType() const; - TStringBuf GetCurrentSuffix() const; - const TStringBuf& CurrentInput() const; + bool ParseNext(); + const TToken& CurrentToken() const; + ETokenType GetCurrentType() const; + TStringBuf GetCurrentSuffix() const; + const TStringBuf& CurrentInput() const; - private: - TStringBuf Input; - TToken Token; - TStatelessLexer Lexer; - size_t Parsed; - }; + private: + TStringBuf Input; + TToken Token; + TStatelessLexer Lexer; + size_t Parsed; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/varint.cpp b/library/cpp/yson/varint.cpp index d715d08294..d538ee3cff 100644 --- a/library/cpp/yson/varint.cpp +++ b/library/cpp/yson/varint.cpp @@ -5,67 +5,67 @@ #include <util/generic/yexception.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - int WriteVarUInt64(IOutputStream* output, ui64 value) { - bool stop = false; - int bytesWritten = 0; - while (!stop) { - ++bytesWritten; - ui8 byte = static_cast<ui8>(value | 0x80); - value >>= 7; - if (value == 0) { - stop = true; - byte &= 0x7F; - } - output->Write(byte); + int WriteVarUInt64(IOutputStream* output, ui64 value) { + bool stop = false; + int bytesWritten = 0; + while (!stop) { + ++bytesWritten; + ui8 byte = static_cast<ui8>(value | 0x80); + value >>= 7; + if (value == 0) { + stop = true; + byte &= 0x7F; + } + output->Write(byte); } - return bytesWritten; + return bytesWritten; } - int WriteVarInt32(IOutputStream* output, i32 value) { - return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode32(value))); - } + int WriteVarInt32(IOutputStream* output, i32 value) { + return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode32(value))); + } + + int WriteVarInt64(IOutputStream* output, i64 value) { + return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode64(value))); + } - int WriteVarInt64(IOutputStream* output, i64 value) { - return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode64(value))); - } + int ReadVarUInt64(IInputStream* input, ui64* value) { + size_t count = 0; + ui64 result = 0; - int ReadVarUInt64(IInputStream* input, ui64* value) { - size_t count = 0; - ui64 result = 0; + ui8 byte = 0; + do { + if (7 * count > 8 * sizeof(ui64)) { + ythrow yexception() << "The data is too long to read ui64"; + } + if (input->Read(&byte, 1) != 1) { + ythrow yexception() << "The data is too long to read ui64"; + } + result |= (static_cast<ui64>(byte & 0x7F)) << (7 * count); + ++count; + } while (byte & 0x80); - ui8 byte = 0; - do { - if (7 * count > 8 * sizeof(ui64)) { - ythrow yexception() << "The data is too long to read ui64"; - } - if (input->Read(&byte, 1) != 1) { - ythrow yexception() << "The data is too long to read ui64"; - } - result |= (static_cast<ui64>(byte & 0x7F)) << (7 * count); - ++count; - } while (byte & 0x80); - - *value = result; - return count; - } - - int ReadVarInt32(IInputStream* input, i32* value) { - ui64 varInt; - int bytesRead = ReadVarUInt64(input, &varInt); - if (varInt > Max<ui32>()) { + *value = result; + return count; + } + + int ReadVarInt32(IInputStream* input, i32* value) { + ui64 varInt; + int bytesRead = ReadVarUInt64(input, &varInt); + if (varInt > Max<ui32>()) { ythrow yexception() << "The data is too long to read ui64"; } - *value = ZigZagDecode32(static_cast<ui32>(varInt)); - return bytesRead; - } + *value = ZigZagDecode32(static_cast<ui32>(varInt)); + return bytesRead; + } - int ReadVarInt64(IInputStream* input, i64* value) { - ui64 varInt; - int bytesRead = ReadVarUInt64(input, &varInt); - *value = ZigZagDecode64(varInt); - return bytesRead; + int ReadVarInt64(IInputStream* input, i64* value) { + ui64 varInt; + int bytesRead = ReadVarUInt64(input, &varInt); + *value = ZigZagDecode64(varInt); + return bytesRead; } } // namespace NYson diff --git a/library/cpp/yson/varint.h b/library/cpp/yson/varint.h index 3733bbfc8c..80b1184e57 100644 --- a/library/cpp/yson/varint.h +++ b/library/cpp/yson/varint.h @@ -5,20 +5,20 @@ #include <util/system/defaults.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - // Various functions that read/write varints from/to a stream. + // Various functions that read/write varints from/to a stream. - // Returns the number of bytes written. - int WriteVarUInt64(IOutputStream* output, ui64 value); - int WriteVarInt32(IOutputStream* output, i32 value); - int WriteVarInt64(IOutputStream* output, i64 value); + // Returns the number of bytes written. + int WriteVarUInt64(IOutputStream* output, ui64 value); + int WriteVarInt32(IOutputStream* output, i32 value); + int WriteVarInt64(IOutputStream* output, i64 value); - // Returns the number of bytes read. - int ReadVarUInt64(IInputStream* input, ui64* value); - int ReadVarInt32(IInputStream* input, i32* value); - int ReadVarInt64(IInputStream* input, i64* value); + // Returns the number of bytes read. + int ReadVarUInt64(IInputStream* input, ui64* value); + int ReadVarInt32(IInputStream* input, i32* value); + int ReadVarInt64(IInputStream* input, i64* value); - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/writer.cpp b/library/cpp/yson/writer.cpp index 4810ca14d8..054459f9f5 100644 --- a/library/cpp/yson/writer.cpp +++ b/library/cpp/yson/writer.cpp @@ -11,345 +11,345 @@ #include <cmath> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - // Copied from <util/string/escape.cpp> - namespace { + // Copied from <util/string/escape.cpp> + namespace { inline char HexDigit(char value) { - Y_ASSERT(value < 16); - if (value < 10) - return '0' + value; - else - return 'A' + value - 10; - } + Y_ASSERT(value < 16); + if (value < 10) + return '0' + value; + else + return 'A' + value - 10; + } inline char OctDigit(char value) { - Y_ASSERT(value < 8); - return '0' + value; - } + Y_ASSERT(value < 8); + return '0' + value; + } inline bool IsPrintable(char c) { - return c >= 32 && c <= 126; - } + return c >= 32 && c <= 126; + } inline bool IsHexDigit(char c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); - } + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + } inline bool IsOctDigit(char c) { - return c >= '0' && c <= '7'; - } + return c >= '0' && c <= '7'; + } const size_t ESCAPE_C_BUFFER_SIZE = 4; inline size_t EscapeC(unsigned char c, char next, char r[ESCAPE_C_BUFFER_SIZE]) { - // (1) Printable characters go as-is, except backslash and double quote. - // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). - // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. - if (c == '\"') { - r[0] = '\\'; - r[1] = '\"'; - return 2; - } else if (c == '\\') { - r[0] = '\\'; - r[1] = '\\'; - return 2; - } else if (IsPrintable(c)) { - r[0] = c; - return 1; - } else if (c == '\r') { - r[0] = '\\'; - r[1] = 'r'; - return 2; - } else if (c == '\n') { - r[0] = '\\'; - r[1] = 'n'; - return 2; - } else if (c == '\t') { - r[0] = '\\'; - r[1] = 't'; - return 2; - } else if (c < 8 && !IsOctDigit(next)) { - r[0] = '\\'; - r[1] = OctDigit(c); - return 2; - } else if (!IsHexDigit(next)) { - r[0] = '\\'; - r[1] = 'x'; - r[2] = HexDigit((c & 0xF0) >> 4); - r[3] = HexDigit((c & 0x0F) >> 0); - return 4; - } else { - r[0] = '\\'; - r[1] = OctDigit((c & 0700) >> 6); - r[2] = OctDigit((c & 0070) >> 3); - r[3] = OctDigit((c & 0007) >> 0); - return 4; - } - } - - void EscapeC(const char* str, size_t len, IOutputStream& output) { - char buffer[ESCAPE_C_BUFFER_SIZE]; - - size_t i, j; - for (i = 0, j = 0; i < len; ++i) { - size_t rlen = EscapeC(str[i], (i + 1 < len ? str[i + 1] : 0), buffer); - - if (rlen > 1) { - output.Write(str + j, i - j); - j = i + 1; - output.Write(buffer, rlen); - } - } - - if (j > 0) { - output.Write(str + j, len - j); - } else { - output.Write(str, len); - } + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. + if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (IsPrintable(c)) { + r[0] = c; + return 1; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (c < 8 && !IsOctDigit(next)) { + r[0] = '\\'; + r[1] = OctDigit(c); + return 2; + } else if (!IsHexDigit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = HexDigit((c & 0xF0) >> 4); + r[3] = HexDigit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = OctDigit((c & 0700) >> 6); + r[2] = OctDigit((c & 0070) >> 3); + r[3] = OctDigit((c & 0007) >> 0); + return 4; + } } - TString FloatToStringWithNanInf(double value) { - if (std::isfinite(value)) { - return ::ToString(value); - } + void EscapeC(const char* str, size_t len, IOutputStream& output) { + char buffer[ESCAPE_C_BUFFER_SIZE]; + + size_t i, j; + for (i = 0, j = 0; i < len; ++i) { + size_t rlen = EscapeC(str[i], (i + 1 < len ? str[i + 1] : 0), buffer); + + if (rlen > 1) { + output.Write(str + j, i - j); + j = i + 1; + output.Write(buffer, rlen); + } + } + + if (j > 0) { + output.Write(str + j, len - j); + } else { + output.Write(str, len); + } + } + + TString FloatToStringWithNanInf(double value) { + if (std::isfinite(value)) { + return ::ToString(value); + } static const TStringBuf nanLiteral = "%nan"; static const TStringBuf infLiteral = "%inf"; static const TStringBuf negativeInfLiteral = "%-inf"; - TStringBuf str; - if (std::isnan(value)) { - str = nanLiteral; - } else if (value > 0) { - str = infLiteral; - } else { - str = negativeInfLiteral; - } + TStringBuf str; + if (std::isnan(value)) { + str = nanLiteral; + } else if (value > 0) { + str = infLiteral; + } else { + str = negativeInfLiteral; + } return TString(str.data(), str.size()); - } + } + + } + //////////////////////////////////////////////////////////////////////////////// + + TYsonWriter::TYsonWriter( + IOutputStream* stream, + EYsonFormat format, + EYsonType type, + bool enableRaw) + : Stream(stream) + , Format(format) + , Type(type) + , EnableRaw(enableRaw) + , Depth(0) + , BeforeFirstItem(true) + { + Y_ASSERT(stream); } - //////////////////////////////////////////////////////////////////////////////// - - TYsonWriter::TYsonWriter( - IOutputStream* stream, - EYsonFormat format, - EYsonType type, - bool enableRaw) - : Stream(stream) - , Format(format) - , Type(type) - , EnableRaw(enableRaw) - , Depth(0) - , BeforeFirstItem(true) - { - Y_ASSERT(stream); - } - - void TYsonWriter::WriteIndent() { - for (int i = 0; i < IndentSize * Depth; ++i) { - Stream->Write(' '); - } - } - - bool TYsonWriter::IsTopLevelFragmentContext() const { + void TYsonWriter::WriteIndent() { + for (int i = 0; i < IndentSize * Depth; ++i) { + Stream->Write(' '); + } + } + + bool TYsonWriter::IsTopLevelFragmentContext() const { return Depth == 0 && (Type == ::NYson::EYsonType::ListFragment || Type == ::NYson::EYsonType::MapFragment); } - void TYsonWriter::EndNode() { - if (IsTopLevelFragmentContext()) { - ETokenType separatorToken = + void TYsonWriter::EndNode() { + if (IsTopLevelFragmentContext()) { + ETokenType separatorToken = Type == ::NYson::EYsonType::ListFragment - ? ListItemSeparatorToken - : KeyedItemSeparatorToken; - Stream->Write(TokenTypeToChar(separatorToken)); + ? ListItemSeparatorToken + : KeyedItemSeparatorToken; + Stream->Write(TokenTypeToChar(separatorToken)); if (Format == EYsonFormat::Text || Format == EYsonFormat::Pretty) { - Stream->Write('\n'); - } + Stream->Write('\n'); + } } } - void TYsonWriter::BeginCollection(ETokenType beginToken) { - Stream->Write(TokenTypeToChar(beginToken)); - ++Depth; - BeforeFirstItem = true; - } - - void TYsonWriter::CollectionItem(ETokenType separatorToken) { - if (!IsTopLevelFragmentContext()) { - if (!BeforeFirstItem) { - Stream->Write(TokenTypeToChar(separatorToken)); - } - + void TYsonWriter::BeginCollection(ETokenType beginToken) { + Stream->Write(TokenTypeToChar(beginToken)); + ++Depth; + BeforeFirstItem = true; + } + + void TYsonWriter::CollectionItem(ETokenType separatorToken) { + if (!IsTopLevelFragmentContext()) { + if (!BeforeFirstItem) { + Stream->Write(TokenTypeToChar(separatorToken)); + } + if (Format == EYsonFormat::Pretty) { - Stream->Write('\n'); - WriteIndent(); - } + Stream->Write('\n'); + WriteIndent(); + } } - BeforeFirstItem = false; - } - - void TYsonWriter::EndCollection(ETokenType endToken) { - --Depth; + BeforeFirstItem = false; + } + + void TYsonWriter::EndCollection(ETokenType endToken) { + --Depth; if (Format == EYsonFormat::Pretty && !BeforeFirstItem) { Stream->Write('\n'); WriteIndent(); } - Stream->Write(TokenTypeToChar(endToken)); - BeforeFirstItem = false; + Stream->Write(TokenTypeToChar(endToken)); + BeforeFirstItem = false; } - void TYsonWriter::WriteStringScalar(const TStringBuf& value) { + void TYsonWriter::WriteStringScalar(const TStringBuf& value) { if (Format == EYsonFormat::Binary) { - Stream->Write(NDetail::StringMarker); - WriteVarInt32(Stream, static_cast<i32>(value.length())); - Stream->Write(value.begin(), value.length()); - } else { - Stream->Write('"'); - EscapeC(value.data(), value.length(), *Stream); - Stream->Write('"'); - } - } + Stream->Write(NDetail::StringMarker); + WriteVarInt32(Stream, static_cast<i32>(value.length())); + Stream->Write(value.begin(), value.length()); + } else { + Stream->Write('"'); + EscapeC(value.data(), value.length(), *Stream); + Stream->Write('"'); + } + } void TYsonWriter::OnStringScalar(TStringBuf value) { - WriteStringScalar(value); - EndNode(); + WriteStringScalar(value); + EndNode(); } - void TYsonWriter::OnInt64Scalar(i64 value) { + void TYsonWriter::OnInt64Scalar(i64 value) { if (Format == EYsonFormat::Binary) { - Stream->Write(NDetail::Int64Marker); - WriteVarInt64(Stream, value); - } else { - Stream->Write(::ToString(value)); - } - EndNode(); + Stream->Write(NDetail::Int64Marker); + WriteVarInt64(Stream, value); + } else { + Stream->Write(::ToString(value)); + } + EndNode(); } - void TYsonWriter::OnUint64Scalar(ui64 value) { + void TYsonWriter::OnUint64Scalar(ui64 value) { if (Format == EYsonFormat::Binary) { - Stream->Write(NDetail::Uint64Marker); - WriteVarUInt64(Stream, value); - } else { - Stream->Write(::ToString(value)); - Stream->Write("u"); - } - EndNode(); + Stream->Write(NDetail::Uint64Marker); + WriteVarUInt64(Stream, value); + } else { + Stream->Write(::ToString(value)); + Stream->Write("u"); + } + EndNode(); } - void TYsonWriter::OnDoubleScalar(double value) { + void TYsonWriter::OnDoubleScalar(double value) { if (Format == EYsonFormat::Binary) { - Stream->Write(NDetail::DoubleMarker); - Stream->Write(&value, sizeof(double)); - } else { - auto str = FloatToStringWithNanInf(value); - Stream->Write(str); - if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) { - Stream->Write("."); - } - } - EndNode(); + Stream->Write(NDetail::DoubleMarker); + Stream->Write(&value, sizeof(double)); + } else { + auto str = FloatToStringWithNanInf(value); + Stream->Write(str); + if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) { + Stream->Write("."); + } + } + EndNode(); } - void TYsonWriter::OnBooleanScalar(bool value) { + void TYsonWriter::OnBooleanScalar(bool value) { if (Format == EYsonFormat::Binary) { - Stream->Write(value ? NDetail::TrueMarker : NDetail::FalseMarker); - } else { - Stream->Write(value ? "%true" : "%false"); + Stream->Write(value ? NDetail::TrueMarker : NDetail::FalseMarker); + } else { + Stream->Write(value ? "%true" : "%false"); } - EndNode(); + EndNode(); } - void TYsonWriter::OnEntity() { - Stream->Write(TokenTypeToChar(EntityToken)); - EndNode(); + void TYsonWriter::OnEntity() { + Stream->Write(TokenTypeToChar(EntityToken)); + EndNode(); } - void TYsonWriter::OnBeginList() { - BeginCollection(BeginListToken); - } + void TYsonWriter::OnBeginList() { + BeginCollection(BeginListToken); + } - void TYsonWriter::OnListItem() { - CollectionItem(ListItemSeparatorToken); - } + void TYsonWriter::OnListItem() { + CollectionItem(ListItemSeparatorToken); + } - void TYsonWriter::OnEndList() { - EndCollection(EndListToken); - EndNode(); - } + void TYsonWriter::OnEndList() { + EndCollection(EndListToken); + EndNode(); + } - void TYsonWriter::OnBeginMap() { - BeginCollection(BeginMapToken); - } + void TYsonWriter::OnBeginMap() { + BeginCollection(BeginMapToken); + } void TYsonWriter::OnKeyedItem(TStringBuf key) { - CollectionItem(KeyedItemSeparatorToken); + CollectionItem(KeyedItemSeparatorToken); - WriteStringScalar(key); + WriteStringScalar(key); if (Format == NYson::EYsonFormat::Pretty) { - Stream->Write(' '); - } - Stream->Write(TokenTypeToChar(KeyValueSeparatorToken)); + Stream->Write(' '); + } + Stream->Write(TokenTypeToChar(KeyValueSeparatorToken)); if (Format == NYson::EYsonFormat::Pretty) { - Stream->Write(' '); - } + Stream->Write(' '); + } - BeforeFirstItem = false; + BeforeFirstItem = false; } - - void TYsonWriter::OnEndMap() { - EndCollection(EndMapToken); - EndNode(); + + void TYsonWriter::OnEndMap() { + EndCollection(EndMapToken); + EndNode(); } - void TYsonWriter::OnBeginAttributes() { - BeginCollection(BeginAttributesToken); - } + void TYsonWriter::OnBeginAttributes() { + BeginCollection(BeginAttributesToken); + } - void TYsonWriter::OnEndAttributes() { - EndCollection(EndAttributesToken); + void TYsonWriter::OnEndAttributes() { + EndCollection(EndAttributesToken); if (Format == NYson::EYsonFormat::Pretty) { - Stream->Write(' '); - } - } + Stream->Write(' '); + } + } void TYsonWriter::OnRaw(TStringBuf yson, EYsonType type) { - if (EnableRaw) { - Stream->Write(yson); - BeforeFirstItem = false; - } else { - TYsonConsumerBase::OnRaw(yson, type); - } - } - - TYsonWriter::TState TYsonWriter::State() const { - TState state; - state.Depth = Depth; - state.BeforeFirstItem = BeforeFirstItem; - return state; + if (EnableRaw) { + Stream->Write(yson); + BeforeFirstItem = false; + } else { + TYsonConsumerBase::OnRaw(yson, type); + } } - void TYsonWriter::Reset(const TState& state) { - Depth = state.Depth; - BeforeFirstItem = state.BeforeFirstItem; + TYsonWriter::TState TYsonWriter::State() const { + TState state; + state.Depth = Depth; + state.BeforeFirstItem = BeforeFirstItem; + return state; } - //////////////////////////////////////////////////////////////////////////////// + void TYsonWriter::Reset(const TState& state) { + Depth = state.Depth; + BeforeFirstItem = state.BeforeFirstItem; + } + + //////////////////////////////////////////////////////////////////////////////// - void ReformatYsonStream( + void ReformatYsonStream( IInputStream* input, - IOutputStream* output, - EYsonFormat format, - EYsonType type) { - TYsonWriter writer(output, format, type); - TYsonParser parser(&writer, input, type); - parser.Parse(); - } - - //////////////////////////////////////////////////////////////////////////////// + IOutputStream* output, + EYsonFormat format, + EYsonType type) { + TYsonWriter writer(output, format, type); + TYsonParser parser(&writer, input, type); + parser.Parse(); + } + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/writer.h b/library/cpp/yson/writer.h index db4ed0f5e6..40f5d7d501 100644 --- a/library/cpp/yson/writer.h +++ b/library/cpp/yson/writer.h @@ -10,80 +10,80 @@ class IOutputStream; class IZeroCopyInput; namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - class TYsonWriter - : public TYsonConsumerBase, - private TNonCopyable { - public: - class TState { - private: - int Depth; - bool BeforeFirstItem; - - friend class TYsonWriter; - }; - - public: - TYsonWriter( - IOutputStream* stream, + //////////////////////////////////////////////////////////////////////////////// + + class TYsonWriter + : public TYsonConsumerBase, + private TNonCopyable { + public: + class TState { + private: + int Depth; + bool BeforeFirstItem; + + friend class TYsonWriter; + }; + + public: + TYsonWriter( + IOutputStream* stream, EYsonFormat format = EYsonFormat::Binary, EYsonType type = ::NYson::EYsonType::Node, - bool enableRaw = false); + bool enableRaw = false); void OnStringScalar(TStringBuf value) override; - void OnInt64Scalar(i64 value) override; - void OnUint64Scalar(ui64 value) override; - void OnDoubleScalar(double value) override; - void OnBooleanScalar(bool value) override; - void OnEntity() override; + void OnInt64Scalar(i64 value) override; + void OnUint64Scalar(ui64 value) override; + void OnDoubleScalar(double value) override; + void OnBooleanScalar(bool value) override; + void OnEntity() override; - void OnBeginList() override; - void OnListItem() override; - void OnEndList() override; + void OnBeginList() override; + void OnListItem() override; + void OnEndList() override; - void OnBeginMap() override; + void OnBeginMap() override; void OnKeyedItem(TStringBuf key) override; - void OnEndMap() override; + void OnEndMap() override; - void OnBeginAttributes() override; - void OnEndAttributes() override; + void OnBeginAttributes() override; + void OnEndAttributes() override; void OnRaw(TStringBuf yson, EYsonType type = ::NYson::EYsonType::Node) override; - TState State() const; - void Reset(const TState& state); + TState State() const; + void Reset(const TState& state); - protected: - IOutputStream* Stream; - EYsonFormat Format; - EYsonType Type; - bool EnableRaw; + protected: + IOutputStream* Stream; + EYsonFormat Format; + EYsonType Type; + bool EnableRaw; - int Depth; - bool BeforeFirstItem; + int Depth; + bool BeforeFirstItem; - static const int IndentSize = 4; + static const int IndentSize = 4; - void WriteIndent(); - void WriteStringScalar(const TStringBuf& value); + void WriteIndent(); + void WriteStringScalar(const TStringBuf& value); - void BeginCollection(ETokenType beginToken); - void CollectionItem(ETokenType separatorToken); - void EndCollection(ETokenType endToken); + void BeginCollection(ETokenType beginToken); + void CollectionItem(ETokenType separatorToken); + void EndCollection(ETokenType endToken); - bool IsTopLevelFragmentContext() const; - void EndNode(); - }; + bool IsTopLevelFragmentContext() const; + void EndNode(); + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - void ReformatYsonStream( + void ReformatYsonStream( IInputStream* input, - IOutputStream* output, + IOutputStream* output, EYsonFormat format = EYsonFormat::Binary, EYsonType type = ::NYson::EYsonType::Node); - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson diff --git a/library/cpp/yson/zigzag.h b/library/cpp/yson/zigzag.h index e36df71714..2f1190508f 100644 --- a/library/cpp/yson/zigzag.h +++ b/library/cpp/yson/zigzag.h @@ -3,29 +3,29 @@ #include <util/system/defaults.h> namespace NYson { - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// - //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x| - //! Actually taken 'as is' from protobuf/wire_format_lite.h + //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x| + //! Actually taken 'as is' from protobuf/wire_format_lite.h - inline ui32 ZigZagEncode32(i32 n) { - // Note: the right-shift must be arithmetic - return (ui32(n) << 1) ^ (n >> 31); - } + inline ui32 ZigZagEncode32(i32 n) { + // Note: the right-shift must be arithmetic + return (ui32(n) << 1) ^ (n >> 31); + } - inline i32 ZigZagDecode32(ui32 n) { - return (n >> 1) ^ -static_cast<i32>(n & 1); - } + inline i32 ZigZagDecode32(ui32 n) { + return (n >> 1) ^ -static_cast<i32>(n & 1); + } - inline ui64 ZigZagEncode64(i64 n) { - // Note: the right-shift must be arithmetic - return (ui64(n) << 1) ^ (n >> 63); - } + inline ui64 ZigZagEncode64(i64 n) { + // Note: the right-shift must be arithmetic + return (ui64(n) << 1) ^ (n >> 63); + } - inline i64 ZigZagDecode64(ui64 n) { - return (n >> 1) ^ -static_cast<i64>(n & 1); - } + inline i64 ZigZagDecode64(ui64 n) { + return (n >> 1) ^ -static_cast<i64>(n & 1); + } + + //////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////// - } // namespace NYson |