diff options
author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yson |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yson')
43 files changed, 6162 insertions, 0 deletions
diff --git a/library/cpp/yson/consumer.cpp b/library/cpp/yson/consumer.cpp new file mode 100644 index 00000000000..40ae452978c --- /dev/null +++ b/library/cpp/yson/consumer.cpp @@ -0,0 +1,15 @@ +#include "consumer.h" +#include "string.h" +#include "parser.h" + +namespace NYson { + + //////////////////////////////////////////////////////////////////////////////// + + void TYsonConsumerBase::OnRaw(TStringBuf str, NYT::NYson::EYsonType type) { + ParseYsonStringBuffer(str, this, type); + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/consumer.h b/library/cpp/yson/consumer.h new file mode 100644 index 00000000000..d5a9d663355 --- /dev/null +++ b/library/cpp/yson/consumer.h @@ -0,0 +1,13 @@ +#pragma once + +#include <library/cpp/yt/yson/consumer.h> + +#include <util/generic/strbuf.h> +#include <util/system/defaults.h> + +namespace NYson { + struct TYsonConsumerBase + : public virtual NYT::NYson::IYsonConsumer { + void OnRaw(TStringBuf ysonNode, NYT::NYson::EYsonType type) override; + }; +} // namespace NYson diff --git a/library/cpp/yson/detail.h b/library/cpp/yson/detail.h new file mode 100644 index 00000000000..27f5e8ffff2 --- /dev/null +++ b/library/cpp/yson/detail.h @@ -0,0 +1,806 @@ +#pragma once + +#include "public.h" +#include "zigzag.h" + +#include <util/generic/vector.h> +#include <util/generic/maybe.h> +#include <util/generic/buffer.h> +#include <util/string/escape.h> +#include <util/string/cast.h> +#include <util/stream/input.h> + +namespace NYson { + namespace NDetail { + //////////////////////////////////////////////////////////////////////////////// + + //! Indicates the beginning of a list. + const char BeginListSymbol = '['; + //! Indicates the end of a list. + const char EndListSymbol = ']'; + + //! Indicates the beginning of a map. + const char BeginMapSymbol = '{'; + //! Indicates the end of a map. + const char EndMapSymbol = '}'; + + //! 
Indicates the beginning of an attribute map. + const char BeginAttributesSymbol = '<'; + //! Indicates the end of an attribute map. + const char EndAttributesSymbol = '>'; + + //! Separates items in lists. + const char ListItemSeparatorSymbol = ';'; + //! Separates items in maps, attributes. + const char KeyedItemSeparatorSymbol = ';'; + //! Separates keys from values in maps. + const char KeyValueSeparatorSymbol = '='; + + //! Indicates an entity. + const char EntitySymbol = '#'; + + //! Indicates end of stream. + const char EndSymbol = '\0'; + + //! Marks the beginning of a binary string literal. + const char StringMarker = '\x01'; + //! Marks the beginning of a binary i64 literal. + const char Int64Marker = '\x02'; + //! Marks the beginning of a binary double literal. + const char DoubleMarker = '\x03'; + //! Marks true and false values of boolean. + const char FalseMarker = '\x04'; + const char TrueMarker = '\x05'; + //! Marks the beginning of a binary ui64 literal. + const char Uint64Marker = '\x06'; + + //////////////////////////////////////////////////////////////////////////////// + + template <bool EnableLinePositionInfo> + class TPositionInfo; + + template <> + class TPositionInfo<true> { + private: + int Offset; + int Line; + int Column; + + public: + TPositionInfo() + : Offset(0) + , Line(1) + , Column(1) + { + } + + void OnRangeConsumed(const char* begin, const char* end) { + Offset += end - begin; + for (auto current = begin; current != end; ++current) { + ++Column; + if (*current == '\n') { //TODO: memchr + ++Line; + Column = 1; + } + } + } + }; + + template <> + class TPositionInfo<false> { + private: + int Offset; + + public: + TPositionInfo() + : Offset(0) + { + } + + void OnRangeConsumed(const char* begin, const char* end) { + Offset += end - begin; + } + }; + + template <class TBlockStream, class TPositionBase> + class TCharStream + : public TBlockStream, + public TPositionBase { + public: + TCharStream(const TBlockStream& blockStream) + : 
TBlockStream(blockStream) + { + } + + bool IsEmpty() const { + return TBlockStream::Begin() == TBlockStream::End(); + } + + template <bool AllowFinish> + void Refresh() { + while (IsEmpty() && !TBlockStream::IsFinished()) { + TBlockStream::RefreshBlock(); + } + if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) { + ythrow TYsonException() << "Premature end of yson stream"; + } + } + + void Refresh() { + return Refresh<false>(); + } + + template <bool AllowFinish> + char GetChar() { + Refresh<AllowFinish>(); + return !IsEmpty() ? *TBlockStream::Begin() : '\0'; + } + + char GetChar() { + return GetChar<false>(); + } + + void Advance(size_t bytes) { + TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes); + TBlockStream::Advance(bytes); + } + + size_t Length() const { + return TBlockStream::End() - TBlockStream::Begin(); + } + }; + + template <class TBaseStream> + class TCodedStream + : public TBaseStream { + private: + static const int MaxVarintBytes = 10; + static const int MaxVarint32Bytes = 5; + + const ui8* BeginByte() const { + return reinterpret_cast<const ui8*>(TBaseStream::Begin()); + } + + const ui8* EndByte() const { + return reinterpret_cast<const ui8*>(TBaseStream::End()); + } + + // Following functions is an adaptation Protobuf code from coded_stream.cc + bool ReadVarint32FromArray(ui32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. 
+ const ui8* ptr = BeginByte(); + ui32 b; + ui32 result; + + b = *(ptr++); + result = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + result |= b << 28; + if (!(b & 0x80)) + goto done; + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + + for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) { + b = *(ptr++); + if (!(b & 0x80)) + goto done; + } + + // We have overrun the maximum size of a Varint (10 bytes). Assume + // the data is corrupt. + return false; + + done: + TBaseStream::Advance(ptr - BeginByte()); + *value = result; + return true; + } + + bool ReadVarint32Fallback(ui32* value) { + if (BeginByte() + MaxVarint32Bytes <= EndByte() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) + { + return ReadVarint32FromArray(value); + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarint32Slow(value); + } + } + + bool ReadVarint32Slow(ui32* value) { + ui64 result; + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte Varints. + if (ReadVarint64Fallback(&result)) { + *value = static_cast<ui32>(result); + return true; + } else { + return false; + } + } + + bool ReadVarint64Slow(ui64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. 
+ + ui64 result = 0; + int count = 0; + ui32 b; + + do { + if (count == MaxVarintBytes) { + return false; + } + while (BeginByte() == EndByte()) { + TBaseStream::Refresh(); + } + b = *BeginByte(); + result |= static_cast<ui64>(b & 0x7F) << (7 * count); + TBaseStream::Advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; + } + + bool ReadVarint64Fallback(ui64* value) { + if (BeginByte() + MaxVarintBytes <= EndByte() || + // Optimization: If the Varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80))) + { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + + const ui8* ptr = BeginByte(); + ui32 b; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + ui32 part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); + part0 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part0 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 14; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part1 |= (b & 0x7F) << 21; + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 = (b & 0x7F); + if (!(b & 0x80)) + goto done; + b = *(ptr++); + part2 |= (b & 0x7F) << 7; + if (!(b & 0x80)) + goto done; + + // We have overrun the maximum size of a Varint (10 bytes). The data + // must be corrupt. 
+ return false; + + done: + TBaseStream::Advance(ptr - BeginByte()); + *value = (static_cast<ui64>(part0)) | + (static_cast<ui64>(part1) << 28) | + (static_cast<ui64>(part2) << 56); + return true; + } else { + return ReadVarint64Slow(value); + } + } + + public: + TCodedStream(const TBaseStream& baseStream) + : TBaseStream(baseStream) + { + } + + bool ReadVarint64(ui64* value) { + if (BeginByte() < EndByte() && *BeginByte() < 0x80) { + *value = *BeginByte(); + TBaseStream::Advance(1); + return true; + } else { + return ReadVarint64Fallback(value); + } + } + + bool ReadVarint32(ui32* value) { + if (BeginByte() < EndByte() && *BeginByte() < 0x80) { + *value = *BeginByte(); + TBaseStream::Advance(1); + return true; + } else { + return ReadVarint32Fallback(value); + } + } + }; + + enum ENumericResult { + Int64 = 0, + Uint64 = 1, + Double = 2 + }; + + template <class TBlockStream, bool EnableLinePositionInfo> + class TLexerBase + : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> { + private: + using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>; + TVector<char> Buffer_; + TMaybe<ui64> MemoryLimit_; + + void CheckMemoryLimit() { + if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) { + ythrow TYsonException() + << "Memory limit exceeded while parsing YSON stream: allocated " + << Buffer_.capacity() << ", limit " << (*MemoryLimit_); + } + } + + public: + TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) + : TBaseStream(blockStream) + , MemoryLimit_(memoryLimit) + { + } + + protected: + /// Lexer routines + + template <bool AllowFinish> + ENumericResult ReadNumeric(TStringBuf* value) { + Buffer_.clear(); + ENumericResult result = ENumericResult::Int64; + while (true) { + char ch = TBaseStream::template GetChar<AllowFinish>(); + if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-' + Buffer_.push_back(ch); + } else if (ch == '.' 
|| ch == 'e' || ch == 'E') { + Buffer_.push_back(ch); + result = ENumericResult::Double; + } else if (ch == 'u') { + Buffer_.push_back(ch); + result = ENumericResult::Uint64; + } else if (isalpha(ch)) { + ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal"; + } else { + break; + } + CheckMemoryLimit(); + TBaseStream::Advance(1); + } + + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + return result; + } + + template <bool AllowFinish> + double ReadNanOrInf() { + static const TStringBuf nanString = "nan"; + static const TStringBuf infString = "inf"; + static const TStringBuf plusInfString = "+inf"; + static const TStringBuf minusInfString = "-inf"; + + TStringBuf expectedString; + double expectedValue; + char ch = TBaseStream::template GetChar<AllowFinish>(); + switch (ch) { + case '+': + expectedString = plusInfString; + expectedValue = std::numeric_limits<double>::infinity(); + break; + case '-': + expectedString = minusInfString; + expectedValue = -std::numeric_limits<double>::infinity(); + break; + case 'i': + expectedString = infString; + expectedValue = std::numeric_limits<double>::infinity(); + break; + case 'n': + expectedString = nanString; + expectedValue = std::numeric_limits<double>::quiet_NaN(); + break; + default: + ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'"; + } + + for (size_t i = 0; i < expectedString.size(); ++i) { + if (expectedString[i] != ch) { + ythrow TYsonException() + << "Incorrect %-literal prefix " + << "'" << expectedString.SubStr(0, i) << ch << "'," + << "expected " << expectedString; + } + TBaseStream::Advance(1); + ch = TBaseStream::template GetChar<AllowFinish>(); + } + + return expectedValue; + } + + void ReadQuotedString(TStringBuf* value) { + Buffer_.clear(); + while (true) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + } + char ch = *TBaseStream::Begin(); + TBaseStream::Advance(1); + if (ch != '"') { + Buffer_.push_back(ch); + } else { + // We must count the 
number of '\' at the end of StringValue + // to check if it's not \" + int slashCount = 0; + int length = Buffer_.size(); + while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') { + ++slashCount; + } + if (slashCount % 2 == 0) { + break; + } else { + Buffer_.push_back(ch); + } + } + CheckMemoryLimit(); + } + + auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size()); + Buffer_.clear(); + Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size()); + CheckMemoryLimit(); + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + + template <bool AllowFinish> + void ReadUnquotedString(TStringBuf* value) { + Buffer_.clear(); + while (true) { + char ch = TBaseStream::template GetChar<AllowFinish>(); + if (isalpha(ch) || isdigit(ch) || + ch == '_' || ch == '-' || ch == '%' || ch == '.') { + Buffer_.push_back(ch); + } else { + break; + } + CheckMemoryLimit(); + TBaseStream::Advance(1); + } + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + + void ReadUnquotedString(TStringBuf* value) { + return ReadUnquotedString<false>(value); + } + + void ReadBinaryString(TStringBuf* value) { + ui32 ulength = 0; + if (!TBaseStream::ReadVarint32(&ulength)) { + ythrow TYsonException() << "Error parsing varint value"; + } + + i32 length = ZigZagDecode32(ulength); + if (length < 0) { + ythrow TYsonException() << "Negative binary string literal length " << length; + } + + if (TBaseStream::Begin() + length <= TBaseStream::End()) { + *value = TStringBuf(TBaseStream::Begin(), length); + TBaseStream::Advance(length); + } else { // reading in Buffer + size_t needToRead = length; + Buffer_.clear(); + while (needToRead) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + continue; + } + size_t readingBytes = Min(needToRead, TBaseStream::Length()); + + Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes); + CheckMemoryLimit(); + needToRead -= readingBytes; + 
TBaseStream::Advance(readingBytes); + } + *value = TStringBuf(Buffer_.data(), Buffer_.size()); + } + } + + template <bool AllowFinish> + bool ReadBoolean() { + Buffer_.clear(); + + static TStringBuf trueString = "true"; + static TStringBuf falseString = "false"; + + auto throwIncorrectBoolean = [&]() { + ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size()); + }; + + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_[0] == trueString[0]) { + for (size_t i = 1; i < trueString.size(); ++i) { + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_.back() != trueString[i]) { + throwIncorrectBoolean(); + } + } + return true; + } else if (Buffer_[0] == falseString[0]) { + for (size_t i = 1; i < falseString.size(); ++i) { + Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>()); + TBaseStream::Advance(1); + if (Buffer_.back() != falseString[i]) { + throwIncorrectBoolean(); + } + } + return false; + } else { + throwIncorrectBoolean(); + } + + Y_FAIL("unreachable"); + ; + } + + void ReadBinaryInt64(i64* result) { + ui64 uvalue; + if (!TBaseStream::ReadVarint64(&uvalue)) { + ythrow TYsonException() << "Error parsing varint value"; + } + *result = ZigZagDecode64(uvalue); + } + + void ReadBinaryUint64(ui64* result) { + ui64 uvalue; + if (!TBaseStream::ReadVarint64(&uvalue)) { + ythrow TYsonException() << "Error parsing varint value"; + } + *result = uvalue; + } + + void ReadBinaryDouble(double* value) { + size_t needToRead = sizeof(double); + + while (needToRead != 0) { + if (TBaseStream::IsEmpty()) { + TBaseStream::Refresh(); + continue; + } + + size_t chunkSize = Min(needToRead, TBaseStream::Length()); + if (chunkSize == 0) { + ythrow TYsonException() << "Error parsing binary double literal"; + } + std::copy( + TBaseStream::Begin(), + TBaseStream::Begin() + chunkSize, + reinterpret_cast<char*>(value) + (sizeof(double) 
- needToRead)); + needToRead -= chunkSize; + TBaseStream::Advance(chunkSize); + } + } + + /// Helpers + void SkipCharToken(char symbol) { + char ch = SkipSpaceAndGetChar(); + if (ch != symbol) { + ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'"; + } + + TBaseStream::Advance(1); + } + + static bool IsSpaceFast(char ch) { + static const ui8 lookupTable[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return lookupTable[static_cast<ui8>(ch)]; + } + + template <bool AllowFinish> + char SkipSpaceAndGetChar() { + if (!TBaseStream::IsEmpty()) { + char ch = *TBaseStream::Begin(); + if (!IsSpaceFast(ch)) { + return ch; + } + } + return SkipSpaceAndGetCharFallback<AllowFinish>(); + } + + char SkipSpaceAndGetChar() { + return SkipSpaceAndGetChar<false>(); + } + + template <bool AllowFinish> + char SkipSpaceAndGetCharFallback() { + while (true) { + if (TBaseStream::IsEmpty()) { + if (TBaseStream::IsFinished()) { + return '\0'; + } + TBaseStream::template Refresh<AllowFinish>(); + continue; + } + if (!IsSpaceFast(*TBaseStream::Begin())) { + break; + } + TBaseStream::Advance(1); + } + return TBaseStream::template GetChar<AllowFinish>(); + } + }; + + 
//////////////////////////////////////////////////////////////////////////////// + + } + + //////////////////////////////////////////////////////////////////////////////// + + class TStringReader { + private: + const char* BeginPtr; + const char* EndPtr; + + public: + TStringReader() + : BeginPtr(nullptr) + , EndPtr(nullptr) + { + } + + TStringReader(const char* begin, const char* end) + : BeginPtr(begin) + , EndPtr(end) + { + } + + const char* Begin() const { + return BeginPtr; + } + + const char* End() const { + return EndPtr; + } + + void RefreshBlock() { + Y_FAIL("unreachable"); + } + + void Advance(size_t bytes) { + BeginPtr += bytes; + } + + bool IsFinished() const { + return true; + } + + void SetBuffer(const char* begin, const char* end) { + BeginPtr = begin; + EndPtr = end; + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + class TStreamReader { + public: + TStreamReader( + IInputStream* stream, + char* buffer, + size_t bufferSize) + : Stream(stream) + , Buffer(buffer) + , BufferSize(bufferSize) + { + BeginPtr = EndPtr = Buffer; + FinishFlag = false; + } + + const char* Begin() const { + return BeginPtr; + } + + const char* End() const { + return EndPtr; + } + + void RefreshBlock() { + size_t bytes = Stream->Read(Buffer, BufferSize); + BeginPtr = Buffer; + EndPtr = Buffer + bytes; + FinishFlag = (bytes == 0); + } + + void Advance(size_t bytes) { + BeginPtr += bytes; + } + + bool IsFinished() const { + return FinishFlag; + } + + private: + IInputStream* Stream; + char* Buffer; + size_t BufferSize; + + const char* BeginPtr; + const char* EndPtr; + bool FinishFlag; + }; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/format.h b/library/cpp/yson/format.h new file mode 100644 index 00000000000..2ff6dc9f6e2 --- /dev/null +++ b/library/cpp/yson/format.h @@ -0,0 +1,25 @@ +#pragma once + +#include "token.h" + +namespace NYson { + 
//////////////////////////////////////////////////////////////////////////////// + + const ETokenType BeginListToken = LeftBracket; + const ETokenType EndListToken = RightBracket; + + const ETokenType BeginMapToken = LeftBrace; + const ETokenType EndMapToken = RightBrace; + + const ETokenType BeginAttributesToken = LeftAngle; + const ETokenType EndAttributesToken = RightAngle; + + const ETokenType ListItemSeparatorToken = Semicolon; + const ETokenType KeyedItemSeparatorToken = Semicolon; + const ETokenType KeyValueSeparatorToken = Equals; + + const ETokenType EntityToken = Hash; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/json/json_writer.cpp b/library/cpp/yson/json/json_writer.cpp new file mode 100644 index 00000000000..87481256ecd --- /dev/null +++ b/library/cpp/yson/json/json_writer.cpp @@ -0,0 +1,220 @@ +#include "json_writer.h" + +#include <library/cpp/json/json_writer.h> + +namespace NYT { + //////////////////////////////////////////////////////////////////////////////// + + static bool IsSpecialJsonKey(const TStringBuf& key) { + return key.size() > 0 && key[0] == '$'; + } + + //////////////////////////////////////////////////////////////////////////////// + + TJsonWriter::TJsonWriter( + IOutputStream* output, + ::NYson::EYsonType type, + EJsonFormat format, + EJsonAttributesMode attributesMode, + ESerializedBoolFormat booleanFormat) + : TJsonWriter( + output, + NJson::TJsonWriterConfig{}.SetFormatOutput(format == JF_PRETTY), + type, + attributesMode, + booleanFormat + ) + {} + + TJsonWriter::TJsonWriter( + IOutputStream* output, + NJson::TJsonWriterConfig config, + ::NYson::EYsonType type, + EJsonAttributesMode attributesMode, + ESerializedBoolFormat booleanFormat) + : Output(output) + , Type(type) + , AttributesMode(attributesMode) + , BooleanFormat(booleanFormat) + , Depth(0) + { + if (Type == ::NYson::EYsonType::MapFragment) { + ythrow ::NYson::TYsonException() 
<< ("Map fragments are not supported by Json"); + } + + UnderlyingJsonWriter.Reset(new NJson::TJsonWriter( + output, + config)); + JsonWriter = UnderlyingJsonWriter.Get(); + HasAttributes = false; + InAttributesBalance = 0; + } + + void TJsonWriter::EnterNode() { + if (AttributesMode == JAM_NEVER) { + HasAttributes = false; + } else if (AttributesMode == JAM_ON_DEMAND) { + // Do nothing + } else if (AttributesMode == JAM_ALWAYS) { + if (!HasAttributes) { + JsonWriter->OpenMap(); + JsonWriter->Write("$attributes"); + JsonWriter->OpenMap(); + JsonWriter->CloseMap(); + } + HasAttributes = true; + } + HasUnfoldedStructureStack.push_back(HasAttributes); + + if (HasAttributes) { + JsonWriter->Write("$value"); + HasAttributes = false; + } + + Depth += 1; + } + + void TJsonWriter::LeaveNode() { + Y_ASSERT(!HasUnfoldedStructureStack.empty()); + if (HasUnfoldedStructureStack.back()) { + // Close map of the {$attributes, $value} + JsonWriter->CloseMap(); + } + HasUnfoldedStructureStack.pop_back(); + + Depth -= 1; + + if (Depth == 0 && Type == ::NYson::EYsonType::ListFragment && InAttributesBalance == 0) { + JsonWriter->Flush(); + Output->Write("\n"); + } + } + + bool TJsonWriter::IsWriteAllowed() { + if (AttributesMode == JAM_NEVER) { + return InAttributesBalance == 0; + } + return true; + } + + void TJsonWriter::OnStringScalar(TStringBuf value) { + if (IsWriteAllowed()) { + EnterNode(); + WriteStringScalar(value); + LeaveNode(); + } + } + + void TJsonWriter::OnInt64Scalar(i64 value) { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } + + void TJsonWriter::OnUint64Scalar(ui64 value) { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } + + void TJsonWriter::OnDoubleScalar(double value) { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } + + void TJsonWriter::OnBooleanScalar(bool value) { + if (IsWriteAllowed()) { + if (BooleanFormat == SBF_STRING) { + 
OnStringScalar(value ? "true" : "false"); + } else { + EnterNode(); + JsonWriter->Write(value); + LeaveNode(); + } + } + } + + void TJsonWriter::OnEntity() { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->WriteNull(); + LeaveNode(); + } + } + + void TJsonWriter::OnBeginList() { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->OpenArray(); + } + } + + void TJsonWriter::OnListItem() { + } + + void TJsonWriter::OnEndList() { + if (IsWriteAllowed()) { + JsonWriter->CloseArray(); + LeaveNode(); + } + } + + void TJsonWriter::OnBeginMap() { + if (IsWriteAllowed()) { + EnterNode(); + JsonWriter->OpenMap(); + } + } + + void TJsonWriter::OnKeyedItem(TStringBuf name) { + if (IsWriteAllowed()) { + if (IsSpecialJsonKey(name)) { + WriteStringScalar(TString("$") + name); + } else { + WriteStringScalar(name); + } + } + } + + void TJsonWriter::OnEndMap() { + if (IsWriteAllowed()) { + JsonWriter->CloseMap(); + LeaveNode(); + } + } + + void TJsonWriter::OnBeginAttributes() { + InAttributesBalance += 1; + if (AttributesMode != JAM_NEVER) { + JsonWriter->OpenMap(); + JsonWriter->Write("$attributes"); + JsonWriter->OpenMap(); + } + } + + void TJsonWriter::OnEndAttributes() { + InAttributesBalance -= 1; + if (AttributesMode != JAM_NEVER) { + HasAttributes = true; + JsonWriter->CloseMap(); + } + } + + void TJsonWriter::WriteStringScalar(const TStringBuf& value) { + JsonWriter->Write(value); + } + + void TJsonWriter::Flush() { + JsonWriter->Flush(); + } + + //////////////////////////////////////////////////////////////////////////////// + +} diff --git a/library/cpp/yson/json/json_writer.h b/library/cpp/yson/json/json_writer.h new file mode 100644 index 00000000000..d84ac0de530 --- /dev/null +++ b/library/cpp/yson/json/json_writer.h @@ -0,0 +1,89 @@ +#pragma once + +#include <library/cpp/yson/public.h> +#include <library/cpp/yson/consumer.h> + +#include <library/cpp/json/json_writer.h> + +#include <util/generic/vector.h> + +namespace NYT { + 
//////////////////////////////////////////////////////////////////////////////// + + enum EJsonFormat { + JF_TEXT, + JF_PRETTY + }; + + enum EJsonAttributesMode { + JAM_NEVER, + JAM_ON_DEMAND, + JAM_ALWAYS + }; + + enum ESerializedBoolFormat { + SBF_BOOLEAN, + SBF_STRING + }; + + class TJsonWriter + : public ::NYson::TYsonConsumerBase { + public: + TJsonWriter( + IOutputStream* output, + ::NYson::EYsonType type = ::NYson::EYsonType::Node, + EJsonFormat format = JF_TEXT, + EJsonAttributesMode attributesMode = JAM_ON_DEMAND, + ESerializedBoolFormat booleanFormat = SBF_STRING); + + TJsonWriter( + IOutputStream* output, + NJson::TJsonWriterConfig config, + ::NYson::EYsonType type = ::NYson::EYsonType::Node, + EJsonAttributesMode attributesMode = JAM_ON_DEMAND, + ESerializedBoolFormat booleanFormat = SBF_STRING); + + void Flush(); + + void OnStringScalar(TStringBuf value) override; + void OnInt64Scalar(i64 value) override; + void OnUint64Scalar(ui64 value) override; + void OnDoubleScalar(double value) override; + void OnBooleanScalar(bool value) override; + + void OnEntity() override; + + void OnBeginList() override; + void OnListItem() override; + void OnEndList() override; + + void OnBeginMap() override; + void OnKeyedItem(TStringBuf key) override; + void OnEndMap() override; + + void OnBeginAttributes() override; + void OnEndAttributes() override; + + private: + THolder<NJson::TJsonWriter> UnderlyingJsonWriter; + NJson::TJsonWriter* JsonWriter; + IOutputStream* Output; + ::NYson::EYsonType Type; + EJsonAttributesMode AttributesMode; + ESerializedBoolFormat BooleanFormat; + + void WriteStringScalar(const TStringBuf& value); + + void EnterNode(); + void LeaveNode(); + bool IsWriteAllowed(); + + TVector<bool> HasUnfoldedStructureStack; + int InAttributesBalance; + bool HasAttributes; + int Depth; + }; + + //////////////////////////////////////////////////////////////////////////////// + +} diff --git a/library/cpp/yson/json/ya.make b/library/cpp/yson/json/ya.make new 
file mode 100644 index 00000000000..625a6b231e7 --- /dev/null +++ b/library/cpp/yson/json/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +OWNER( + ermolovd + g:yt +) + +SRCS( + json_writer.cpp + yson2json_adapter.cpp +) + +PEERDIR( + library/cpp/json +) + +END() diff --git a/library/cpp/yson/json/yson2json_adapter.cpp b/library/cpp/yson/json/yson2json_adapter.cpp new file mode 100644 index 00000000000..b5e7c49d4d8 --- /dev/null +++ b/library/cpp/yson/json/yson2json_adapter.cpp @@ -0,0 +1,82 @@ +#include "yson2json_adapter.h" + +namespace NYT { + TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException) + : NJson::TJsonCallbacks(throwException) + , Impl_(impl) + { + } + + bool TYson2JsonCallbacksAdapter::OnNull() { + WrapIfListItem(); + Impl_->OnEntity(); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnBoolean(bool val) { + WrapIfListItem(); + Impl_->OnBooleanScalar(val); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnInteger(long long val) { + WrapIfListItem(); + Impl_->OnInt64Scalar(val); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnUInteger(unsigned long long val) { + WrapIfListItem(); + Impl_->OnUint64Scalar(val); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnString(const TStringBuf& val) { + WrapIfListItem(); + Impl_->OnStringScalar(val); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnDouble(double val) { + WrapIfListItem(); + Impl_->OnDoubleScalar(val); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnOpenArray() { + WrapIfListItem(); + State_.ContextStack.push(true); + Impl_->OnBeginList(); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnCloseArray() { + State_.ContextStack.pop(); + Impl_->OnEndList(); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnOpenMap() { + WrapIfListItem(); + State_.ContextStack.push(false); + Impl_->OnBeginMap(); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnCloseMap() { + 
State_.ContextStack.pop(); + Impl_->OnEndMap(); + return true; + } + + bool TYson2JsonCallbacksAdapter::OnMapKey(const TStringBuf& val) { + Impl_->OnKeyedItem(val); + return true; + } + + void TYson2JsonCallbacksAdapter::WrapIfListItem() { + if (!State_.ContextStack.empty() && State_.ContextStack.top()) { + Impl_->OnListItem(); + } + } +} diff --git a/library/cpp/yson/json/yson2json_adapter.h b/library/cpp/yson/json/yson2json_adapter.h new file mode 100644 index 00000000000..da1bf5ba709 --- /dev/null +++ b/library/cpp/yson/json/yson2json_adapter.h @@ -0,0 +1,53 @@ +#pragma once + +#include <library/cpp/yson/consumer.h> + +#include <library/cpp/json/json_reader.h> + +#include <util/generic/stack.h> + +namespace NYT { + class TYson2JsonCallbacksAdapter + : public NJson::TJsonCallbacks { + public: + class TState { + private: + // Stores current context stack + // If true - we are in a list + // If false - we are in a map + TStack<bool> ContextStack; + + friend class TYson2JsonCallbacksAdapter; + }; + + public: + TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException = false); + + bool OnNull() override; + bool OnBoolean(bool val) override; + bool OnInteger(long long val) override; + bool OnUInteger(unsigned long long val) override; + bool OnString(const TStringBuf& val) override; + bool OnDouble(double val) override; + bool OnOpenArray() override; + bool OnCloseArray() override; + bool OnOpenMap() override; + bool OnCloseMap() override; + bool OnMapKey(const TStringBuf& val) override; + + TState State() const { + return State_; + } + + void Reset(const TState& state) { + State_ = state; + } + + private: + void WrapIfListItem(); + + private: + ::NYson::TYsonConsumerBase* Impl_; + TState State_; + }; +} diff --git a/library/cpp/yson/lexer.cpp b/library/cpp/yson/lexer.cpp new file mode 100644 index 00000000000..5eae94273bf --- /dev/null +++ b/library/cpp/yson/lexer.cpp @@ -0,0 +1,43 @@ +#include "lexer.h" +#include "lexer_detail.h" +#include 
"token.h" + +#include <util/generic/ptr.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + class TStatelessLexer::TImpl { + private: + THolder<TStatelessYsonLexerImplBase> Impl; + + public: + TImpl(bool enableLinePositionInfo = false) + : Impl(enableLinePositionInfo + ? static_cast<TStatelessYsonLexerImplBase*>(new TStatelesYsonLexerImpl<true>()) + : static_cast<TStatelessYsonLexerImplBase*>(new TStatelesYsonLexerImpl<false>())) + { + } + + size_t GetToken(const TStringBuf& data, TToken* token) { + return Impl->GetToken(data, token); + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + TStatelessLexer::TStatelessLexer() + : Impl(new TImpl()) + { + } + + TStatelessLexer::~TStatelessLexer() { + } + + size_t TStatelessLexer::GetToken(const TStringBuf& data, TToken* token) { + return Impl->GetToken(data, token); + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/lexer.h b/library/cpp/yson/lexer.h new file mode 100644 index 00000000000..d9d701874d4 --- /dev/null +++ b/library/cpp/yson/lexer.h @@ -0,0 +1,26 @@ +#pragma once + +#include "public.h" +#include "token.h" + +#include <util/generic/ptr.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + class TStatelessLexer { + public: + TStatelessLexer(); + + ~TStatelessLexer(); + + size_t GetToken(const TStringBuf& data, TToken* token); + + private: + class TImpl; + THolder<TImpl> Impl; + }; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/lexer_detail.h b/library/cpp/yson/lexer_detail.h new file mode 100644 index 00000000000..0bba30acdd2 --- /dev/null +++ b/library/cpp/yson/lexer_detail.h @@ -0,0 +1,296 @@ +#pragma once + +#include "detail.h" +#include "token.h" + +namespace NYson { + 
//////////////////////////////////////////////////////////////////////////////// + + namespace NDetail { + /*! \internal */ + //////////////////////////////////////////////////////////////////////////////// + + // EReadStartCase tree representation: + // Root = xb + // BinaryStringOrOtherSpecialToken = x0b + // BinaryString = 00b + // OtherSpecialToken = 10b + // Other = x1b + // BinaryScalar = xx01b + // BinaryInt64 = 0001b + // BinaryDouble = 0101b + // BinaryFalse = 1001b + // BinaryTrue = 1101b + // Other = xxx11b + // Quote = 00011b + // DigitOrMinus = 00111b + // String = 01011b + // Space = 01111b + // Plus = 10011b + // None = 10111b + // Percent = 11011b + + enum EReadStartCase : unsigned { + BinaryString = 0, // = 00b + OtherSpecialToken = 2, // = 10b + + BinaryInt64 = 1, // = 001b + BinaryDouble = 5, // = 101b + BinaryFalse = 9, // = 1001b + BinaryTrue = 13, // = 1101b + BinaryUint64 = 17, // = 10001b + + Quote = 3, // = 00011b + DigitOrMinus = 7, // = 00111b + String = 11, // = 01011b + Space = 15, // = 01111b + Plus = 19, // = 10011b + None = 23, // = 10111b + Percent = 27 // = 11011b + }; + + template <class TBlockStream, bool EnableLinePositionInfo> + class TLexer + : public TLexerBase<TBlockStream, EnableLinePositionInfo> { + private: + using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; + + static EReadStartCase GetStartState(char ch) { +#define NN EReadStartCase::None +#define BS EReadStartCase::BinaryString +#define BI EReadStartCase::BinaryInt64 +#define BD EReadStartCase::BinaryDouble +#define BF EReadStartCase::BinaryFalse +#define BT EReadStartCase::BinaryTrue +#define BU EReadStartCase::BinaryUint64 +#define SP NN // EReadStartCase::Space +#define DM EReadStartCase::DigitOrMinus +#define ST EReadStartCase::String +#define PL EReadStartCase::Plus +#define QU EReadStartCase::Quote +#define PC EReadStartCase::Percent +#define TT(name) (EReadStartCase(static_cast<ui8>(ETokenType::name) << 2) | EReadStartCase::OtherSpecialToken) + + 
static const ui8 lookupTable[] = + { + NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + // 32 + SP, // ' ' + NN, // '!' + QU, // '"' + TT(Hash), // '#' + NN, // '$' + PC, // '%' + NN, // '&' + NN, // "'" + TT(LeftParenthesis), // '(' + TT(RightParenthesis), // ')' + NN, // '*' + PL, // '+' + TT(Comma), // ',' + DM, // '-' + NN, // '.' + NN, // '/' + + // 48 + DM, DM, DM, DM, DM, DM, DM, DM, DM, DM, // '0' - '9' + TT(Colon), // ':' + TT(Semicolon), // ';' + TT(LeftAngle), // '<' + TT(Equals), // '=' + TT(RightAngle), // '>' + NN, // '?' + + // 64 + NN, // '@' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' + TT(LeftBracket), // '[' + NN, // '\' + TT(RightBracket), // ']' + NN, // '^' + ST, // '_' + + // 96 + NN, // '`' + + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' + TT(LeftBrace), // '{' + NN, // '|' + TT(RightBrace), // '}' + NN, // '~' + NN, // '^?' 
non-printable + // 128 + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; + +#undef NN +#undef BS +#undef BI +#undef BD +#undef SP +#undef DM +#undef ST +#undef PL +#undef QU +#undef TT + return static_cast<EReadStartCase>(lookupTable[static_cast<ui8>(ch)]); + } + + public: + TLexer(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) + : TBase(blockStream, memoryLimit) + { + } + + void GetToken(TToken* token) { + char ch1 = TBase::SkipSpaceAndGetChar(); + auto state = GetStartState(ch1); + auto stateBits = static_cast<unsigned>(state); + + if (ch1 == '\0') { + *token = TToken::EndOfStream; + return; + } + + if (stateBits & 1) { // Other = x1b + if (stateBits & 1 << 1) { // Other = xxx11b + if (state == EReadStartCase::Quote) { + TStringBuf value; + TBase::Advance(1); + TBase::ReadQuotedString(&value); + *token = TToken(value); + } else if (state == EReadStartCase::DigitOrMinus) { + ReadNumeric<true>(token); + } else if (state == EReadStartCase::Plus) { + TBase::Advance(1); + + char ch2 = TBase::template GetChar<true>(); + + if (!isdigit(ch2)) { + *token = TToken(ETokenType::Plus); + } else { + ReadNumeric<true>(token); + } + } else if (state == EReadStartCase::String) { + TStringBuf value; + TBase::template ReadUnquotedString<true>(&value); + *token = TToken(value); + } else if (state == EReadStartCase::Percent) { + TBase::Advance(1); + char ch3 = TBase::template GetChar<true>(); + if (ch3 == 't' || ch3 == 'f') { + *token = TToken(TBase::template ReadBoolean<true>()); + } else { + *token 
= TToken(TBase::template ReadNanOrInf<true>()); + } + } else { // None + Y_ASSERT(state == EReadStartCase::None); + ythrow TYsonException() << "Unexpected " << ch1; + } + } else { // BinaryScalar = x01b + TBase::Advance(1); + if (state == EReadStartCase::BinaryDouble) { + double value; + TBase::ReadBinaryDouble(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryInt64) { + i64 value; + TBase::ReadBinaryInt64(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryUint64) { + ui64 value; + TBase::ReadBinaryUint64(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryFalse) { + *token = TToken(false); + } else if (state == EReadStartCase::BinaryTrue) { + *token = TToken(true); + } else { + Y_FAIL("unreachable"); + } + } + } else { // BinaryStringOrOtherSpecialToken = x0b + TBase::Advance(1); + if (stateBits & 1 << 1) { // OtherSpecialToken = 10b + Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::OtherSpecialToken)); + *token = TToken(ETokenType(stateBits >> 2)); + } else { // BinaryString = 00b + Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::BinaryString)); + TStringBuf value; + TBase::ReadBinaryString(&value); + *token = TToken(value); + } + } + } + + template <bool AllowFinish> + void ReadNumeric(TToken* token) { + TStringBuf valueBuffer; + ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); + + if (numericResult == ENumericResult::Double) { + try { + *token = TToken(FromString<double>(valueBuffer)); + } catch (yexception&) { + ythrow TYsonException() << "Error parsing double literal " << valueBuffer; + } + } else if (numericResult == ENumericResult::Int64) { + try { + *token = TToken(FromString<i64>(valueBuffer)); + } catch (yexception&) { + ythrow TYsonException() << "Error parsing int64 literal " << valueBuffer; + } + } else if (numericResult == ENumericResult::Uint64) { + try { + *token = 
TToken(FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1))); + } catch (yexception&) { + ythrow TYsonException() << "Error parsing uint64 literal " << valueBuffer; + } + } + } + }; + //////////////////////////////////////////////////////////////////////////////// + /*! \endinternal */ + } + + class TStatelessYsonLexerImplBase { + public: + virtual size_t GetToken(const TStringBuf& data, TToken* token) = 0; + + virtual ~TStatelessYsonLexerImplBase() { + } + }; + + template <bool EnableLinePositionInfo> + class TStatelesYsonLexerImpl: public TStatelessYsonLexerImplBase { + private: + using TLexer = NDetail::TLexer<TStringReader, EnableLinePositionInfo>; + TLexer Lexer; + + public: + TStatelesYsonLexerImpl() + : Lexer(TStringReader(), Nothing()) + { + } + + size_t GetToken(const TStringBuf& data, TToken* token) override { + Lexer.SetBuffer(data.begin(), data.end()); + Lexer.GetToken(token); + return Lexer.Begin() - data.begin(); + } + }; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/node/node.cpp b/library/cpp/yson/node/node.cpp new file mode 100644 index 00000000000..b39e0707187 --- /dev/null +++ b/library/cpp/yson/node/node.cpp @@ -0,0 +1,915 @@ +#include "node.h" + +#include "node_io.h" + +#include <library/cpp/yson/writer.h> + +#include <util/generic/overloaded.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +bool TNode::TNull::operator==(const TNull&) const { + return true; +} + +//////////////////////////////////////////////////////////////////////////////// + +bool TNode::TUndefined::operator==(const TUndefined&) const { + return true; +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace NNodeCmp { + +bool IsComparableType(const TNode::EType type) { + switch (type) { + case TNode::String: + case TNode::Int64: + case TNode::Uint64: + case TNode::Double: 
case TNode::Bool:
        case TNode::Null:
        case TNode::Undefined:
            return true;
        default:
            return false;
    }
}

// Strict ordering for scalar nodes.
// Throws TNode::TTypeError if either node has attributes or a type rejected by
// IsComparableType. Nodes of different types are ordered by their EType value;
// Null and Undefined never compare less than a node of the same type.
bool operator<(const TNode& lhs, const TNode& rhs)
{
    if (!lhs.GetAttributes().Empty() || !rhs.GetAttributes().Empty()) {
        ythrow TNode::TTypeError() << "Unsupported attributes comparison";
    }

    if (!IsComparableType(lhs.GetType()) || !IsComparableType(rhs.GetType())) {
        ythrow TNode::TTypeError() << "Unsupported types for comparison: " << lhs.GetType() << " with " << rhs.GetType();
    }

    if (lhs.GetType() != rhs.GetType()) {
        return lhs.GetType() < rhs.GetType();
    }

    switch (lhs.GetType()) {
        case TNode::String:
            return lhs.AsString() < rhs.AsString();
        case TNode::Int64:
            return lhs.AsInt64() < rhs.AsInt64();
        case TNode::Uint64:
            return lhs.AsUint64() < rhs.AsUint64();
        case TNode::Double:
            return lhs.AsDouble() < rhs.AsDouble();
        case TNode::Bool:
            return lhs.AsBool() < rhs.AsBool();
        case TNode::Null:
        case TNode::Undefined:
            return false;
        default:
            Y_FAIL("Unexpected type: %d", lhs.GetType());
    }
}

// The remaining relational operators are all derived from operator<.
bool operator>(const TNode& lhs, const TNode& rhs)
{
    return rhs < lhs;
}

bool operator<=(const TNode& lhs, const TNode& rhs)
{
    return !(lhs > rhs);
}

bool operator>=(const TNode& lhs, const TNode& rhs)
{
    return !(lhs < rhs);
}

} // namespace NNodeCmp

////////////////////////////////////////////////////////////////////////////////

// A default-constructed node holds TUndefined.
TNode::TNode()
    : Value_(TUndefined{})
{ }

// String-like arguments are stored as TString.
TNode::TNode(const char* s)
    : Value_(TString(s))
{ }

TNode::TNode(TStringBuf s)
    : Value_(TString(s))
{ }

TNode::TNode(std::string_view s)
    : Value_(TString(s))
{ }

TNode::TNode(const std::string& s)
    : Value_(TString(s))
{ }

TNode::TNode(TString s)
    : Value_(std::move(s))
{ }

// Signed integer arguments are stored as i64, unsigned ones as ui64.
TNode::TNode(int i)
    : Value_(static_cast<i64>(i))
{ }


TNode::TNode(unsigned int ui)
    : Value_(static_cast<ui64>(ui))
{ }

TNode::TNode(long i)
    : Value_(static_cast<i64>(i))
{ }

TNode::TNode(unsigned long ui)
    : Value_(static_cast<ui64>(ui))
{ }

TNode::TNode(long long i)
    : Value_(static_cast<i64>(i))
{ }

TNode::TNode(unsigned long long ui)
    : Value_(static_cast<ui64>(ui))
{ }

TNode::TNode(double d)
    : Value_(d)
{ }

TNode::TNode(bool b)
    : Value_(b)
{ }

TNode::TNode(TMapType map)
    : Value_(std::move(map))
{ }

// Deep copy: the attribute node is copied only when rhs has one.
TNode::TNode(const TNode& rhs)
    : TNode()
{
    if (rhs.Attributes_) {
        CreateAttributes();
        *Attributes_ = *rhs.Attributes_;
    }
    Value_ = rhs.Value_;
}

// Copies into a temporary first, then moves it in.
TNode& TNode::operator=(const TNode& rhs)
{
    if (this != &rhs) {
        TNode tmp = rhs;
        Move(std::move(tmp));
    }
    return *this;
}

TNode::TNode(TNode&& rhs) noexcept
    : TNode()
{
    Move(std::move(rhs));
}

TNode& TNode::operator=(TNode&& rhs) noexcept
{
    if (this != &rhs) {
        TNode tmp = std::move(rhs);
        Move(std::move(tmp));
    }
    return *this;
}

TNode::~TNode() = default;

// Drops attributes and resets the value to TUndefined.
void TNode::Clear()
{
    ClearAttributes();
    Value_ = TUndefined();
}

// Type predicates: each one tests which alternative Value_ currently holds.
bool TNode::IsString() const
{
    return std::holds_alternative<TString>(Value_);
}

bool TNode::IsInt64() const
{
    return std::holds_alternative<i64>(Value_);
}

bool TNode::IsUint64() const
{
    return std::holds_alternative<ui64>(Value_);
}

bool TNode::IsDouble() const
{
    return std::holds_alternative<double>(Value_);
}

bool TNode::IsBool() const
{
    return std::holds_alternative<bool>(Value_);
}

bool TNode::IsList() const
{
    return std::holds_alternative<TListType>(Value_);
}

bool TNode::IsMap() const
{
    return std::holds_alternative<TMapType>(Value_);
}

// Same result as IsNull().
bool TNode::IsEntity() const
{
    return IsNull();
}

bool TNode::IsNull() const
{
    return std::holds_alternative<TNull>(Value_);
}

bool TNode::IsUndefined() const
{
    return std::holds_alternative<TUndefined>(Value_);
}

bool TNode::HasValue() const
{
    return !IsNull() && !IsUndefined();
}

// Defined for String, List and Map only; any other type throws TTypeError.
bool TNode::Empty() const
{
    switch (GetType()) {
        case String:
            return std::get<TString>(Value_).empty();
        case List:
            return std::get<TListType>(Value_).empty();
        case Map:
            return std::get<TMapType>(Value_).empty();
        default:
            ythrow TTypeError() << "Empty() called for type " << GetType();
    }
}

// Defined for String, List and Map only; any other type throws TTypeError.
size_t TNode::Size() const
{
    switch (GetType()) {
        case String:
            return std::get<TString>(Value_).size();
        case List:
            return std::get<TListType>(Value_).size();
        case Map:
            return std::get<TMapType>(Value_).size();
        default:
            ythrow TTypeError() << "Size() called for type " << GetType();
    }
}

// Maps the alternative currently held by Value_ to the matching EType.
TNode::EType TNode::GetType() const
{
    return std::visit(TOverloaded{
        [](const TUndefined&) { return Undefined; },
        [](const TString&) { return String; },
        [](i64) { return Int64; },
        [](ui64) { return Uint64; },
        [](double) { return Double; },
        [](bool) { return Bool; },
        [](const TListType&) { return List; },
        [](const TMapType&) { return Map; },
        [](const TNull&) { return Null; }
    }, Value_);
}

// Checked accessors: CheckType() throws TTypeError on a type mismatch.
const TString& TNode::AsString() const
{
    CheckType(String);
    return std::get<TString>(Value_);
}

i64 TNode::AsInt64() const
{
    CheckType(Int64);
    return std::get<i64>(Value_);
}

ui64 TNode::AsUint64() const
{
    CheckType(Uint64);
    return std::get<ui64>(Value_);
}

double TNode::AsDouble() const
{
    CheckType(Double);
    return std::get<double>(Value_);
}

bool TNode::AsBool() const
{
    CheckType(Bool);
    return std::get<bool>(Value_);
}

const TNode::TListType& TNode::AsList() const
{
    CheckType(List);
    return std::get<TListType>(Value_);
}

const TNode::TMapType& TNode::AsMap() const
{
    CheckType(Map);
    return std::get<TMapType>(Value_);
}

TNode::TListType& TNode::AsList()
{
    CheckType(List);
    return std::get<TListType>(Value_);
}

TNode::TMapType& TNode::AsMap()
{
    CheckType(Map);
    return std::get<TMapType>(Value_);
}

// Unchecked accessors: no CheckType() call. They are noexcept, so a std::get
// failure on the wrong alternative cannot propagate and terminates the program.
const TString& TNode::UncheckedAsString() const noexcept
{
    return std::get<TString>(Value_);
}

i64 TNode::UncheckedAsInt64() const noexcept
{
    return std::get<i64>(Value_);
}

ui64 TNode::UncheckedAsUint64() const noexcept
{
    return std::get<ui64>(Value_);
}

double TNode::UncheckedAsDouble() const noexcept
{
    return std::get<double>(Value_);
}

bool TNode::UncheckedAsBool() const noexcept
{
    return std::get<bool>(Value_);
}

const TNode::TListType& TNode::UncheckedAsList() const noexcept
{
    return std::get<TListType>(Value_);
}

const TNode::TMapType& TNode::UncheckedAsMap() const noexcept
{
    return std::get<TMapType>(Value_);
}

TNode::TListType& TNode::UncheckedAsList() noexcept
{
    return std::get<TListType>(Value_);
}

TNode::TMapType& TNode::UncheckedAsMap() noexcept
{
    return std::get<TMapType>(Value_);
}

// Factory helpers for list, map and null ("entity") nodes.
TNode TNode::CreateList()
{
    TNode node;
    node.Value_ = TListType{};
    return node;
}

TNode TNode::CreateList(TListType list)
{
    TNode node;
    node.Value_ = std::move(list);
    return node;
}

TNode TNode::CreateMap()
{
    TNode node;
    node.Value_ = TMapType{};
    return node;
}

TNode TNode::CreateMap(TMapType map)
{
    TNode node;
    node.Value_ = std::move(map);
    return node;
}

TNode TNode::CreateEntity()
{
    TNode node;
    node.Value_ = TNull{};
    return node;
}

// List indexing: the type is checked, the index is not (see At() for that).
const TNode& TNode::operator[](size_t index) const
{
    CheckType(List);
    return std::get<TListType>(Value_)[index];
}

TNode& TNode::operator[](size_t index)
{
    CheckType(List);
    return std::get<TListType>(Value_)[index];
}

// Bounds-checked list access: throws TLookupError when index >= size.
const TNode& TNode::At(size_t index) const {
    CheckType(List);
    const auto& list = std::get<TListType>(Value_);
    if (index >= list.size()) {
        ythrow TLookupError() << "List out-of-range: requested index=" << index << ", but size=" << list.size();
    }
    return list[index];
}

TNode& TNode::At(size_t index) {
    CheckType(List);
    auto& list = std::get<TListType>(Value_);
    if (index >= list.size()) {
        ythrow TLookupError() << "List out-of-range: requested index=" << index << ", but size=" << list.size();
    }
    return list[index];
}

// Appends a default-constructed element and returns a reference to it.
// AssureList() turns an Undefined node into an empty list first.
TNode& TNode::Add() &
{
    AssureList();
    return std::get<TListType>(Value_).emplace_back();
}

// NOTE(review): unlike the Add(node) && overloads below, this returns the newly
// appended element (moved out), not the list itself - confirm this is intended.
TNode TNode::Add() &&
{
    return std::move(Add());
}

// Appends node and returns *this so that calls can be chained.
TNode& TNode::Add(const TNode& node) &
{
    AssureList();
    std::get<TListType>(Value_).emplace_back(node);
    return *this;
}

TNode TNode::Add(const TNode& node) &&
{
    return std::move(Add(node));
}

TNode& TNode::Add(TNode&& node) &
{
    AssureList();
    std::get<TListType>(Value_).emplace_back(std::move(node));
    return *this;
}

TNode TNode::Add(TNode&& node) &&
{
    return std::move(Add(std::move(node)));
}

bool TNode::HasKey(const TStringBuf key) const
{
    CheckType(Map);
    return std::get<TMapType>(Value_).contains(key);
}

// Sets map[key] = value and returns *this so that calls can be chained.
// AssureMap() turns an Undefined node into an empty map first.
TNode& TNode::operator()(const TString& key, const TNode& value) &
{
    AssureMap();
    std::get<TMapType>(Value_)[key] = value;
    return *this;
}

TNode TNode::operator()(const TString& key, const TNode& value) &&
{
    return std::move(operator()(key, value));
}

TNode& TNode::operator()(const TString& key, TNode&& value) &
{
    AssureMap();
    std::get<TMapType>(Value_)[key] = std::move(value);
    return *this;
}

TNode TNode::operator()(const TString& key, TNode&& value) &&
{
    return std::move(operator()(key, std::move(value)));
}

// Const map lookup: a missing key yields a reference to a shared
// default-constructed (Undefined) node instead of throwing.
const TNode& TNode::operator[](const TStringBuf key) const
{
    CheckType(Map);
    static TNode notFound;
    const auto& map = std::get<TMapType>(Value_);
    TMapType::const_iterator i = map.find(key);
    if (i == map.end()) {
        return notFound;
    } else {
        return i->second;
    }
}

// Mutable map lookup: goes through the map's own operator[].
TNode& TNode::operator[](const TStringBuf key)
{
    AssureMap();
    return std::get<TMapType>(Value_)[key];
}

// Checked map access: throws TLookupError when the key is missing.
const TNode& TNode::At(const TStringBuf key) const {
    CheckType(Map);
    const auto& map = std::get<TMapType>(Value_);
    TMapType::const_iterator i = map.find(key);
    if (i == map.end()) {
        ythrow TLookupError() << "Cannot find key " << key;
    } else {
        return i->second;
    }
}

TNode& TNode::At(const TStringBuf key) {
    CheckType(Map);
    auto& map =
std::get<TMapType>(Value_); + TMapType::iterator i = map.find(key); + if (i == map.end()) { + ythrow TLookupError() << "Cannot find key " << key; + } else { + return i->second; + } +} + +const TString& TNode::ChildAsString(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsString(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +i64 TNode::ChildAsInt64(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsInt64(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +ui64 TNode::ChildAsUint64(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsUint64(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +double TNode::ChildAsDouble(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsDouble(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +bool TNode::ChildAsBool(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsBool(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +const TNode::TListType& TNode::ChildAsList(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsList(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) 
{ + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +const TNode::TMapType& TNode::ChildAsMap(const TStringBuf key) const { + const auto& node = At(key); + try { + return node.AsMap(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +TNode::TListType& TNode::ChildAsList(const TStringBuf key) { + auto& node = At(key); + try { + return node.AsList(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +TNode::TMapType& TNode::ChildAsMap(const TStringBuf key) { + auto& node = At(key); + try { + return node.AsMap(); + } catch (TTypeError& e) { + e << ", during getting key=" << key; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key; + } +} + +const TString& TNode::ChildAsString(size_t index) const { + const auto& node = At(index); + try { + return node.AsString(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +i64 TNode::ChildAsInt64(size_t index) const { + const auto& node = At(index); + try { + return node.AsInt64(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +ui64 TNode::ChildAsUint64(size_t index) const { + const auto& node = At(index); + try { + return node.AsUint64(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) 
{ + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +double TNode::ChildAsDouble(size_t index) const { + const auto& node = At(index); + try { + return node.AsDouble(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +bool TNode::ChildAsBool(size_t index) const { + const auto& node = At(index); + try { + return node.AsBool(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +const TNode::TListType& TNode::ChildAsList(size_t index) const { + const auto& node = At(index); + try { + return node.AsList(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +const TNode::TMapType& TNode::ChildAsMap(size_t index) const { + const auto& node = At(index); + try { + return node.AsMap(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +TNode::TListType& TNode::ChildAsList(size_t index) { + auto& node = At(index); + try { + return node.AsList(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) { + ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index; + } +} + +TNode::TMapType& TNode::ChildAsMap(size_t index) { + auto& node = At(index); + try { + return node.AsMap(); + } catch (TTypeError& e) { + e << ", during getting index=" << index; + throw e; + } catch (...) 
{
        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
    }
}

// True only when an attribute node exists and is non-empty.
bool TNode::HasAttributes() const
{
    return Attributes_ && !Attributes_->Empty();
}

void TNode::ClearAttributes()
{
    if (Attributes_) {
        Attributes_.Destroy();
    }
}

// Read-only view of the attributes. When none were ever created, a shared
// empty map node is returned, so callers always get a map.
const TNode& TNode::GetAttributes() const
{
    static TNode notFound = TNode::CreateMap();
    if (!Attributes_) {
        return notFound;
    }
    return *Attributes_;
}

// Mutable access; creates the attribute map on first use.
TNode& TNode::Attributes()
{
    if (!Attributes_) {
        CreateAttributes();
    }
    return *Attributes_;
}

// Takes rhs's value only (this node's attributes are untouched), then clears rhs
// completely, including rhs's attributes.
void TNode::MoveWithoutAttributes(TNode&& rhs)
{
    Value_ = std::move(rhs.Value_);
    rhs.Clear();
}

// Takes both the value and the attributes of rhs.
void TNode::Move(TNode&& rhs)
{
    Value_ = std::move(rhs.Value_);
    Attributes_ = std::move(rhs.Attributes_);
}

// Throws TTypeError unless the node currently has the given type.
void TNode::CheckType(EType type) const
{
    Y_ENSURE_EX(GetType() == type,
        TTypeError() << "TNode type " << type << " expected, actual type " << GetType();
    );
}

// Turns an Undefined node into an empty map; any other non-map type throws.
void TNode::AssureMap()
{
    if (std::holds_alternative<TUndefined>(Value_)) {
        Value_ = TMapType();
    } else {
        CheckType(Map);
    }
}

// Turns an Undefined node into an empty list; any other non-list type throws.
void TNode::AssureList()
{
    if (std::holds_alternative<TUndefined>(Value_)) {
        Value_ = TListType();
    } else {
        CheckType(List);
    }
}

// Allocates the attribute node and makes it an empty map.
void TNode::CreateAttributes()
{
    Attributes_ = MakeHolder<TNode>();
    Attributes_->Value_ = TMapType();
}

// Serializes the node with NodeToYsonStream in binary YSON format.
void TNode::Save(IOutputStream* out) const
{
    NodeToYsonStream(*this, out, NYson::EYsonFormat::Binary);
}

// Clears the node, then replaces it with the result of NodeFromYsonStream.
void TNode::Load(IInputStream* in)
{
    Clear();
    *this = NodeFromYsonStream(in, ::NYson::EYsonType::Node);
}

////////////////////////////////////////////////////////////////////////////////

// Equality of type, attributes and value.
// An Undefined node is never equal to anything, not even to another Undefined.
// NOTE(review): attributes are compared through the raw Attributes_ pointers, so
// a node whose attribute map was created but left empty differs from one with
// no attribute map at all - confirm this is intended.
bool operator==(const TNode& lhs, const TNode& rhs)
{
    if (std::holds_alternative<TNode::TUndefined>(lhs.Value_) ||
        std::holds_alternative<TNode::TUndefined>(rhs.Value_))
    {
        // TODO: should try to remove this behaviour if nobody uses it.
        return false;
    }

    if (lhs.GetType() != rhs.GetType()) {
        return false;
    }

    if (lhs.Attributes_) {
        if (rhs.Attributes_) {
            if (*lhs.Attributes_ != *rhs.Attributes_) {
                return false;
            }
        } else {
            return false;
        }
    } else {
        if (rhs.Attributes_) {
            return false;
        }
    }

    return rhs.Value_ == lhs.Value_;
}

bool operator!=(const TNode& lhs, const TNode& rhs)
{
    return !(lhs == rhs);
}

// Returns the value of a Bool node; for a String node returns whether the text
// equals "true" (any other string yields false). Other types throw TTypeError.
bool GetBool(const TNode& node)
{
    if (node.IsBool()) {
        return node.AsBool();
    } else if (node.IsString()) {
        return node.AsString() == "true";
    } else {
        ythrow TNode::TTypeError()
            << "GetBool(): not a boolean or string type";
    }
}

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT
diff --git a/library/cpp/yson/node/node.h b/library/cpp/yson/node/node.h new file mode 100644 index 00000000000..5f90f95df07 --- /dev/null +++ b/library/cpp/yson/node/node.h @@ -0,0 +1,523 @@
#pragma once

#include <util/generic/bt_exception.h>
#include <util/generic/cast.h>
#include <util/generic/hash.h>
#include <util/generic/variant.h>
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
#include <util/generic/ylimits.h>
#include <util/string/cast.h>

#include <cmath>
#include <variant>

class IInputStream;
class IOutputStream;

namespace NYT {

////////////////////////////////////////////////////////////////////////////////

class TNode
{
public:
    class TLookupError
        : public TWithBackTrace<yexception>
    { };

    class TTypeError
        : public TWithBackTrace<yexception>
    { };

    enum EType {
        Undefined = 0 /*"undefined"*/,

        // NOTE: string representation of all node types
        // are compatible with server node type (except `Undefined' which is missing on server).
+ String = 1 /*"string_node"*/, + Int64 = 2 /*"int64_node"*/, + Uint64 = 3 /*"uint64_node"*/, + Double = 4 /*"double_node"*/, + Bool = 5 /*"boolean_node"*/, + List = 6 /*"list_node"*/, + Map = 7 /*"map_node"*/, + Null = 8 /*"null"*/, + }; + + using TListType = TVector<TNode>; + using TMapType = THashMap<TString, TNode>; + +private: + struct TNull { + bool operator==(const TNull&) const; + }; + + struct TUndefined { + bool operator==(const TUndefined&) const; + }; + + using TValue = std::variant< + bool, + i64, + ui64, + double, + TString, + TListType, + TMapType, + TNull, + TUndefined + >; + +public: + + TNode(); + TNode(const char* s); + TNode(TStringBuf s); + explicit TNode(std::string_view s); + explicit TNode(const std::string& s); + TNode(TString s); + TNode(int i); + + //this case made speccially for prevent mess cast of EType into TNode through TNode(int) constructor + //usual case of error SomeNode == TNode::Undefined <-- SomeNode indeed will be compared with TNode(0) without this method + //correct way is SomeNode.GetType() == TNode::Undefined + template<class T = EType> + Y_FORCE_INLINE TNode(EType) + { + static_assert(!std::is_same<T, EType>::value, "looks like a mistake, may be you forget .GetType()"); + } + + //this case made speccially for prevent mess cast of T* into TNode through implicit bool ctr + template<class T = int> + Y_FORCE_INLINE TNode(const T*) : TNode() { + static_assert(!std::is_same<T,T>::value, "looks like a mistake, and pointer have converted to bool"); + } + + TNode(unsigned int ui); + TNode(long i); + TNode(unsigned long ui); + TNode(long long i); + TNode(unsigned long long ui); + TNode(double d); + TNode(bool b); + TNode(TMapType map); + + TNode(const TNode& rhs); + TNode& operator=(const TNode& rhs); + + TNode(TNode&& rhs) noexcept; + TNode& operator=(TNode&& rhs) noexcept; + + ~TNode(); + + void Clear(); + + bool IsString() const; + bool IsInt64() const; + bool IsUint64() const; + bool IsDouble() const; + bool IsBool() const; + 
bool IsList() const; + bool IsMap() const; + + // `IsEntity' is deprecated use `IsNull' instead. + bool IsEntity() const; + bool IsNull() const; + bool IsUndefined() const; + // Returns true if TNode is neither Null, nor Undefined + bool HasValue() const; + + template<typename T> + bool IsOfType() const noexcept; + + // Int64, Uint64, Double, or Bool + bool IsArithmetic() const; + + bool Empty() const; + size_t Size() const; + + EType GetType() const; + + const TString& AsString() const; + i64 AsInt64() const; + ui64 AsUint64() const; + double AsDouble() const; + bool AsBool() const; + const TListType& AsList() const; + const TMapType& AsMap() const; + TListType& AsList(); + TMapType& AsMap(); + + const TString& UncheckedAsString() const noexcept; + i64 UncheckedAsInt64() const noexcept; + ui64 UncheckedAsUint64() const noexcept; + double UncheckedAsDouble() const noexcept; + bool UncheckedAsBool() const noexcept; + const TListType& UncheckedAsList() const noexcept; + const TMapType& UncheckedAsMap() const noexcept; + TListType& UncheckedAsList() noexcept; + TMapType& UncheckedAsMap() noexcept; + + // integer types cast + // makes overflow checks + template<typename T> + T IntCast() const; + + // integers <-> double <-> string + // makes overflow checks + template<typename T> + T ConvertTo() const; + + template<typename T> + T& As(); + + template<typename T> + const T& As() const; + + static TNode CreateList(); + static TNode CreateList(TListType list); + static TNode CreateMap(); + static TNode CreateMap(TMapType map); + static TNode CreateEntity(); + + const TNode& operator[](size_t index) const; + TNode& operator[](size_t index); + const TNode& At(size_t index) const; + TNode& At(size_t index); + + TNode& Add() &; + TNode Add() &&; + TNode& Add(const TNode& node) &; + TNode Add(const TNode& node) &&; + TNode& Add(TNode&& node) &; + TNode Add(TNode&& node) &&; + + bool HasKey(const TStringBuf key) const; + + TNode& operator()(const TString& key, const TNode& 
value) &; + TNode operator()(const TString& key, const TNode& value) &&; + TNode& operator()(const TString& key, TNode&& value) &; + TNode operator()(const TString& key, TNode&& value) &&; + + const TNode& operator[](const TStringBuf key) const; + TNode& operator[](const TStringBuf key); + const TNode& At(const TStringBuf key) const; + TNode& At(const TStringBuf key); + + // map getters + // works the same way like simple getters + const TString& ChildAsString(const TStringBuf key) const; + i64 ChildAsInt64(const TStringBuf key) const; + ui64 ChildAsUint64(const TStringBuf key) const; + double ChildAsDouble(const TStringBuf key) const; + bool ChildAsBool(const TStringBuf key) const; + const TListType& ChildAsList(const TStringBuf key) const; + const TMapType& ChildAsMap(const TStringBuf key) const; + TListType& ChildAsList(const TStringBuf key); + TMapType& ChildAsMap(const TStringBuf key); + + template<typename T> + T ChildIntCast(const TStringBuf key) const; + + template<typename T> + T ChildConvertTo(const TStringBuf key) const; + + template<typename T> + const T& ChildAs(const TStringBuf key) const; + + template<typename T> + T& ChildAs(const TStringBuf key); + + // list getters + // works the same way like simple getters + const TString& ChildAsString(size_t index) const; + i64 ChildAsInt64(size_t index) const; + ui64 ChildAsUint64(size_t index) const; + double ChildAsDouble(size_t index) const; + bool ChildAsBool(size_t index) const; + const TListType& ChildAsList(size_t index) const; + const TMapType& ChildAsMap(size_t index) const; + TListType& ChildAsList(size_t index); + TMapType& ChildAsMap(size_t index); + + template<typename T> + T ChildIntCast(size_t index) const; + + template<typename T> + T ChildConvertTo(size_t index) const; + + template<typename T> + const T& ChildAs(size_t index) const; + + template<typename T> + T& ChildAs(size_t index); + + + // attributes + bool HasAttributes() const; + void ClearAttributes(); + const TNode& GetAttributes() 
const;
+    TNode& Attributes();
+
+    void MoveWithoutAttributes(TNode&& rhs);
+
+    // Serialize TNode using binary yson format.
+    // Methods for ysaveload.
+    void Save(IOutputStream* output) const;
+    void Load(IInputStream* input);
+
+private:
+    void Move(TNode&& rhs);
+
+    void CheckType(EType type) const;
+
+    void AssureMap();
+    void AssureList();
+
+    void CreateAttributes();
+
+private:
+    TValue Value_;
+    THolder<TNode> Attributes_;
+
+    friend bool operator==(const TNode& lhs, const TNode& rhs);
+    friend bool operator!=(const TNode& lhs, const TNode& rhs);
+};
+
+bool operator==(const TNode& lhs, const TNode& rhs);
+bool operator!=(const TNode& lhs, const TNode& rhs);
+
+bool GetBool(const TNode& node);
+
+// True when the node holds Int64, Uint64, Double or Bool.
+inline bool TNode::IsArithmetic() const {
+    return IsInt64() || IsUint64() || IsDouble() || IsBool();
+}
+
+// Checked integer-to-integer cast.
+// Accepts only Int64 and Uint64 nodes; any other node type throws TTypeError.
+// A TBadCastException raised by SafeIntegerCast is reported as TTypeError too.
+// Instantiating with a non-integral T is a compile-time error.
+template<typename T>
+inline T TNode::IntCast() const {
+    if constexpr (std::is_integral<T>::value) {
+        try {
+            switch (GetType()) {
+                case TNode::Uint64:
+                    return SafeIntegerCast<T>(AsUint64());
+                case TNode::Int64:
+                    return SafeIntegerCast<T>(AsInt64());
+                default:
+                    ythrow TTypeError() << "IntCast() called for type " << GetType();
+            }
+        } catch (const TBadCastException& exc) {
+            ythrow TTypeError() << "TBadCastException during IntCast(): " << exc.what();
+        }
+    } else {
+        static_assert(sizeof(T) != sizeof(T), "implemented only for std::is_integral types");
+    }
+}
+
+// Generic conversion to an integral type T (other targets use the explicit
+// specializations below; anything else is a compile-time error).
+//   String        -> parsed with ::FromString (its exceptions propagate as is)
+//   Int64/Uint64  -> IntCast<T>()
+//   Double        -> must be finite and within [Min<T>(), MaxFloor<T>()],
+//                    otherwise TTypeError; then implicitly converted to T
+//   Bool          -> implicit bool -> T conversion
+//   List/Map/Null/Undefined -> TTypeError
+template<typename T>
+inline T TNode::ConvertTo() const {
+    if constexpr (std::is_integral<T>::value) {
+        switch (GetType()) {
+            case NYT::TNode::String:
+                return ::FromString(AsString());
+            case NYT::TNode::Int64:
+            case NYT::TNode::Uint64:
+                return IntCast<T>();
+            case NYT::TNode::Double:
+                if (AsDouble() < Min<T>() || AsDouble() > MaxFloor<T>() || !std::isfinite(AsDouble())) {
+                    ythrow TTypeError() << AsDouble() << " can't be converted to " << TypeName<T>();
+                }
+                return AsDouble();
+            case NYT::TNode::Bool:
+                return AsBool();
+            case NYT::TNode::List:
+            case NYT::TNode::Map:
+            case NYT::TNode::Null:
+            case NYT::TNode::Undefined:
+                ythrow TTypeError() << "ConvertTo<" << TypeName<T>() << ">() called for type " << GetType();
+        }
+        // Every enumerator above returns or throws; do not fall off the end
+        // of a non-void function (same guard as ConvertTo<TString>).
+        Y_UNREACHABLE();
+    } else {
+        static_assert(sizeof(T) != sizeof(T), "should have template specialization");
+    }
+}
+
+// Conversion to TString: strings are returned as is, scalars go through
+// ::ToString, List/Map/Null/Undefined throw TTypeError.
+template<>
+inline TString TNode::ConvertTo<TString>() const {
+    switch (GetType()) {
+        case NYT::TNode::String:
+            return AsString();
+        case NYT::TNode::Int64:
+            return ::ToString(AsInt64());
+        case NYT::TNode::Uint64:
+            return ::ToString(AsUint64());
+        case NYT::TNode::Double:
+            return ::ToString(AsDouble());
+        case NYT::TNode::Bool:
+            return ::ToString(AsBool());
+        case NYT::TNode::List:
+        case NYT::TNode::Map:
+        case NYT::TNode::Null:
+        case NYT::TNode::Undefined:
+            ythrow TTypeError() << "ConvertTo<TString>() called for type " << GetType();
+    }
+    Y_UNREACHABLE();
+}
+
+// Conversion to double: strings are parsed with ::FromString, scalars are
+// implicitly converted, List/Map/Null/Undefined throw TTypeError.
+template<>
+inline double TNode::ConvertTo<double>() const {
+    switch (GetType()) {
+        case NYT::TNode::String:
+            return ::FromString(AsString());
+        case NYT::TNode::Int64:
+            return AsInt64();
+        case NYT::TNode::Uint64:
+            return AsUint64();
+        case NYT::TNode::Double:
+            return AsDouble();
+        case NYT::TNode::Bool:
+            return AsBool();
+        case NYT::TNode::List:
+        case NYT::TNode::Map:
+        case NYT::TNode::Null:
+        case NYT::TNode::Undefined:
+            ythrow TTypeError() << "ConvertTo<double>() called for type " << GetType();
+    }
+    // All enumerators return or throw; keep in line with ConvertTo<TString>.
+    Y_UNREACHABLE();
+}
+
+// Conversion to bool: strings are parsed with ::FromString, numeric values
+// use the implicit "non-zero is true" conversion, List/Map/Null/Undefined
+// throw TTypeError.
+template<>
+inline bool TNode::ConvertTo<bool>() const {
+    switch (GetType()) {
+        case NYT::TNode::String:
+            return ::FromString(AsString());
+        case NYT::TNode::Int64:
+            return AsInt64();
+        case NYT::TNode::Uint64:
+            return AsUint64();
+        case NYT::TNode::Double:
+            return AsDouble();
+        case NYT::TNode::Bool:
+            return AsBool();
+        case NYT::TNode::List:
+        case NYT::TNode::Map:
+        case NYT::TNode::Null:
+        case NYT::TNode::Undefined:
+            ythrow TTypeError() << "ConvertTo<bool>() called for type " << GetType();
+    }
+    // All enumerators return or throw; keep in line with ConvertTo<TString>.
+    Y_UNREACHABLE();
+}
+
+// Child accessors.
+// Each one looks the child up with At() (whose own exceptions propagate
+// unchanged), applies the corresponding accessor and, on failure, appends the
+// key/index to the error. A TTypeError is amended in place and rethrown with
+// `throw;` so the original exception object (and its dynamic type) is kept;
+// any other exception is converted to TTypeError carrying its message.
+
+template<typename T>
+inline T TNode::ChildIntCast(const TStringBuf key) const {
+    const auto& node = At(key);
+    try {
+        return node.IntCast<T>();
+    } catch (TTypeError& e) {
+        e << ", during getting key=" << key;
+        throw;
+    } catch (...) {
+        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+    }
+}
+
+template<typename T>
+inline T TNode::ChildIntCast(size_t index) const {
+    const auto& node = At(index);
+    try {
+        return node.IntCast<T>();
+    } catch (TTypeError& e) {
+        e << ", during getting index=" << index;
+        throw;
+    } catch (...) {
+        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+    }
+}
+
+template<typename T>
+inline T TNode::ChildConvertTo(const TStringBuf key) const {
+    const auto& node = At(key);
+    try {
+        return node.ConvertTo<T>();
+    } catch (TTypeError& e) {
+        e << ", during getting key=" << key;
+        throw;
+    } catch (...) {
+        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+    }
+}
+
+template<typename T>
+inline T TNode::ChildConvertTo(size_t index) const {
+    const auto& node = At(index);
+    try {
+        return node.ConvertTo<T>();
+    } catch (TTypeError& e) {
+        e << ", during getting index=" << index;
+        throw;
+    } catch (...) {
+        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+    }
+}
+
+template<typename T>
+inline const T& TNode::ChildAs(const TStringBuf key) const {
+    const auto& node = At(key);
+    try {
+        return node.As<T>();
+    } catch (TTypeError& e) {
+        e << ", during getting key=" << key;
+        throw;
+    } catch (...) {
+        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+    }
+}
+
+template<typename T>
+inline const T& TNode::ChildAs(size_t index) const {
+    const auto& node = At(index);
+    try {
+        return node.As<T>();
+    } catch (TTypeError& e) {
+        e << ", during getting index=" << index;
+        throw;
+    } catch (...) {
+        ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+    }
+}
+
+// Mutable overloads: reuse the const implementation and cast constness away
+// from the result (valid because *this is non-const here).
+template<typename T>
+inline T& TNode::ChildAs(const TStringBuf key) {
+    return const_cast<T&>(static_cast<const TNode*>(this)->ChildAs<T>(key));
+}
+
+template<typename T>
+inline T& TNode::ChildAs(size_t index) {
+    return const_cast<T&>(static_cast<const TNode*>(this)->ChildAs<T>(index));
+}
+
+// True when the underlying variant currently holds alternative T.
+template<typename T>
+inline bool TNode::IsOfType() const noexcept {
+    return std::holds_alternative<T>(Value_);
+}
+
+// Direct access to the stored alternative T via std::get; std::get throws
+// std::bad_variant_access when the node holds a different alternative.
+template<typename T>
+inline T& TNode::As() {
+    return std::get<T>(Value_);
+}
+
+template<typename T>
+inline const T& TNode::As() const {
+    return std::get<T>(Value_);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NNodeCmp {
+    bool operator<(const TNode& lhs, const TNode& rhs);
+    bool operator<=(const TNode& lhs, const TNode& rhs);
+    bool operator>(const TNode& lhs, const TNode& rhs);
+    bool operator>=(const TNode& lhs, const TNode& rhs);
+    bool IsComparableType(const TNode::EType type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_builder.cpp b/library/cpp/yson/node/node_builder.cpp
new file mode 100644
index 00000000000..b4431bc77af
--- /dev/null
+++ b/library/cpp/yson/node/node_builder.cpp
@@ -0,0 +1,96 @@
+#include "node_builder.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The builder fills `node` in place; it becomes the bottom of the stack of
+// nodes currently under construction.
+TNodeBuilder::TNodeBuilder(TNode* node)
+{
+    Stack_.push(node);
+}
+
+// Scalar events: store the value into the node on top of the stack and pop it.
+void TNodeBuilder::OnStringScalar(TStringBuf value)
+{
+    AddNode(value, true);
+}
+
+void TNodeBuilder::OnInt64Scalar(i64 value)
+{
+    AddNode(value, true);
+}
+
+void TNodeBuilder::OnUint64Scalar(ui64 value)
+{
+    AddNode(value, true);
+}
+
+void TNodeBuilder::OnDoubleScalar(double value)
+{
+    AddNode(value, true);
+}
+
+void TNodeBuilder::OnBooleanScalar(bool value)
+{
+    AddNode(value, true);
+}
+
+// Entity (null) is a scalar event: store it and pop.
+void TNodeBuilder::OnEntity()
+{
+    AddNode(TNode::CreateEntity(), true);
+}
+
+// Turns the current node into an empty list and keeps it on the stack so
+// that the following OnListItem() calls can append to it.
+void TNodeBuilder::OnBeginList()
+{
+    AddNode(TNode::CreateList(), false);
+}
+
+// Appends a fresh element to the current list and makes it the current node.
+void TNodeBuilder::OnListItem()
+{
+    Stack_.push(&Stack_.top()->Add());
+}
+
+void TNodeBuilder::OnEndList()
+{
+    Stack_.pop();
+}
+
+// Turns the current node into an empty map and keeps it on the stack so
+// that the following OnKeyedItem() calls can insert into it.
+void TNodeBuilder::OnBeginMap()
+{
+    AddNode(TNode::CreateMap(), false);
+}
+
+// Makes the map entry for `key` the current node.
+void TNodeBuilder::OnKeyedItem(TStringBuf key)
+{
+    // TNode::operator[] takes a TStringBuf, so pass the view directly instead
+    // of materializing a temporary TString for every key.
+    Stack_.push(&(*Stack_.top())[key]);
+}
+
+void TNodeBuilder::OnEndMap()
+{
+    Stack_.pop();
+}
+
+// Makes the attribute node of the current node the current node; the
+// attribute map events that follow are then applied to it.
+void TNodeBuilder::OnBeginAttributes()
+{
+    Stack_.push(&Stack_.top()->Attributes());
+}
+
+void TNodeBuilder::OnEndAttributes()
+{
+    Stack_.pop();
+}
+
+// Stores an already built node as the current value and pops it.
+void TNodeBuilder::OnNode(TNode node)
+{
+    AddNode(std::move(node), true);
+}
+
+// Moves `value` into the node on top of the stack; MoveWithoutAttributes is
+// used, so the attributes of `value` are not transferred by this call.
+// When `pop` is true the node is complete and is removed from the stack;
+// containers pass false and are popped by the matching OnEnd*() event.
+void TNodeBuilder::AddNode(TNode value, bool pop)
+{
+    Stack_.top()->MoveWithoutAttributes(std::move(value));
+    if (pop) {
+        Stack_.pop();
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_builder.h b/library/cpp/yson/node/node_builder.h
new file mode 100644
index 00000000000..69800016e09
--- /dev/null
+++ b/library/cpp/yson/node/node_builder.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "node.h"
+
+#include <library/cpp/json/json_reader.h>
+
+#include <library/cpp/yson/consumer.h>
+
+#include <util/generic/stack.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// YSON consumer that materializes the event stream into a TNode tree.
+// The target node is supplied by the caller and filled in place; Stack_ holds
+// pointers to the nodes that are currently being built.
+class TNodeBuilder
+    : public ::NYson::TYsonConsumerBase
+{
+public:
+    TNodeBuilder(TNode* node);
+
+    void OnStringScalar(TStringBuf) override;
+    void OnInt64Scalar(i64) override;
+    void OnUint64Scalar(ui64) override;
+    void OnDoubleScalar(double) override;
+    void OnBooleanScalar(bool) override;
+    void OnEntity() override;
+    void OnBeginList() override;
+    void OnListItem() override;
+    void OnEndList() override;
+    void OnBeginMap() override;
+    void OnKeyedItem(TStringBuf) override;
+    void OnEndMap() override;
+    void OnBeginAttributes() override;
+    void OnEndAttributes() override;
+    void OnNode(TNode node);
+
+private:
+    TStack<TNode*> Stack_;
+
+private:
+    inline void AddNode(TNode node, bool pop);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp
new file mode 100644
index 00000000000..294a7f7217f
--- /dev/null
+++ b/library/cpp/yson/node/node_io.cpp
@@ -0,0 +1,154 @@
+#include "node_io.h"
+
+#include "node_builder.h"
+#include "node_visitor.h"
+
+#include <library/cpp/yson/json/json_writer.h>
+#include <library/cpp/yson/parser.h>
+#include <library/cpp/yson/writer.h>
+#include <library/cpp/yson/json/yson2json_adapter.h>
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_value.h>
+
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include <util/stream/str.h>
+#include <util/stream/mem.h>
+
+namespace NYT {
+
+// Replays an in-memory JSON value as a sequence of TJsonCallbacks events,
+// recursing into maps and arrays. Throws yexception on JSON_UNDEFINED.
+static void WalkJsonTree(const NJson::TJsonValue& jsonValue, NJson::TJsonCallbacks* callbacks)
+{
+    using namespace NJson;
+    switch (jsonValue.GetType()) {
+        case JSON_NULL:
+            callbacks->OnNull();
+            return;
+        case JSON_BOOLEAN:
+            callbacks->OnBoolean(jsonValue.GetBoolean());
+            return;
+        case JSON_INTEGER:
+            callbacks->OnInteger(jsonValue.GetInteger());
+            return;
+        case JSON_UINTEGER:
+            callbacks->OnUInteger(jsonValue.GetUInteger());
+            return;
+        case JSON_DOUBLE:
+            callbacks->OnDouble(jsonValue.GetDouble());
+            return;
+        case JSON_STRING:
+            callbacks->OnString(jsonValue.GetString());
+            return;
+        case JSON_MAP:
+            {
+                callbacks->OnOpenMap();
+                for (const auto& item : jsonValue.GetMap()) {
+                    callbacks->OnMapKey(item.first);
+                    WalkJsonTree(item.second, callbacks);
+                }
+                callbacks->OnCloseMap();
+            }
+            return;
+        case JSON_ARRAY:
+            {
+                callbacks->OnOpenArray();
+                for (const auto& item : jsonValue.GetArray()) {
+                    WalkJsonTree(item, callbacks);
+                }
+                callbacks->OnCloseArray();
+            }
+            return;
+        case JSON_UNDEFINED:
+            ythrow yexception() << "cannot consume undefined json value";
+    }
+    Y_UNREACHABLE();
+}
+
+// Initial value for a parse result: an empty list for ListFragment, an empty
+// map for MapFragment, a default-constructed TNode for every other type.
+static TNode CreateEmptyNodeByType(::NYson::EYsonType type)
+{
+    TNode result;
+    switch (type) {
+        case ::NYson::EYsonType::ListFragment:
+            result = TNode::CreateList();
+            break;
+        case ::NYson::EYsonType::MapFragment:
+            result = TNode::CreateMap();
+            break;
+        default:
+            break;
+    }
+    return result;
+}
+
+// String flavour of NodeFromYsonStream: wraps `input` into a memory stream.
+TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type)
+{
+    TMemoryInput stream(input);
+    return NodeFromYsonStream(&stream, type);
+}
+
+// String flavour of NodeToYsonStream.
+TString NodeToYsonString(const TNode& node, NYson::EYsonFormat format)
+{
+    TStringStream stream;
+    NodeToYsonStream(node, &stream, format);
+    return stream.Str();
+}
+
+// String flavour of NodeToCanonicalYsonStream.
+TString NodeToCanonicalYsonString(const TNode& node, NYson::EYsonFormat format)
+{
+    TStringStream stream;
+    NodeToCanonicalYsonStream(node, &stream, format);
+    return stream.Str();
+}
+
+// Parses YSON of the given type from `input` into a new TNode using
+// TYsonParser driving a TNodeBuilder.
+TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type)
+{
+    TNode result = CreateEmptyNodeByType(type);
+
+    TNodeBuilder builder(&result);
+    ::NYson::TYsonParser parser(&builder, input, type);
+    parser.Parse();
+    return result;
+}
+
+// Writes `node` to `output` in the requested YSON format.
+void NodeToYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format)
+{
+    ::NYson::TYsonWriter writer(output, format);
+    TNodeVisitor visitor(&writer);
+    visitor.Visit(node);
+}
+
+// Same as NodeToYsonStream, but asks the visitor to sort map keys.
+void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format)
+{
+    ::NYson::TYsonWriter writer(output, format);
+    TNodeVisitor visitor(&writer, /*sortMapKeys*/ true);
+    visitor.Visit(node);
+}
+
+// Parses JSON text into a TNode. UTF-8 validation is switched off in the
+// reader config; the callbacks adapter is created with throwException = true.
+TNode NodeFromJsonString(const TStringBuf input)
+{
+    TMemoryInput stream(input);
+
+    TNode result;
+
+    TNodeBuilder builder(&result);
+    TYson2JsonCallbacksAdapter callbacks(&builder, /*throwException*/ true);
+    NJson::TJsonReaderConfig config;
+    config.DontValidateUtf8 = true;
+    NJson::ReadJson(&stream, &config, &callbacks);
+    return result;
+}
+
+TNode NodeFromJsonValue(const NJson::TJsonValue& input)
+{
+    TNode result;
+    TNodeBuilder builder(&result);
+    TYson2JsonCallbacksAdapter callbacks(&builder, /*throwException*/ true);
+    WalkJsonTree(input, &callbacks);
+    return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h
new file mode 100644
index 00000000000..2ad23b658f2
--- /dev/null
+++ b/library/cpp/yson/node/node_io.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "node.h"
+#include <library/cpp/yson/public.h>
+
+namespace NJson {
+    class TJsonValue;
+} // namespace NJson
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Parse a TNode from a string in YSON format; `type` selects what kind of YSON document is expected (a single node by default).
+TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+
+// Serialize a TNode to a string in the given YSON format (text by default). Map keys are written in unspecified order, so do not compare this output in tests.
+TString NodeToYsonString(const TNode& node, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+// Same as NodeToYsonString, but map keys are sorted lexicographically, which gives a stable output suitable for tests.
+TString NodeToCanonicalYsonString(const TNode& node, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+// Parse a TNode from a stream in YSON format; `type` selects what kind of YSON document is expected (a single node by default).
+TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+
+// Serialize a TNode to a stream in the given YSON format (text by default). Map keys are written in unspecified order, so do not compare this output in tests.
+void NodeToYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+// Same as NodeToYsonStream, but map keys are sorted lexicographically, which gives a stable output suitable for tests.
+void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+// Parse a TNode from a string in JSON format (the implementation does not validate UTF-8 in the input).
+TNode NodeFromJsonString(const TStringBuf input);
+
+// Convert a TJsonValue to a TNode; throws yexception if the value, or any value nested in it, is JSON_UNDEFINED.
+TNode NodeFromJsonValue(const NJson::TJsonValue& input);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_ut.cpp b/library/cpp/yson/node/node_ut.cpp
new file mode 100644
index 00000000000..448e99f5753
--- /dev/null
+++ b/library/cpp/yson/node/node_ut.cpp
@@ -0,0 +1,484 @@
+#include "node.h"
+#include "node_io.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/ysaveload.h>
+
+using namespace NYT;
+
+template<>
+void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node)
+{
+    s << "TNode:" << NodeToYsonString(node);
+}
+
+Y_UNIT_TEST_SUITE(YtNodeTest) {
+    Y_UNIT_TEST(TestConstsructors) {
+        TNode nodeEmpty;
+        UNIT_ASSERT_EQUAL(nodeEmpty.GetType(), TNode::Undefined);
+
+        TNode nodeString("foobar");
+        UNIT_ASSERT_EQUAL(nodeString.GetType(), TNode::String);
+        UNIT_ASSERT(nodeString.IsString());
+        UNIT_ASSERT_VALUES_EQUAL(nodeString.AsString(), "foobar");
+
+        TNode nodeInt(int(54));
+        UNIT_ASSERT_EQUAL(nodeInt.GetType(), TNode::Int64);
+        UNIT_ASSERT(nodeInt.IsInt64());
+        UNIT_ASSERT(!nodeInt.IsUint64());
+        UNIT_ASSERT_VALUES_EQUAL(nodeInt.AsInt64(), 54ull);
+
+        TNode nodeUint(ui64(42));
+        UNIT_ASSERT_EQUAL(nodeUint.GetType(), TNode::Uint64);
+        UNIT_ASSERT(nodeUint.IsUint64());
+        UNIT_ASSERT(!nodeUint.IsInt64());
+        UNIT_ASSERT_VALUES_EQUAL(nodeUint.AsUint64(), 42ull);
+
+        TNode nodeDouble(double(2.3));
+        UNIT_ASSERT_EQUAL(nodeDouble.GetType(), TNode::Double);
+        UNIT_ASSERT(nodeDouble.IsDouble());
+        UNIT_ASSERT_VALUES_EQUAL(nodeDouble.AsDouble(), double(2.3));
+
+        TNode nodeBool(true);
+        UNIT_ASSERT_EQUAL(nodeBool.GetType(), TNode::Bool);
+        UNIT_ASSERT(nodeBool.IsBool());
+        UNIT_ASSERT_VALUES_EQUAL(nodeBool.AsBool(), true);
+
+        TNode nodeEntity = TNode::CreateEntity();
+        UNIT_ASSERT_EQUAL(nodeEntity.GetType(), TNode::Null);
+        UNIT_ASSERT(nodeEntity.IsEntity());
+    }
+ Y_UNIT_TEST(TestPredicates) { + const TNode undefinedNode; + UNIT_ASSERT(undefinedNode.IsUndefined()); + UNIT_ASSERT(!undefinedNode.IsNull()); + UNIT_ASSERT(!undefinedNode.HasValue()); + + const TNode nullNode = TNode::CreateEntity(); + UNIT_ASSERT(!nullNode.IsUndefined()); + UNIT_ASSERT(nullNode.IsNull()); + UNIT_ASSERT(!nullNode.HasValue()); + + const TNode intNode(int(64)); + UNIT_ASSERT(!intNode.IsUndefined()); + UNIT_ASSERT(!intNode.IsNull()); + UNIT_ASSERT(intNode.HasValue()); + + const TNode stringNode("blah"); + UNIT_ASSERT(!stringNode.IsUndefined()); + UNIT_ASSERT(!stringNode.IsNull()); + UNIT_ASSERT(stringNode.HasValue()); + } + + Y_UNIT_TEST(TestComplexConstructors) { + const TNode listNode = TNode::CreateList({"one", 2, "tree"}); + const auto expectedListValue = std::vector<TNode>({"one", 2, "tree"}); + UNIT_ASSERT_VALUES_EQUAL(listNode.AsList(), expectedListValue); + + const TNode mapNode = TNode::CreateMap({{"one", 1}, {"two", 2u}}); + const auto expectedMapValue = THashMap<TString, TNode>({{"one", 1}, {"two", 2u}}); + UNIT_ASSERT_VALUES_EQUAL(mapNode.AsMap(), expectedMapValue); + } + + Y_UNIT_TEST(TestNodeMap) { + TNode nodeMap = TNode()("foo", "bar")("bar", "baz"); + UNIT_ASSERT(nodeMap.IsMap()); + UNIT_ASSERT_EQUAL(nodeMap.GetType(), TNode::Map); + UNIT_ASSERT_VALUES_EQUAL(nodeMap.Size(), 2); + + UNIT_ASSERT(nodeMap.HasKey("foo")); + UNIT_ASSERT(!nodeMap.HasKey("42")); + UNIT_ASSERT_EQUAL(nodeMap["foo"], TNode("bar")); + UNIT_ASSERT_EQUAL(nodeMap["bar"], TNode("baz")); + + // const version of operator[] + UNIT_ASSERT_EQUAL(static_cast<const TNode&>(nodeMap)["42"].GetType(), TNode::Undefined); + UNIT_ASSERT(!nodeMap.HasKey("42")); + + // nonconst version of operator[] + UNIT_ASSERT_EQUAL(nodeMap["42"].GetType(), TNode::Undefined); + UNIT_ASSERT(nodeMap.HasKey("42")); + + nodeMap("rock!!!", TNode() + ("Pink", "Floyd") + ("Purple", "Deep")); + + TNode copyNode; + copyNode = nodeMap; + UNIT_ASSERT_EQUAL(copyNode["foo"], TNode("bar")); + 
UNIT_ASSERT_EQUAL(copyNode["bar"], TNode("baz")); + UNIT_ASSERT(copyNode["42"].GetType() == TNode::Undefined); + UNIT_ASSERT_EQUAL(copyNode["rock!!!"]["Purple"], TNode("Deep")); + } + + Y_UNIT_TEST(TestNodeList) { + TNode nodeList = TNode().Add("foo").Add(42).Add(3.14); + UNIT_ASSERT(nodeList.IsList()); + UNIT_ASSERT_EQUAL(nodeList.GetType(), TNode::List); + UNIT_ASSERT_VALUES_EQUAL(nodeList.Size(), 3); + + UNIT_ASSERT_EQUAL(nodeList[1], TNode(42)); + nodeList.Add(TNode().Add("ls").Add("pwd")); + + TNode copyNode; + copyNode = nodeList; + UNIT_ASSERT_EQUAL(copyNode[0], TNode("foo")); + UNIT_ASSERT_EQUAL(copyNode[3][1], TNode("pwd")); + } + + Y_UNIT_TEST(TestInsertingMethodsFromTemporaryObjects) { + // check that .Add(...) doesn't return lvalue reference to temporary object + { + const TNode& nodeList = TNode().Add(0).Add("pass").Add(0); + UNIT_ASSERT_EQUAL(nodeList[1], TNode("pass")); + } + + // check that .operator()(...) doesn't return lvalue reference to temporary object + { + const TNode& nodeMap = TNode()("1", 0)("2", "pass")("3", 0); + UNIT_ASSERT_EQUAL(nodeMap["2"], TNode("pass")); + } + } + + Y_UNIT_TEST(TestAttributes) { + TNode node = TNode()("lee", 42)("faa", 54); + UNIT_ASSERT(!node.HasAttributes()); + node.Attributes()("foo", true)("bar", false); + UNIT_ASSERT(node.HasAttributes()); + + { + TNode copyNode; + UNIT_ASSERT(!copyNode.HasAttributes()); + copyNode = node; + UNIT_ASSERT(copyNode.HasAttributes()); + UNIT_ASSERT_EQUAL(copyNode.GetAttributes()["foo"], TNode(true)); + } + + { + TNode movedWithoutAttributes(42); + movedWithoutAttributes.Attributes()("one", 1)("two", 2); + movedWithoutAttributes.MoveWithoutAttributes(TNode(node)); + UNIT_ASSERT(movedWithoutAttributes.IsMap()); + UNIT_ASSERT_EQUAL(movedWithoutAttributes["lee"], TNode(42)); + UNIT_ASSERT_EQUAL(movedWithoutAttributes.GetAttributes()["one"], TNode(1)); + UNIT_ASSERT(!movedWithoutAttributes.GetAttributes().HasKey("foo")); + } + + { + TNode copyNode = node; + 
UNIT_ASSERT(copyNode.HasAttributes()); + UNIT_ASSERT(copyNode.GetAttributes().HasKey("foo")); + copyNode.ClearAttributes(); + UNIT_ASSERT(!copyNode.HasAttributes()); + UNIT_ASSERT(!copyNode.GetAttributes().HasKey("foo")); + } + + { + TNode copyNode = node; + UNIT_ASSERT(copyNode.HasAttributes()); + UNIT_ASSERT(copyNode.GetAttributes().HasKey("foo")); + copyNode.Clear(); + UNIT_ASSERT(!copyNode.HasAttributes()); + UNIT_ASSERT(!copyNode.GetAttributes().HasKey("foo")); + } + } + + Y_UNIT_TEST(TestEq) { + TNode nodeNoAttributes = TNode()("lee", 42)("faa", 54); + TNode node = nodeNoAttributes; + node.Attributes()("foo", true)("bar", false); + UNIT_ASSERT(node != nodeNoAttributes); + UNIT_ASSERT(nodeNoAttributes != node); + TNode copyNode = node; + UNIT_ASSERT(copyNode == node); + UNIT_ASSERT(node == copyNode); + } + + Y_UNIT_TEST(TestComparison) { + using namespace NYT::NNodeCmp; + { + TNode nodeNoAttributes = TNode()("lee", 42)("faa", 54); + TNode node = nodeNoAttributes; + node.Attributes()("foo", true)("bar", false); + UNIT_ASSERT_EXCEPTION(node > nodeNoAttributes, TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(node >= nodeNoAttributes, TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(nodeNoAttributes < node, TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(nodeNoAttributes <= node, TNode::TTypeError); + } + { + TNode nodeMap = TNode()("map", 23); + TNode nodeList = TNode::CreateList(); + UNIT_ASSERT_EXCEPTION(nodeList > nodeMap, TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(nodeMap < nodeList, TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(nodeMap >= nodeMap, TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(nodeList <= nodeList, TNode::TTypeError); + } + { + TNode node1("aaa"); + TNode node2("bbb"); + TNode node3("ccc"); + UNIT_ASSERT(node1 < node2); + UNIT_ASSERT(node1 <= node2); + UNIT_ASSERT(node1 < node3); + UNIT_ASSERT(node1 <= node3); + UNIT_ASSERT(!(node3 < node1)); + UNIT_ASSERT(!(node1 > node3)); + UNIT_ASSERT(!(node3 <= node1)); + UNIT_ASSERT(!(node1 >= node3)); + + 
UNIT_ASSERT(node3 > node2); + UNIT_ASSERT(node3 >= node2); + UNIT_ASSERT(node3 > node1); + UNIT_ASSERT(node3 >= node1); + + UNIT_ASSERT(node1 <= node1); + UNIT_ASSERT(node1 >= node1); + } + { + TNode node1(23); + TNode node2("bbb"); + TNode node3 = TNode::CreateEntity(); + + UNIT_ASSERT(node1 > node2); + UNIT_ASSERT(node1 >= node2); + UNIT_ASSERT(node2 < node1); + UNIT_ASSERT(node2 <= node1); + + UNIT_ASSERT(!(node1 < node2)); + UNIT_ASSERT(!(node1 <= node2)); + UNIT_ASSERT(!(node2 > node1)); + UNIT_ASSERT(!(node2 >= node1)); + + UNIT_ASSERT(node1 < node3); + UNIT_ASSERT(node2 < node3); + UNIT_ASSERT(node3 <= node3); + UNIT_ASSERT(!(node3 < node3)); + UNIT_ASSERT(!(node3 > node3)); + UNIT_ASSERT(!(node2 >= node3)); + } + } + + Y_UNIT_TEST(TestSaveLoad) { + TNode node = TNode()("foo", "bar")("baz", 42); + node.Attributes()["attr_name"] = "attr_value"; + + TString bytes; + { + TStringOutput s(bytes); + ::Save(&s, node); + } + + TNode nodeCopy; + { + TStringInput s(bytes); + ::Load(&s, nodeCopy); + } + + UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy); + } + + Y_UNIT_TEST(TestIntCast) { + TNode node = 1ull << 31; + UNIT_ASSERT(node.IsUint64()); + UNIT_ASSERT_EXCEPTION(node.IntCast<i32>(), TNode::TTypeError); + UNIT_ASSERT(node.IntCast<ui32>() == static_cast<ui32>(node.AsUint64())); + UNIT_ASSERT(node.IntCast<i64>() == static_cast<i64>(node.AsUint64())); + UNIT_ASSERT(node.IntCast<ui64>() == node.AsUint64()); + + node = 1ull << 63; + UNIT_ASSERT(node.IsUint64()); + UNIT_ASSERT_EXCEPTION(node.IntCast<i64>(), TNode::TTypeError); + UNIT_ASSERT(node.IntCast<ui64>() == node.AsUint64()); + + node = 12345; + UNIT_ASSERT(node.IsInt64()); + UNIT_ASSERT_EXCEPTION(node.IntCast<i8>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(node.IntCast<ui8>(), TNode::TTypeError); + UNIT_ASSERT(node.IntCast<i16>() == static_cast<i16>(node.AsInt64())); + UNIT_ASSERT(node.IntCast<ui16>() == static_cast<ui16>(node.AsInt64())); + UNIT_ASSERT(node.IntCast<i32>() == static_cast<i32>(node.AsInt64())); + 
UNIT_ASSERT(node.IntCast<ui32>() == static_cast<ui32>(node.AsInt64())); + UNIT_ASSERT(node.IntCast<i64>() == node.AsInt64()); + UNIT_ASSERT(node.IntCast<ui64>() == static_cast<ui64>(node.AsInt64())); + + node = -5; + UNIT_ASSERT(node.IsInt64()); + UNIT_ASSERT(node.IntCast<i8>() == static_cast<i8>(node.AsInt64())); + UNIT_ASSERT(node.IntCast<i16>() == static_cast<i16>(node.AsInt64())); + UNIT_ASSERT(node.IntCast<i32>() == static_cast<i32>(node.AsInt64())); + UNIT_ASSERT(node.IntCast<i64>() == node.AsInt64()); + UNIT_ASSERT_EXCEPTION(node.IntCast<ui8>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(node.IntCast<ui16>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(node.IntCast<ui32>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(node.IntCast<ui64>(), TNode::TTypeError); + } + + Y_UNIT_TEST(TestConvertToString) { + UNIT_ASSERT_VALUES_EQUAL(TNode(5).ConvertTo<TString>(), "5"); + UNIT_ASSERT_VALUES_EQUAL(TNode(123432423).ConvertTo<TString>(), "123432423"); + UNIT_ASSERT_VALUES_EQUAL(TNode(123456789012345678ll).ConvertTo<TString>(), "123456789012345678"); + UNIT_ASSERT_VALUES_EQUAL(TNode(123456789012345678ull).ConvertTo<TString>(), "123456789012345678"); + UNIT_ASSERT_VALUES_EQUAL(TNode(-123456789012345678ll).ConvertTo<TString>(), "-123456789012345678"); + UNIT_ASSERT_VALUES_EQUAL(TNode(true).ConvertTo<TString>(), "1"); + UNIT_ASSERT_VALUES_EQUAL(TNode(false).ConvertTo<TString>(), "0"); + UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<TString>(), "5.3"); + } + + Y_UNIT_TEST(TestConvertFromString) { + UNIT_ASSERT_VALUES_EQUAL(TNode("123456789012345678").ConvertTo<ui64>(), 123456789012345678ull); + UNIT_ASSERT_VALUES_EQUAL(TNode("123456789012345678").ConvertTo<i64>(), 123456789012345678); + UNIT_ASSERT_VALUES_EQUAL(TNode(ToString(1ull << 63)).ConvertTo<ui64>(), 1ull << 63); + UNIT_ASSERT_EXCEPTION(TNode(ToString(1ull << 63)).ConvertTo<i64>(), TFromStringException); + UNIT_ASSERT_VALUES_EQUAL(TNode("5.34").ConvertTo<double>(), 5.34); + } + + Y_UNIT_TEST(TestConvertDoubleInt) { 
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<i8>(), 5); + UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<ui8>(), 5); + UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<i64>(), 5); + UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<ui64>(), 5); + + UNIT_ASSERT_VALUES_EQUAL(TNode(-5.3).ConvertTo<i8>(), -5); + UNIT_ASSERT_VALUES_EQUAL(TNode(-5.3).ConvertTo<i64>(), -5); + UNIT_ASSERT_EXCEPTION(TNode(-5.3).ConvertTo<ui8>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(TNode(-5.3).ConvertTo<ui64>(), TNode::TTypeError); + + UNIT_ASSERT_VALUES_EQUAL(TNode(127.0).ConvertTo<i8>(), 127); + UNIT_ASSERT_EXCEPTION(TNode(128.0).ConvertTo<i8>(), TNode::TTypeError); + UNIT_ASSERT_VALUES_EQUAL(TNode(255.0).ConvertTo<ui8>(), 255); + UNIT_ASSERT_EXCEPTION(TNode(256.0).ConvertTo<ui8>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(TNode(1e100).ConvertTo<i64>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(TNode(1e100).ConvertTo<ui64>(), TNode::TTypeError); + { + double v = 1ull << 63; + TNode node = v; + UNIT_ASSERT(node.IsDouble()); + UNIT_ASSERT_EXCEPTION(node.ConvertTo<i64>(), TNode::TTypeError); + UNIT_ASSERT_VALUES_EQUAL(node.ConvertTo<ui64>(), static_cast<ui64>(v)); + } + { + double v = (double)(1ull << 63) + (1ull << 63); + TNode node = v; + UNIT_ASSERT(node.IsDouble()); + UNIT_ASSERT_EXCEPTION(node.ConvertTo<i64>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(node.ConvertTo<ui64>(), TNode::TTypeError); + } + UNIT_ASSERT_EXCEPTION(TNode(NAN).ConvertTo<ui64>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(TNode(NAN).ConvertTo<i64>(), TNode::TTypeError); + + UNIT_ASSERT_EXCEPTION(TNode(INFINITY).ConvertTo<ui64>(), TNode::TTypeError); + UNIT_ASSERT_EXCEPTION(TNode(INFINITY).ConvertTo<i64>(), TNode::TTypeError); + } + + Y_UNIT_TEST(TestConvertToBool) { + UNIT_ASSERT_VALUES_EQUAL(TNode("true").ConvertTo<bool>(), true); + UNIT_ASSERT_VALUES_EQUAL(TNode("TRUE").ConvertTo<bool>(), true); + UNIT_ASSERT_VALUES_EQUAL(TNode("false").ConvertTo<bool>(), false); + 
// Exercises the keyed Child* accessors of a map node: typed access
// (ChildAs<T> / ChildAsXxx), integer casts (ChildIntCast) and conversions
// (ChildConvertTo), plus mutation through the reference ChildAs returns.
Y_UNIT_TEST(TestMapGetters) {
    auto node = TNode::CreateMap()
        ("string", "7")
        ("int64", 3)
        ("uint64", 5u)
        ("double", -3.5)
        ("list", TNode::CreateList().Add(5))
        ("map", TNode::CreateMap()("key", "value"));

    // String child.
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>("string"), "7");
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAsString("string"), "7");
    UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<i64>("string"), 7);

    // Signed integer child.
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<i64>("int64"), 3);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAsInt64("int64"), 3);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildIntCast<ui64>("int64"), 3u);

    // Unsigned integer child.
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<ui64>("uint64"), 5u);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAsUint64("uint64"), 5u);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildIntCast<i64>("uint64"), 5);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<TString>("uint64"), "5");

    // Double child.
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<double>("double"), -3.5);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAsDouble("double"), -3.5);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<TString>("double"), "-3.5");

    // Composite children.
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TNode::TListType>("list")[0].AsInt64(), 5);
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAsList("list")[0].AsInt64(), 5);

    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TNode::TMapType>("map")["key"].AsString(), "value");
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAsMap("map")["key"].AsString(), "value");

    // ChildAs on a non-const node yields a reference: writing through it
    // must be visible on the next read.
    node.ChildAs<TString>("string") = "yaddayadda";
    UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>("string"), "yaddayadda");
}
node.ChildAs<TString>(0); + childString = "yaddayadda"; + UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>(0), "yaddayadda"); + } +} diff --git a/library/cpp/yson/node/node_visitor.cpp b/library/cpp/yson/node/node_visitor.cpp new file mode 100644 index 00000000000..899fbfa02aa --- /dev/null +++ b/library/cpp/yson/node/node_visitor.cpp @@ -0,0 +1,152 @@ +#include "node_visitor.h" + +#include <util/generic/algorithm.h> +#include <util/string/printf.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +template <typename Fun> +void Iterate(const TNode::TMapType& nodeMap, bool sortByKey, Fun action) +{ + if (sortByKey) { + TVector<TNode::TMapType::const_iterator> iterators; + for (auto it = nodeMap.begin(); it != nodeMap.end(); ++it) { + iterators.push_back(it); + } + SortBy(iterators, [](TNode::TMapType::const_iterator it) { return it->first; }); + for (const auto& it : iterators) { + action(*it); + } + } else { + ForEach(nodeMap.begin(), nodeMap.end(), action); + } +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +TNodeVisitor::TNodeVisitor(NYson::IYsonConsumer* consumer, bool sortMapKeys) + : Consumer_(consumer) + , SortMapKeys_(sortMapKeys) +{ } + +void TNodeVisitor::Visit(const TNode& node) +{ + VisitAny(node); +} + +void TNodeVisitor::VisitAny(const TNode& node) +{ + if (node.HasAttributes()) { + Consumer_->OnBeginAttributes(); + Iterate(node.GetAttributes().AsMap(), SortMapKeys_, [&](const std::pair<TString, TNode>& item) { + Consumer_->OnKeyedItem(item.first); + if (item.second.IsUndefined()) { + ythrow TNode::TTypeError() << "unable to visit attribute value of type " + << TNode::EType::Undefined << "; attribute name: `" << item.first << '\'' ; + } + VisitAny(item.second); + }); + Consumer_->OnEndAttributes(); + } + + switch (node.GetType()) { + case TNode::String: + VisitString(node); + break; + case TNode::Int64: + VisitInt64(node); + 
break; + case TNode::Uint64: + VisitUint64(node); + break; + case TNode::Double: + VisitDouble(node); + break; + case TNode::Bool: + VisitBool(node); + break; + case TNode::List: + VisitList(node.AsList()); + break; + case TNode::Map: + VisitMap(node.AsMap()); + break; + case TNode::Null: + VisitEntity(); + break; + case TNode::Undefined: + ythrow TNode::TTypeError() << "unable to visit TNode of type " << node.GetType(); + default: + Y_FAIL("Unexpected type: %d", node.GetType()); + } +} + +void TNodeVisitor::VisitString(const TNode& node) +{ + Consumer_->OnStringScalar(node.AsString()); +} + +void TNodeVisitor::VisitInt64(const TNode& node) +{ + Consumer_->OnInt64Scalar(node.AsInt64()); +} + +void TNodeVisitor::VisitUint64(const TNode& node) +{ + Consumer_->OnUint64Scalar(node.AsUint64()); +} + +void TNodeVisitor::VisitDouble(const TNode& node) +{ + Consumer_->OnDoubleScalar(node.AsDouble()); +} + +void TNodeVisitor::VisitBool(const TNode& node) +{ + Consumer_->OnBooleanScalar(node.AsBool()); +} + +void TNodeVisitor::VisitList(const TNode::TListType& nodeList) +{ + Consumer_->OnBeginList(); + size_t index = 0; + for (const auto& item : nodeList) { + Consumer_->OnListItem(); + if (item.IsUndefined()) { + ythrow TNode::TTypeError() << "unable to visit list node child of type " + << TNode::EType::Undefined << "; list index: " << index; + } + VisitAny(item); + ++index; + } + Consumer_->OnEndList(); +} + +void TNodeVisitor::VisitMap(const TNode::TMapType& nodeMap) +{ + Consumer_->OnBeginMap(); + Iterate(nodeMap, SortMapKeys_, [&](const std::pair<TString, TNode>& item) { + Consumer_->OnKeyedItem(item.first); + if (item.second.IsUndefined()) { + ythrow TNode::TTypeError() << "unable to visit map node child of type " + << TNode::EType::Undefined << "; map key: `" << item.first << '\'' ; + } + VisitAny(item.second); + }); + Consumer_->OnEndMap(); +} + +void TNodeVisitor::VisitEntity() +{ + Consumer_->OnEntity(); +} + 
+//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yson/node/node_visitor.h b/library/cpp/yson/node/node_visitor.h new file mode 100644 index 00000000000..db258323092 --- /dev/null +++ b/library/cpp/yson/node/node_visitor.h @@ -0,0 +1,37 @@ +#pragma once + +#include "node.h" + +#include <library/cpp/yson/consumer.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +class TNodeVisitor +{ +public: + TNodeVisitor(NYson::IYsonConsumer* consumer, bool sortMapKeys = false); + + void Visit(const TNode& node); + void VisitMap(const TNode::TMapType& nodeMap); + void VisitList(const TNode::TListType& nodeMap); + +private: + NYson::IYsonConsumer* Consumer_; + bool SortMapKeys_; + +private: + void VisitAny(const TNode& node); + + void VisitString(const TNode& node); + void VisitInt64(const TNode& node); + void VisitUint64(const TNode& node); + void VisitDouble(const TNode& node); + void VisitBool(const TNode& node); + void VisitEntity(); +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yson/node/pybind/node.cpp b/library/cpp/yson/node/pybind/node.cpp new file mode 100644 index 00000000000..79beba36471 --- /dev/null +++ b/library/cpp/yson/node/pybind/node.cpp @@ -0,0 +1,105 @@ +#include "node.h" + +#include <library/cpp/yson/node/node.h> + +#include <library/cpp/pybind/cast.h> + +#include <Python.h> + +namespace NYT { + + PyObject* BuildPyObject(const TNode& node) { + switch (node.GetType()) { + case TNode::Bool: + return NPyBind::BuildPyObject(node.AsBool()); + case TNode::Int64: + return NPyBind::BuildPyObject(node.AsInt64()); + case TNode::Uint64: + return NPyBind::BuildPyObject(node.AsUint64()); + case TNode::Double: + return NPyBind::BuildPyObject(node.AsDouble()); + case TNode::String: + return NPyBind::BuildPyObject(node.AsString()); + case TNode::List: + 
return NPyBind::BuildPyObject(node.AsList()); + case TNode::Map: + return NPyBind::BuildPyObject(node.AsMap()); + case TNode::Null: + Py_RETURN_NONE; + case TNode::Undefined: + ythrow TNode::TTypeError() << "BuildPyObject called for undefined TNode"; + } + } + +} // namespace NYT + +namespace NPyBind { + + template <> + bool FromPyObject(PyObject* obj, NYT::TNode& res) { + if (obj == Py_None) { + res = NYT::TNode::CreateEntity(); + return true; + } + if (PyBool_Check(obj)) { + res = false; + return FromPyObject(obj, res.As<bool>()); + } + if (PyFloat_Check(obj)) { + res = 0.0; + return FromPyObject(obj, res.As<double>()); + } +#if PY_MAJOR_VERSION < 3 + if (PyString_Check(obj)) { + res = TString(); + return FromPyObject(obj, res.As<TString>()); + } +#else + if (PyUnicode_Check(obj)) { + res = TString(); + return FromPyObject(obj, res.As<TString>()); + } + if (PyBytes_Check(obj)) { + res = TString(); + return FromPyObject(obj, res.As<TString>()); + } +#endif + if (PyList_Check(obj)) { + res = NYT::TNode::CreateList(); + return FromPyObject(obj, res.AsList()); + } + if (PyDict_Check(obj)) { + res = NYT::TNode::CreateMap(); + return FromPyObject(obj, res.AsMap()); + } +#if PY_MAJOR_VERSION < 3 + if (PyInt_Check(obj)) { + auto valAsLong = PyInt_AsLong(obj); + if (valAsLong == -1 && PyErr_Occurred()) { + return false; + } + res = valAsLong; + return true; + } +#endif + if (PyLong_Check(obj)) { + int overflow = 0; + auto valAsLong = PyLong_AsLongAndOverflow(obj, &overflow); + if (!overflow) { + if (valAsLong == -1 && PyErr_Occurred()) { + return false; + } + res = valAsLong; + return true; + } + auto valAsULong = PyLong_AsUnsignedLong(obj); + if (valAsULong == static_cast<decltype(valAsULong)>(-1) && PyErr_Occurred()) { + return false; + } + res = valAsULong; + return true; + } + return false; + } + +} // namespace NPyBind diff --git a/library/cpp/yson/node/pybind/node.h b/library/cpp/yson/node/pybind/node.h new file mode 100644 index 00000000000..65f7236de68 --- 
/dev/null +++ b/library/cpp/yson/node/pybind/node.h @@ -0,0 +1,9 @@ +#pragma once + +#include <Python.h> + +#include <library/cpp/yson/node/node.h> + +namespace NYT { + PyObject* BuildPyObject(const TNode& val); +} diff --git a/library/cpp/yson/node/pybind/ya.make b/library/cpp/yson/node/pybind/ya.make new file mode 100644 index 00000000000..97b7583e96f --- /dev/null +++ b/library/cpp/yson/node/pybind/ya.make @@ -0,0 +1,16 @@ +PY23_NATIVE_LIBRARY() + +OWNER( + inngonch + g:yt +) + +PEERDIR( + library/cpp/pybind + library/cpp/yson/node +) +SRCS( + node.cpp +) + +END() diff --git a/library/cpp/yson/node/serialize.cpp b/library/cpp/yson/node/serialize.cpp new file mode 100644 index 00000000000..aeb467622bd --- /dev/null +++ b/library/cpp/yson/node/serialize.cpp @@ -0,0 +1,101 @@ +#include "serialize.h" + +#include "node_visitor.h" + +#include <library/cpp/yson/consumer.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +void Serialize(const TString& value, NYson::IYsonConsumer* consumer) +{ + consumer->OnStringScalar(value); +} + +void Serialize(const TStringBuf& value, NYson::IYsonConsumer* consumer) +{ + consumer->OnStringScalar(value); +} + +void Serialize(const char* value, NYson::IYsonConsumer* consumer) +{ + consumer->OnStringScalar(value); +} + +void Deserialize(TString& value, const TNode& node) +{ + value = node.AsString(); +} + +#define SERIALIZE_SIGNED(type) \ +void Serialize(type value, NYson::IYsonConsumer* consumer) \ +{ \ + consumer->OnInt64Scalar(static_cast<i64>(value)); \ +} + +#define SERIALIZE_UNSIGNED(type) \ +void Serialize(type value, NYson::IYsonConsumer* consumer) \ +{ \ + consumer->OnUint64Scalar(static_cast<ui64>(value)); \ +} + +SERIALIZE_SIGNED(signed char); +SERIALIZE_SIGNED(short); +SERIALIZE_SIGNED(int); +SERIALIZE_SIGNED(long); +SERIALIZE_SIGNED(long long); + +SERIALIZE_UNSIGNED(unsigned char); +SERIALIZE_UNSIGNED(unsigned short); +SERIALIZE_UNSIGNED(unsigned int); 
+SERIALIZE_UNSIGNED(unsigned long); +SERIALIZE_UNSIGNED(unsigned long long); + +#undef SERIALIZE_SIGNED +#undef SERIALIZE_UNSIGNED + +void Deserialize(i64& value, const TNode& node) +{ + value = node.AsInt64(); +} + +void Deserialize(ui64& value, const TNode& node) +{ + value = node.AsUint64(); +} + +void Serialize(double value, NYson::IYsonConsumer* consumer) +{ + consumer->OnDoubleScalar(value); +} + +void Deserialize(double& value, const TNode& node) +{ + value = node.AsDouble(); +} + +void Serialize(bool value, NYson::IYsonConsumer* consumer) +{ + consumer->OnBooleanScalar(value); +} + +void Deserialize(bool& value, const TNode& node) +{ + value = node.AsBool(); +} + +void Serialize(const TNode& node, NYson::IYsonConsumer* consumer) +{ + TNodeVisitor visitor(consumer); + visitor.Visit(node); +} + +void Deserialize(TNode& value, const TNode& node) +{ + value = node; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yson/node/serialize.h b/library/cpp/yson/node/serialize.h new file mode 100644 index 00000000000..99b598a44c3 --- /dev/null +++ b/library/cpp/yson/node/serialize.h @@ -0,0 +1,45 @@ +#pragma once + +#include "node.h" + +namespace NYT { + +namespace NYson { +struct IYsonConsumer; +} // namespace NYson + +//////////////////////////////////////////////////////////////////////////////// + +void Serialize(const TString& value, NYson::IYsonConsumer* consumer); +void Serialize(const TStringBuf& value, NYson::IYsonConsumer* consumer); +void Serialize(const char* value, NYson::IYsonConsumer* consumer); +void Deserialize(TString& value, const TNode& node); + +void Serialize(signed char value, NYson::IYsonConsumer* consumer); +void Serialize(short value, NYson::IYsonConsumer* consumer); +void Serialize(int value, NYson::IYsonConsumer* consumer); +void Serialize(long value, NYson::IYsonConsumer* consumer); +void Serialize(long long value, NYson::IYsonConsumer* consumer); +void 
Deserialize(i64& value, const TNode& node); + +void Serialize(unsigned char value, NYson::IYsonConsumer* consumer); +void Serialize(unsigned short value, NYson::IYsonConsumer* consumer); +void Serialize(unsigned int value, NYson::IYsonConsumer* consumer); +void Serialize(unsigned long value, NYson::IYsonConsumer* consumer); +void Serialize(unsigned long long value, NYson::IYsonConsumer* consumer); +void Deserialize(ui64& value, const TNode& node); + +void Serialize(double value, NYson::IYsonConsumer* consumer); +void Deserialize(double& value, const TNode& node); + +void Serialize(bool value, NYson::IYsonConsumer* consumer); +void Deserialize(bool& value, const TNode& node); + +void Serialize(const TNode& node, NYson::IYsonConsumer* consumer); +void Deserialize(TNode& value, const TNode& node); + +void Serialize(const THashMap<TString, TString>& renameColumns, NYson::IYsonConsumer* consumer); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yson/node/ut/ya.make b/library/cpp/yson/node/ut/ya.make new file mode 100644 index 00000000000..f49a0bf7dfb --- /dev/null +++ b/library/cpp/yson/node/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/yson/node) + +OWNER( + ermolovd + g:yt +) + +SRCS( + node_ut.cpp +) + +END() diff --git a/library/cpp/yson/node/ya.make b/library/cpp/yson/node/ya.make new file mode 100644 index 00000000000..a082b293c46 --- /dev/null +++ b/library/cpp/yson/node/ya.make @@ -0,0 +1,25 @@ +LIBRARY() + +GENERATE_ENUM_SERIALIZATION(node.h) + +PEERDIR( + library/cpp/yson + library/cpp/yson/json +) + +OWNER( + ermolovd + g:yt +) + +SRCS( + node.cpp + node_io.cpp + node_builder.cpp + node_visitor.cpp + serialize.cpp +) + +END() + +RECURSE_FOR_TESTS(ut) diff --git a/library/cpp/yson/parser.cpp b/library/cpp/yson/parser.cpp new file mode 100644 index 00000000000..783f9b90479 --- /dev/null +++ b/library/cpp/yson/parser.cpp @@ -0,0 +1,179 @@ +#include "parser.h" +#include 
"consumer.h" +#include "format.h" +#include "parser_detail.h" + +#include <util/stream/input.h> +#include <util/generic/buffer.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + class TYsonParser::TImpl { + public: + TImpl( + NYT::NYson::IYsonConsumer* consumer, + IInputStream* stream, + EYsonType type, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit = Nothing()) + : Consumer_(consumer) + , Stream_(stream) + , Type_(type) + , EnableLinePositionInfo_(enableLinePositionInfo) + , MemoryLimit_(memoryLimit) + { + } + + void Parse() { + TBuffer buffer(64 << 10); + ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStreamReader>( + TStreamReader(Stream_, buffer.Data(), buffer.Capacity()), + Consumer_, + Type_, + EnableLinePositionInfo_, + MemoryLimit_); + } + + private: + NYT::NYson::IYsonConsumer* Consumer_; + IInputStream* Stream_; + EYsonType Type_; + bool EnableLinePositionInfo_; + TMaybe<ui64> MemoryLimit_; + }; + + //////////////////////////////////////////////////////////////////////////////// + + TYsonParser::TYsonParser( + NYT::NYson::IYsonConsumer* consumer, + IInputStream* stream, + EYsonType type, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) + : Impl(new TImpl(consumer, stream, type, enableLinePositionInfo, memoryLimit)) + { + } + + TYsonParser::~TYsonParser() { + } + + void TYsonParser::Parse() { + Impl->Parse(); + } + + //////////////////////////////////////////////////////////////////////////////// + + class TStatelessYsonParser::TImpl { + private: + THolder<TStatelessYsonParserImplBase> Impl; + + public: + TImpl( + NYT::NYson::IYsonConsumer* consumer, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) + : Impl( + enableLinePositionInfo + ? 
static_cast<TStatelessYsonParserImplBase*>(new TStatelessYsonParserImpl<NYT::NYson::IYsonConsumer, true>(consumer, memoryLimit)) + : static_cast<TStatelessYsonParserImplBase*>(new TStatelessYsonParserImpl<NYT::NYson::IYsonConsumer, false>(consumer, memoryLimit))) + { + } + + void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) { + Impl->Parse(data, type); + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + TStatelessYsonParser::TStatelessYsonParser( + NYT::NYson::IYsonConsumer* consumer, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) + : Impl(new TImpl(consumer, enableLinePositionInfo, memoryLimit)) + { + } + + TStatelessYsonParser::~TStatelessYsonParser() { + } + + void TStatelessYsonParser::Parse(const TStringBuf& data, EYsonType type) { + Impl->Parse(data, type); + } + + //////////////////////////////////////////////////////////////////////////////// + + void ParseYsonStringBuffer( + const TStringBuf& buffer, + NYT::NYson::IYsonConsumer* consumer, + EYsonType type, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) { + ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStringReader>( + TStringReader(buffer.begin(), buffer.end()), + consumer, + type, + enableLinePositionInfo, + memoryLimit); + } + + //////////////////////////////////////////////////////////////////////////////// + + class TYsonListParser::TImpl { + public: + TImpl( + NYT::NYson::IYsonConsumer* consumer, + IInputStream* stream, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit = Nothing()) + : Consumer_(consumer) + , Stream_(stream) + , EnableLinePositionInfo_(enableLinePositionInfo) + , MemoryLimit_(memoryLimit) + , Buffer_(64 << 10) + , Reader_(Stream_, Buffer_.Data(), Buffer_.Capacity()) + { + } + + bool Parse() { + if (!Impl_) { + Impl_.Reset( + EnableLinePositionInfo_ + ? 
static_cast<TYsonListParserImplBase*>(new TYsonListParserImpl<NYT::NYson::IYsonConsumer, TStreamReader, true>(Reader_, Consumer_, MemoryLimit_)) + : static_cast<TYsonListParserImplBase*>(new TYsonListParserImpl<NYT::NYson::IYsonConsumer, TStreamReader, false>(Reader_, Consumer_, MemoryLimit_))); + } + return Impl_->Parse(); + } + + private: + NYT::NYson::IYsonConsumer* Consumer_; + IInputStream* Stream_; + bool EnableLinePositionInfo_; + TMaybe<ui64> MemoryLimit_; + TBuffer Buffer_; + TStreamReader Reader_; + THolder<TYsonListParserImplBase> Impl_; + }; + + //////////////////////////////////////////////////////////////////////////////// + + TYsonListParser::TYsonListParser( + NYT::NYson::IYsonConsumer* consumer, + IInputStream* stream, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) + : Impl(new TImpl(consumer, stream, enableLinePositionInfo, memoryLimit)) + { + } + + TYsonListParser::~TYsonListParser() { + } + + bool TYsonListParser::Parse() { + return Impl->Parse(); + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/parser.h b/library/cpp/yson/parser.h new file mode 100644 index 00000000000..dce35a8cd40 --- /dev/null +++ b/library/cpp/yson/parser.h @@ -0,0 +1,83 @@ +#pragma once + +#include "public.h" + +#include <util/generic/maybe.h> +#include <util/generic/ptr.h> + +class IInputStream; + +namespace NYT::NYson { +struct IYsonConsumer; +} // namespace NYT::NYson + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + class TYsonParser { + public: + TYsonParser( + NYT::NYson::IYsonConsumer* consumer, + IInputStream* stream, + EYsonType type = ::NYson::EYsonType::Node, + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); + + ~TYsonParser(); + + void Parse(); + + private: + class TImpl; + THolder<TImpl> Impl; + }; + + 
//////////////////////////////////////////////////////////////////////////////// + + class TStatelessYsonParser { + public: + TStatelessYsonParser( + NYT::NYson::IYsonConsumer* consumer, + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); + + ~TStatelessYsonParser(); + + void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node); + + private: + class TImpl; + THolder<TImpl> Impl; + }; + + //////////////////////////////////////////////////////////////////////////////// + + class TYsonListParser { + public: + TYsonListParser( + NYT::NYson::IYsonConsumer* consumer, + IInputStream* stream, + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); + + ~TYsonListParser(); + + bool Parse(); // Returns false, if there is no more list items + + private: + class TImpl; + THolder<TImpl> Impl; + }; + + //////////////////////////////////////////////////////////////////////////////// + + void ParseYsonStringBuffer( + const TStringBuf& buffer, + NYT::NYson::IYsonConsumer* consumer, + EYsonType type = ::NYson::EYsonType::Node, + bool enableLinePositionInfo = false, + TMaybe<ui64> memoryLimit = Nothing()); + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/parser_detail.h b/library/cpp/yson/parser_detail.h new file mode 100644 index 00000000000..44223caf125 --- /dev/null +++ b/library/cpp/yson/parser_detail.h @@ -0,0 +1,381 @@ +#pragma once + +#include "detail.h" + +namespace NYson { + namespace NDetail { + //////////////////////////////////////////////////////////////////////////////// + + template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> + class TParser + : public TLexerBase<TBlockStream, EnableLinePositionInfo> { + private: + using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; + TConsumer* Consumer; + + public: + TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> 
memoryLimit) + : TBase(blockStream, memoryLimit) + , Consumer(consumer) + { + } + + void DoParse(EYsonType ysonType) { + switch (ysonType) { + case ::NYson::EYsonType::Node: + ParseNode<true>(); + break; + + case ::NYson::EYsonType::ListFragment: + ParseListFragment<true>(EndSymbol); + break; + + case ::NYson::EYsonType::MapFragment: + ParseMapFragment<true>(EndSymbol); + break; + + default: + Y_FAIL("unreachable"); + } + + while (!(TBase::IsFinished() && TBase::IsEmpty())) { + if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) { + ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found"; + } else if (!TBase::IsEmpty()) { + TBase::Advance(1); + } + } + } + + bool DoParseListFragment(bool first) { + bool ret = first ? first : ParseListSeparator<true>(EndSymbol); + return ret && ParseListItem<true>(EndSymbol); + } + + void ParseAttributes() { + Consumer->OnBeginAttributes(); + ParseMapFragment(EndAttributesSymbol); + TBase::SkipCharToken(EndAttributesSymbol); + Consumer->OnEndAttributes(); + } + + void ParseMap() { + Consumer->OnBeginMap(); + ParseMapFragment(EndMapSymbol); + TBase::SkipCharToken(EndMapSymbol); + Consumer->OnEndMap(); + } + + void ParseList() { + Consumer->OnBeginList(); + ParseListFragment(EndListSymbol); + TBase::SkipCharToken(EndListSymbol); + Consumer->OnEndList(); + } + + template <bool AllowFinish> + void ParseNode() { + return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar()); + } + + template <bool AllowFinish> + void ParseNode(char ch) { + if (ch == BeginAttributesSymbol) { + TBase::Advance(1); + ParseAttributes(); + ch = TBase::SkipSpaceAndGetChar(); + } + + switch (ch) { + case BeginMapSymbol: + TBase::Advance(1); + ParseMap(); + break; + + case BeginListSymbol: + TBase::Advance(1); + ParseList(); + break; + + case '"': { + TBase::Advance(1); + TStringBuf value; + TBase::ReadQuotedString(&value); + Consumer->OnStringScalar(value); + break; + } + case StringMarker: { + TBase::Advance(1); + TStringBuf value; + 
TBase::ReadBinaryString(&value); + Consumer->OnStringScalar(value); + break; + } + case Int64Marker: { + TBase::Advance(1); + i64 value; + TBase::ReadBinaryInt64(&value); + Consumer->OnInt64Scalar(value); + break; + } + case Uint64Marker: { + TBase::Advance(1); + ui64 value; + TBase::ReadBinaryUint64(&value); + Consumer->OnUint64Scalar(value); + break; + } + case DoubleMarker: { + TBase::Advance(1); + double value; + TBase::ReadBinaryDouble(&value); + Consumer->OnDoubleScalar(value); + break; + } + case FalseMarker: { + TBase::Advance(1); + Consumer->OnBooleanScalar(false); + break; + } + case TrueMarker: { + TBase::Advance(1); + Consumer->OnBooleanScalar(true); + break; + } + case EntitySymbol: + TBase::Advance(1); + Consumer->OnEntity(); + break; + + default: { + if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state + ReadNumeric<AllowFinish>(); + } else if (isalpha((unsigned char)ch) || ch == '_') { + TStringBuf value; + TBase::template ReadUnquotedString<AllowFinish>(&value); + Consumer->OnStringScalar(value); + } else if (ch == '%') { + TBase::Advance(1); + ch = TBase::template GetChar<AllowFinish>(); + if (ch == 't' || ch == 'f') { + Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>()); + } else { + Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>()); + } + } else { + ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node"; + } + } + } + } + + void ParseKey() { + return ParseKey(TBase::SkipSpaceAndGetChar()); + } + + void ParseKey(char ch) { + switch (ch) { + case '"': { + TBase::Advance(1); + TStringBuf value; + TBase::ReadQuotedString(&value); + Consumer->OnKeyedItem(value); + break; + } + case StringMarker: { + TBase::Advance(1); + TStringBuf value; + TBase::ReadBinaryString(&value); + Consumer->OnKeyedItem(value); + break; + } + default: { + if (isalpha(ch) || ch == '_') { + TStringBuf value; + TBase::ReadUnquotedString(&value); + 
Consumer->OnKeyedItem(value); + } else { + ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key"; + } + } + } + } + + template <bool AllowFinish> + void ParseMapFragment(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + while (ch != endSymbol) { + ParseKey(ch); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == KeyValueSeparatorSymbol) { + TBase::Advance(1); + } else { + ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found"; + } + ParseNode<AllowFinish>(); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == KeyedItemSeparatorSymbol) { + TBase::Advance(1); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + } else if (ch != endSymbol) { + ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol + << "' or '" << endSymbol << "' but '" << ch << "' found"; + } + } + } + + void ParseMapFragment(char endSymbol) { + ParseMapFragment<false>(endSymbol); + } + + template <bool AllowFinish> + bool ParseListItem(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch != endSymbol) { + Consumer->OnListItem(); + ParseNode<AllowFinish>(ch); + return true; + } + return false; + } + + template <bool AllowFinish> + bool ParseListSeparator(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == ListItemSeparatorSymbol) { + TBase::Advance(1); + return true; + } else if (ch != endSymbol) { + ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol + << "' or '" << endSymbol << "' but '" << ch << "' found"; + } + return false; + } + + template <bool AllowFinish> + void ParseListFragment(char endSymbol) { + while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) { + } + } + + void ParseListFragment(char endSymbol) { + ParseListFragment<false>(endSymbol); + } + + template <bool AllowFinish> + void ReadNumeric() { + 
TStringBuf valueBuffer; + ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); + + if (numericResult == ENumericResult::Double) { + double value; + try { + value = FromString<double>(valueBuffer); + } catch (yexception& e) { + // This exception is wrapped in parser. + ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e; + } + Consumer->OnDoubleScalar(value); + } else if (numericResult == ENumericResult::Int64) { + i64 value; + try { + value = FromString<i64>(valueBuffer); + } catch (yexception& e) { + // This exception is wrapped in parser. + ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e; + } + Consumer->OnInt64Scalar(value); + } else if (numericResult == ENumericResult::Uint64) { + ui64 value; + try { + value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1)); + } catch (yexception& e) { + // This exception is wrapped in parser. + ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e; + } + Consumer->OnUint64Scalar(value); + } + } + }; + + //////////////////////////////////////////////////////////////////////////////// + + } + + template <class TConsumer, class TBlockStream> + void ParseYsonStreamImpl( + const TBlockStream& blockStream, + NYT::NYson::IYsonConsumer* consumer, + EYsonType parsingMode, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) { + if (enableLinePositionInfo) { + using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>; + TImpl impl(blockStream, consumer, memoryLimit); + impl.DoParse(parsingMode); + } else { + using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>; + TImpl impl(blockStream, consumer, memoryLimit); + impl.DoParse(parsingMode); + } + } + + class TStatelessYsonParserImplBase { + public: + virtual void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) = 0; + + virtual ~TStatelessYsonParserImplBase() { + } + }; + + template 
<class TConsumer, bool EnableLinePositionInfo> + class TStatelessYsonParserImpl + : public TStatelessYsonParserImplBase { + private: + using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>; + TParser Parser; + + public: + TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit) + : Parser(TStringReader(), consumer, memoryLimit) + { + } + + void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override { + Parser.SetBuffer(data.begin(), data.end()); + Parser.DoParse(type); + } + }; + + class TYsonListParserImplBase { + public: + virtual bool Parse() = 0; + + virtual ~TYsonListParserImplBase() { + } + }; + + template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> + class TYsonListParserImpl + : public TYsonListParserImplBase { + private: + using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>; + TParser Parser; + bool First = true; + + public: + TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) + : Parser(blockStream, consumer, memoryLimit) + { + } + + bool Parse() override { + bool ret = Parser.DoParseListFragment(First); + First = false; + return ret; + } + }; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/public.h b/library/cpp/yson/public.h new file mode 100644 index 00000000000..1ed793592ba --- /dev/null +++ b/library/cpp/yson/public.h @@ -0,0 +1,30 @@ +#pragma once + +#include <library/cpp/yt/misc/enum.h> +#include <util/generic/yexception.h> + +#include <library/cpp/yt/yson_string/public.h> +#include <library/cpp/yt/yson/public.h> + +namespace NYson { + + //////////////////////////////////////////////////////////////////////////////// + + using NYT::NYson::EYsonFormat; + using NYT::NYson::EYsonType; + + class TYsonStringBuf; + + struct TYsonConsumerBase; + + class TYsonWriter; + class TYsonParser; + class 
TStatelessYsonParser; + class TYsonListParser; + + class TYsonException + : public yexception {}; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/string-inl.h b/library/cpp/yson/string-inl.h new file mode 100644 index 00000000000..92e9ab4531e --- /dev/null +++ b/library/cpp/yson/string-inl.h @@ -0,0 +1,57 @@ +#pragma once + +#ifndef STRING_INL_H_ +#error "Direct inclusion of this file is not allowed, include string.h" +// For the sake of sane code completion. +#include "string.h" +#endif + +#include <util/str_stl.h> + +namespace NYson { + +//////////////////////////////////////////////////////////////////////////////// + +namespace NDetail { + +template <typename TLeft, typename TRight> +bool Equals(const TLeft& lhs, const TRight& rhs) +{ + auto lhsNull = !lhs.operator bool(); + auto rhsNull = !rhs.operator bool(); + if (lhsNull != rhsNull) { + return false; + } + if (lhsNull && rhsNull) { + return true; + } + return + lhs.AsStringBuf() == rhs.AsStringBuf() && + lhs.GetType() == rhs.GetType(); +} + +} // namespace NDetail + +inline bool operator == (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs) +{ + return NDetail::Equals(lhs, rhs); +} + +inline bool operator != (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs) +{ + return !(lhs == rhs); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson + +//! 
A hasher for TYsonStringBuf +template <> +struct THash<NYson::TYsonStringBuf> +{ + size_t operator () (const NYson::TYsonStringBuf& str) const + { + return THash<TStringBuf>()(str.AsStringBuf()); + } +}; diff --git a/library/cpp/yson/token.cpp b/library/cpp/yson/token.cpp new file mode 100644 index 00000000000..c8584c8c2ea --- /dev/null +++ b/library/cpp/yson/token.cpp @@ -0,0 +1,236 @@ +#include "token.h" + +#include <util/string/vector.h> +#include <util/string/printf.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + ETokenType CharToTokenType(char ch) { + switch (ch) { + case ';': + return ETokenType::Semicolon; + case '=': + return ETokenType::Equals; + case '{': + return ETokenType::LeftBrace; + case '}': + return ETokenType::RightBrace; + case '#': + return ETokenType::Hash; + case '[': + return ETokenType::LeftBracket; + case ']': + return ETokenType::RightBracket; + case '<': + return ETokenType::LeftAngle; + case '>': + return ETokenType::RightAngle; + case '(': + return ETokenType::LeftParenthesis; + case ')': + return ETokenType::RightParenthesis; + case '+': + return ETokenType::Plus; + case ':': + return ETokenType::Colon; + case ',': + return ETokenType::Comma; + default: + return ETokenType::EndOfStream; + } + } + + char TokenTypeToChar(ETokenType type) { + switch (type) { + case ETokenType::Semicolon: + return ';'; + case ETokenType::Equals: + return '='; + case ETokenType::Hash: + return '#'; + case ETokenType::LeftBracket: + return '['; + case ETokenType::RightBracket: + return ']'; + case ETokenType::LeftBrace: + return '{'; + case ETokenType::RightBrace: + return '}'; + case ETokenType::LeftAngle: + return '<'; + case ETokenType::RightAngle: + return '>'; + case ETokenType::LeftParenthesis: + return '('; + case ETokenType::RightParenthesis: + return ')'; + case ETokenType::Plus: + return '+'; + case ETokenType::Colon: + return ':'; + case ETokenType::Comma: + return ','; + default: + 
Y_FAIL("unreachable"); + } + } + + TString TokenTypeToString(ETokenType type) { + // Value-carrying tokens and EndOfStream have no single-character form, so + // TokenTypeToChar() would hit its Y_FAIL("unreachable") default for them. + // Name them instead: TToken::CheckType() formats exactly these types when + // it builds its TYsonException message. + switch (type) { + case ETokenType::EndOfStream: + return "EndOfStream"; + case ETokenType::String: + return "String"; + case ETokenType::Int64: + return "Int64"; + case ETokenType::Uint64: + return "Uint64"; + case ETokenType::Double: + return "Double"; + case ETokenType::Boolean: + return "Boolean"; + default: + // Punctuation tokens keep their one-character spelling. + return TString(1, TokenTypeToChar(type)); + } + } + + //////////////////////////////////////////////////////////////////////////////// + + const TToken TToken::EndOfStream; + + TToken::TToken() + : Type_(ETokenType::EndOfStream) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) + { + } + + TToken::TToken(ETokenType type) + : Type_(type) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) + { + switch (type) { + case ETokenType::String: + case ETokenType::Int64: + case ETokenType::Uint64: + case ETokenType::Double: + case ETokenType::Boolean: + Y_FAIL("unreachable"); + default: + break; + } + } + + TToken::TToken(const TStringBuf& stringValue) + : Type_(ETokenType::String) + , StringValue(stringValue) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) + { + } + + TToken::TToken(i64 int64Value) + : Type_(ETokenType::Int64) + , Int64Value(int64Value) + , Uint64Value(0) + , DoubleValue(0.0) + , BooleanValue(false) // every scalar member is initialized, as in the other constructors + { + } + + TToken::TToken(ui64 uint64Value) + : Type_(ETokenType::Uint64) + , Int64Value(0) + , Uint64Value(uint64Value) + , DoubleValue(0.0) + , BooleanValue(false) + { + } + + TToken::TToken(double doubleValue) + : Type_(ETokenType::Double) + , Int64Value(0) + , Uint64Value(0) + , DoubleValue(doubleValue) + , BooleanValue(false) + { + } + + TToken::TToken(bool booleanValue) + : Type_(ETokenType::Boolean) + , Int64Value(0) + , Uint64Value(0) // every scalar member is initialized, as in the other constructors + , DoubleValue(0.0) + , BooleanValue(booleanValue) + { + } + + bool TToken::IsEmpty() const { + return Type_ == ETokenType::EndOfStream; + } + + const TStringBuf& TToken::GetStringValue() const { + CheckType(ETokenType::String); + return StringValue; + } + + i64 TToken::GetInt64Value() const { + CheckType(ETokenType::Int64); + return Int64Value; + } + + ui64 TToken::GetUint64Value() const { + CheckType(ETokenType::Uint64); + return Uint64Value; + } + + double TToken::GetDoubleValue() const { + 
CheckType(ETokenType::Double); + return DoubleValue; + } + + bool TToken::GetBooleanValue() const { + CheckType(ETokenType::Boolean); + return BooleanValue; + } + + void TToken::CheckType(ETokenType expectedType) const { + if (Type_ != expectedType) { + if (Type_ == ETokenType::EndOfStream) { + ythrow TYsonException() << "Unexpected end of stream (ExpectedType: " << TokenTypeToString(expectedType) << ")"; + } else { + ythrow TYsonException() << "Unexpected token (Token: '" << ToString(*this) + << "', Type: " << TokenTypeToString(Type_) + << ", ExpectedType: " << TokenTypeToString(expectedType) << ")"; + } + } + } + + void TToken::Reset() { + Type_ = ETokenType::EndOfStream; + Int64Value = 0; + Uint64Value = 0; + DoubleValue = 0.0; + StringValue = TStringBuf(); + BooleanValue = false; + } + + TString ToString(const TToken& token) { + switch (token.GetType()) { + case ETokenType::EndOfStream: + return TString(); + + case ETokenType::String: + return TString(token.GetStringValue()); + + case ETokenType::Int64: + return ::ToString(token.GetInt64Value()); + + case ETokenType::Uint64: + return ::ToString(token.GetUint64Value()); + + case ETokenType::Double: + return ::ToString(token.GetDoubleValue()); + + case ETokenType::Boolean: + return token.GetBooleanValue() ? 
"true" : "false"; + + default: + return TokenTypeToString(token.GetType()); + } + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/token.h b/library/cpp/yson/token.h new file mode 100644 index 00000000000..7283e569504 --- /dev/null +++ b/library/cpp/yson/token.h @@ -0,0 +1,93 @@ +#pragma once + +#include "public.h" + +#include <util/generic/strbuf.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + enum ETokenType { + EndOfStream, + + String, + Int64, + Uint64, + Double, + Boolean, + + // Special values: + // YSON + Semicolon, // ; + Equals, // = + Hash, // # + LeftBracket, // [ + RightBracket, // ] + LeftBrace, // { + RightBrace, // } + LeftAngle, // < + RightAngle, // > + + // Table ranges + LeftParenthesis, // ( + RightParenthesis, // ) + Plus, // + + Colon, // : + Comma, // , + }; + + //////////////////////////////////////////////////////////////////////////////// + + ETokenType CharToTokenType(char ch); + char TokenTypeToChar(ETokenType type); + TString TokenTypeToString(ETokenType type); + + //////////////////////////////////////////////////////////////////////////////// + + class TLexerImpl; + + //////////////////////////////////////////////////////////////////////////////// + + class TToken { + public: + static const TToken EndOfStream; + + TToken(); + TToken(ETokenType type); + explicit TToken(const TStringBuf& stringValue); + explicit TToken(i64 int64Value); + explicit TToken(ui64 int64Value); + explicit TToken(double doubleValue); + explicit TToken(bool booleanValue); + + ETokenType GetType() const { + return Type_; + } + + bool IsEmpty() const; + const TStringBuf& GetStringValue() const; + i64 GetInt64Value() const; + ui64 GetUint64Value() const; + double GetDoubleValue() const; + bool GetBooleanValue() const; + + void CheckType(ETokenType expectedType) const; + void Reset(); + + private: + friend class 
TLexerImpl; + + ETokenType Type_; + + TStringBuf StringValue; + i64 Int64Value; + ui64 Uint64Value; + double DoubleValue; + bool BooleanValue; + }; + + TString ToString(const TToken& token); + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/tokenizer.cpp b/library/cpp/yson/tokenizer.cpp new file mode 100644 index 00000000000..06760170d48 --- /dev/null +++ b/library/cpp/yson/tokenizer.cpp @@ -0,0 +1,37 @@ +#include "tokenizer.h" + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + TTokenizer::TTokenizer(const TStringBuf& input) + : Input(input) + , Parsed(0) + { + } + + bool TTokenizer::ParseNext() { + Input = Input.Tail(Parsed); + Token.Reset(); + Parsed = Lexer.GetToken(Input, &Token); + return !CurrentToken().IsEmpty(); + } + + const TToken& TTokenizer::CurrentToken() const { + return Token; + } + + ETokenType TTokenizer::GetCurrentType() const { + return CurrentToken().GetType(); + } + + TStringBuf TTokenizer::GetCurrentSuffix() const { + return Input.Tail(Parsed); + } + + const TStringBuf& TTokenizer::CurrentInput() const { + return Input; + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/tokenizer.h b/library/cpp/yson/tokenizer.h new file mode 100644 index 00000000000..0576aace95c --- /dev/null +++ b/library/cpp/yson/tokenizer.h @@ -0,0 +1,28 @@ +#pragma once + +#include "public.h" +#include "lexer.h" + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + class TTokenizer { + public: + explicit TTokenizer(const TStringBuf& input); + + bool ParseNext(); + const TToken& CurrentToken() const; + ETokenType GetCurrentType() const; + TStringBuf GetCurrentSuffix() const; + const TStringBuf& CurrentInput() const; + + private: + TStringBuf Input; + TToken Token; + TStatelessLexer 
Lexer; + size_t Parsed; + }; + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/varint.cpp b/library/cpp/yson/varint.cpp new file mode 100644 index 00000000000..d538ee3cffa --- /dev/null +++ b/library/cpp/yson/varint.cpp @@ -0,0 +1,71 @@ +#include "varint.h" + +#include "zigzag.h" + +#include <util/generic/yexception.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + int WriteVarUInt64(IOutputStream* output, ui64 value) { + bool stop = false; + int bytesWritten = 0; + while (!stop) { + ++bytesWritten; + ui8 byte = static_cast<ui8>(value | 0x80); + value >>= 7; + if (value == 0) { + stop = true; + byte &= 0x7F; + } + output->Write(byte); + } + return bytesWritten; + } + + int WriteVarInt32(IOutputStream* output, i32 value) { + return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode32(value))); + } + + int WriteVarInt64(IOutputStream* output, i64 value) { + return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode64(value))); + } + + // Reads a varint: 7 payload bits per byte, least-significant group first; + // a set high bit (0x80) means another byte follows. Stores the decoded + // number in *value and returns the number of bytes consumed. Throws + // yexception if the encoding is too long or the stream ends early. + int ReadVarUInt64(IInputStream* input, ui64* value) { + size_t count = 0; + ui64 result = 0; + + ui8 byte = 0; + do { + if (7 * count > 8 * sizeof(ui64)) { + ythrow yexception() << "The data is too long to read ui64"; + } + if (input->Read(&byte, 1) != 1) { + // A short read means the stream ended mid-value, not that the value is too long. + ythrow yexception() << "Premature end of stream while reading ui64"; + } + result |= (static_cast<ui64>(byte & 0x7F)) << (7 * count); + ++count; + } while (byte & 0x80); + + *value = result; + return count; + } + + // Reads a varint and zigzag-decodes it into *value; throws yexception if + // the encoded number does not fit in 32 bits. Returns the bytes consumed. + int ReadVarInt32(IInputStream* input, i32* value) { + ui64 varInt; + int bytesRead = ReadVarUInt64(input, &varInt); + if (varInt > Max<ui32>()) { + ythrow yexception() << "The data is too long to read i32"; + } + *value = ZigZagDecode32(static_cast<ui32>(varInt)); + return bytesRead; + } + + int ReadVarInt64(IInputStream* input, i64* value) { + ui64 varInt; + int bytesRead = ReadVarUInt64(input, &varInt); + *value = 
ZigZagDecode64(varInt); + return bytesRead; + } + +} // namespace NYson diff --git a/library/cpp/yson/varint.h b/library/cpp/yson/varint.h new file mode 100644 index 00000000000..80b1184e57f --- /dev/null +++ b/library/cpp/yson/varint.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/stream/input.h> +#include <util/stream/output.h> +#include <util/system/defaults.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + // Various functions that read/write varints from/to a stream. + + // Returns the number of bytes written. + int WriteVarUInt64(IOutputStream* output, ui64 value); + int WriteVarInt32(IOutputStream* output, i32 value); + int WriteVarInt64(IOutputStream* output, i64 value); + + // Returns the number of bytes read. + int ReadVarUInt64(IInputStream* input, ui64* value); + int ReadVarInt32(IInputStream* input, i32* value); + int ReadVarInt64(IInputStream* input, i64* value); + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/writer.cpp b/library/cpp/yson/writer.cpp new file mode 100644 index 00000000000..054459f9f5f --- /dev/null +++ b/library/cpp/yson/writer.cpp @@ -0,0 +1,355 @@ +#include "writer.h" + +#include "detail.h" +#include "format.h" +#include "parser.h" +#include "varint.h" +#include "zigzag.h" + +#include <util/string/cast.h> + +#include <cmath> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + // Copied from <util/string/escape.cpp> + namespace { + inline char HexDigit(char value) { + Y_ASSERT(value < 16); + if (value < 10) + return '0' + value; + else + return 'A' + value - 10; + } + + inline char OctDigit(char value) { + Y_ASSERT(value < 8); + return '0' + value; + } + + inline bool IsPrintable(char c) { + return c >= 32 && c <= 126; + } + + inline bool IsHexDigit(char c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 
'a' && c <= 'f'); + } + + inline bool IsOctDigit(char c) { + return c >= '0' && c <= '7'; + } + + const size_t ESCAPE_C_BUFFER_SIZE = 4; + + inline size_t EscapeC(unsigned char c, char next, char r[ESCAPE_C_BUFFER_SIZE]) { + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. + if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (IsPrintable(c)) { + r[0] = c; + return 1; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (c < 8 && !IsOctDigit(next)) { + r[0] = '\\'; + r[1] = OctDigit(c); + return 2; + } else if (!IsHexDigit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = HexDigit((c & 0xF0) >> 4); + r[3] = HexDigit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = OctDigit((c & 0700) >> 6); + r[2] = OctDigit((c & 0070) >> 3); + r[3] = OctDigit((c & 0007) >> 0); + return 4; + } + } + + void EscapeC(const char* str, size_t len, IOutputStream& output) { + char buffer[ESCAPE_C_BUFFER_SIZE]; + + size_t i, j; + for (i = 0, j = 0; i < len; ++i) { + size_t rlen = EscapeC(str[i], (i + 1 < len ? 
str[i + 1] : 0), buffer); + + if (rlen > 1) { + output.Write(str + j, i - j); + j = i + 1; + output.Write(buffer, rlen); + } + } + + if (j > 0) { + output.Write(str + j, len - j); + } else { + output.Write(str, len); + } + } + + TString FloatToStringWithNanInf(double value) { + if (std::isfinite(value)) { + return ::ToString(value); + } + + static const TStringBuf nanLiteral = "%nan"; + static const TStringBuf infLiteral = "%inf"; + static const TStringBuf negativeInfLiteral = "%-inf"; + + TStringBuf str; + if (std::isnan(value)) { + str = nanLiteral; + } else if (value > 0) { + str = infLiteral; + } else { + str = negativeInfLiteral; + } + return TString(str.data(), str.size()); + } + + } + + //////////////////////////////////////////////////////////////////////////////// + + TYsonWriter::TYsonWriter( + IOutputStream* stream, + EYsonFormat format, + EYsonType type, + bool enableRaw) + : Stream(stream) + , Format(format) + , Type(type) + , EnableRaw(enableRaw) + , Depth(0) + , BeforeFirstItem(true) + { + Y_ASSERT(stream); + } + + void TYsonWriter::WriteIndent() { + for (int i = 0; i < IndentSize * Depth; ++i) { + Stream->Write(' '); + } + } + + bool TYsonWriter::IsTopLevelFragmentContext() const { + return Depth == 0 && (Type == ::NYson::EYsonType::ListFragment || Type == ::NYson::EYsonType::MapFragment); + } + + void TYsonWriter::EndNode() { + if (IsTopLevelFragmentContext()) { + ETokenType separatorToken = + Type == ::NYson::EYsonType::ListFragment + ? 
ListItemSeparatorToken + : KeyedItemSeparatorToken; + Stream->Write(TokenTypeToChar(separatorToken)); + if (Format == EYsonFormat::Text || Format == EYsonFormat::Pretty) { + Stream->Write('\n'); + } + } + } + + void TYsonWriter::BeginCollection(ETokenType beginToken) { + Stream->Write(TokenTypeToChar(beginToken)); + ++Depth; + BeforeFirstItem = true; + } + + void TYsonWriter::CollectionItem(ETokenType separatorToken) { + if (!IsTopLevelFragmentContext()) { + if (!BeforeFirstItem) { + Stream->Write(TokenTypeToChar(separatorToken)); + } + + if (Format == EYsonFormat::Pretty) { + Stream->Write('\n'); + WriteIndent(); + } + } + + BeforeFirstItem = false; + } + + void TYsonWriter::EndCollection(ETokenType endToken) { + --Depth; + if (Format == EYsonFormat::Pretty && !BeforeFirstItem) { + Stream->Write('\n'); + WriteIndent(); + } + Stream->Write(TokenTypeToChar(endToken)); + BeforeFirstItem = false; + } + + void TYsonWriter::WriteStringScalar(const TStringBuf& value) { + if (Format == EYsonFormat::Binary) { + Stream->Write(NDetail::StringMarker); + WriteVarInt32(Stream, static_cast<i32>(value.length())); + Stream->Write(value.begin(), value.length()); + } else { + Stream->Write('"'); + EscapeC(value.data(), value.length(), *Stream); + Stream->Write('"'); + } + } + + void TYsonWriter::OnStringScalar(TStringBuf value) { + WriteStringScalar(value); + EndNode(); + } + + void TYsonWriter::OnInt64Scalar(i64 value) { + if (Format == EYsonFormat::Binary) { + Stream->Write(NDetail::Int64Marker); + WriteVarInt64(Stream, value); + } else { + Stream->Write(::ToString(value)); + } + EndNode(); + } + + void TYsonWriter::OnUint64Scalar(ui64 value) { + if (Format == EYsonFormat::Binary) { + Stream->Write(NDetail::Uint64Marker); + WriteVarUInt64(Stream, value); + } else { + Stream->Write(::ToString(value)); + Stream->Write("u"); + } + EndNode(); + } + + void TYsonWriter::OnDoubleScalar(double value) { + if (Format == EYsonFormat::Binary) { + Stream->Write(NDetail::DoubleMarker); + 
Stream->Write(&value, sizeof(double)); + } else { + auto str = FloatToStringWithNanInf(value); + Stream->Write(str); + if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) { + Stream->Write("."); + } + } + EndNode(); + } + + void TYsonWriter::OnBooleanScalar(bool value) { + if (Format == EYsonFormat::Binary) { + Stream->Write(value ? NDetail::TrueMarker : NDetail::FalseMarker); + } else { + Stream->Write(value ? "%true" : "%false"); + } + EndNode(); + } + + void TYsonWriter::OnEntity() { + Stream->Write(TokenTypeToChar(EntityToken)); + EndNode(); + } + + void TYsonWriter::OnBeginList() { + BeginCollection(BeginListToken); + } + + void TYsonWriter::OnListItem() { + CollectionItem(ListItemSeparatorToken); + } + + void TYsonWriter::OnEndList() { + EndCollection(EndListToken); + EndNode(); + } + + void TYsonWriter::OnBeginMap() { + BeginCollection(BeginMapToken); + } + + void TYsonWriter::OnKeyedItem(TStringBuf key) { + CollectionItem(KeyedItemSeparatorToken); + + WriteStringScalar(key); + + if (Format == NYson::EYsonFormat::Pretty) { + Stream->Write(' '); + } + Stream->Write(TokenTypeToChar(KeyValueSeparatorToken)); + if (Format == NYson::EYsonFormat::Pretty) { + Stream->Write(' '); + } + + BeforeFirstItem = false; + } + + void TYsonWriter::OnEndMap() { + EndCollection(EndMapToken); + EndNode(); + } + + void TYsonWriter::OnBeginAttributes() { + BeginCollection(BeginAttributesToken); + } + + void TYsonWriter::OnEndAttributes() { + EndCollection(EndAttributesToken); + if (Format == NYson::EYsonFormat::Pretty) { + Stream->Write(' '); + } + } + + void TYsonWriter::OnRaw(TStringBuf yson, EYsonType type) { + if (EnableRaw) { + Stream->Write(yson); + BeforeFirstItem = false; + } else { + TYsonConsumerBase::OnRaw(yson, type); + } + } + + TYsonWriter::TState TYsonWriter::State() const { + TState state; + state.Depth = Depth; + state.BeforeFirstItem = BeforeFirstItem; + return state; + } + + void TYsonWriter::Reset(const TState& state) { 
+ Depth = state.Depth; + BeforeFirstItem = state.BeforeFirstItem; + } + + //////////////////////////////////////////////////////////////////////////////// + + void ReformatYsonStream( + IInputStream* input, + IOutputStream* output, + EYsonFormat format, + EYsonType type) { + TYsonWriter writer(output, format, type); + TYsonParser parser(&writer, input, type); + parser.Parse(); + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/writer.h b/library/cpp/yson/writer.h new file mode 100644 index 00000000000..40f5d7d5014 --- /dev/null +++ b/library/cpp/yson/writer.h @@ -0,0 +1,89 @@ +#pragma once + +#include "public.h" +#include "token.h" +#include "consumer.h" + +#include <util/generic/noncopyable.h> + +class IOutputStream; +class IZeroCopyInput; + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + class TYsonWriter + : public TYsonConsumerBase, + private TNonCopyable { + public: + class TState { + private: + int Depth; + bool BeforeFirstItem; + + friend class TYsonWriter; + }; + + public: + TYsonWriter( + IOutputStream* stream, + EYsonFormat format = EYsonFormat::Binary, + EYsonType type = ::NYson::EYsonType::Node, + bool enableRaw = false); + + void OnStringScalar(TStringBuf value) override; + void OnInt64Scalar(i64 value) override; + void OnUint64Scalar(ui64 value) override; + void OnDoubleScalar(double value) override; + void OnBooleanScalar(bool value) override; + void OnEntity() override; + + void OnBeginList() override; + void OnListItem() override; + void OnEndList() override; + + void OnBeginMap() override; + void OnKeyedItem(TStringBuf key) override; + void OnEndMap() override; + + void OnBeginAttributes() override; + void OnEndAttributes() override; + + void OnRaw(TStringBuf yson, EYsonType type = ::NYson::EYsonType::Node) override; + + TState State() const; + void Reset(const TState& state); + + protected: + 
IOutputStream* Stream; + EYsonFormat Format; + EYsonType Type; + bool EnableRaw; + + int Depth; + bool BeforeFirstItem; + + static const int IndentSize = 4; + + void WriteIndent(); + void WriteStringScalar(const TStringBuf& value); + + void BeginCollection(ETokenType beginToken); + void CollectionItem(ETokenType separatorToken); + void EndCollection(ETokenType endToken); + + bool IsTopLevelFragmentContext() const; + void EndNode(); + }; + + //////////////////////////////////////////////////////////////////////////////// + + void ReformatYsonStream( + IInputStream* input, + IOutputStream* output, + EYsonFormat format = EYsonFormat::Binary, + EYsonType type = ::NYson::EYsonType::Node); + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson diff --git a/library/cpp/yson/ya.make b/library/cpp/yson/ya.make new file mode 100644 index 00000000000..c55a189b105 --- /dev/null +++ b/library/cpp/yson/ya.make @@ -0,0 +1,23 @@ +LIBRARY() + +OWNER( + ermolovd + g:yt +) + +PEERDIR( + library/cpp/yt/misc + library/cpp/yt/yson +) + +SRCS( + consumer.cpp + lexer.cpp + parser.cpp + token.cpp + tokenizer.cpp + varint.cpp + writer.cpp +) + +END() diff --git a/library/cpp/yson/zigzag.h b/library/cpp/yson/zigzag.h new file mode 100644 index 00000000000..2f1190508fb --- /dev/null +++ b/library/cpp/yson/zigzag.h @@ -0,0 +1,31 @@ +#pragma once + +#include <util/system/defaults.h> + +namespace NYson { + //////////////////////////////////////////////////////////////////////////////// + + //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x| + //! 
Actually taken 'as is' from protobuf/wire_format_lite.h + + inline ui32 ZigZagEncode32(i32 n) { + // Note: the right-shift must be arithmetic + return (ui32(n) << 1) ^ (n >> 31); + } + + inline i32 ZigZagDecode32(ui32 n) { + return (n >> 1) ^ -static_cast<i32>(n & 1); + } + + inline ui64 ZigZagEncode64(i64 n) { + // Note: the right-shift must be arithmetic + return (ui64(n) << 1) ^ (n >> 63); + } + + inline i64 ZigZagDecode64(ui64 n) { + return (n >> 1) ^ -static_cast<i64>(n & 1); + } + + //////////////////////////////////////////////////////////////////////////////// + +} // namespace NYson |