summaryrefslogtreecommitdiffstats
path: root/library/cpp/yson
diff options
context:
space:
mode:
authorDevtools Arcadia <[email protected]>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <[email protected]>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yson
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yson')
-rw-r--r--library/cpp/yson/consumer.cpp15
-rw-r--r--library/cpp/yson/consumer.h13
-rw-r--r--library/cpp/yson/detail.h806
-rw-r--r--library/cpp/yson/format.h25
-rw-r--r--library/cpp/yson/json/json_writer.cpp220
-rw-r--r--library/cpp/yson/json/json_writer.h89
-rw-r--r--library/cpp/yson/json/ya.make17
-rw-r--r--library/cpp/yson/json/yson2json_adapter.cpp82
-rw-r--r--library/cpp/yson/json/yson2json_adapter.h53
-rw-r--r--library/cpp/yson/lexer.cpp43
-rw-r--r--library/cpp/yson/lexer.h26
-rw-r--r--library/cpp/yson/lexer_detail.h296
-rw-r--r--library/cpp/yson/node/node.cpp915
-rw-r--r--library/cpp/yson/node/node.h523
-rw-r--r--library/cpp/yson/node/node_builder.cpp96
-rw-r--r--library/cpp/yson/node/node_builder.h46
-rw-r--r--library/cpp/yson/node/node_io.cpp154
-rw-r--r--library/cpp/yson/node/node_io.h40
-rw-r--r--library/cpp/yson/node/node_ut.cpp484
-rw-r--r--library/cpp/yson/node/node_visitor.cpp152
-rw-r--r--library/cpp/yson/node/node_visitor.h37
-rw-r--r--library/cpp/yson/node/pybind/node.cpp105
-rw-r--r--library/cpp/yson/node/pybind/node.h9
-rw-r--r--library/cpp/yson/node/pybind/ya.make16
-rw-r--r--library/cpp/yson/node/serialize.cpp101
-rw-r--r--library/cpp/yson/node/serialize.h45
-rw-r--r--library/cpp/yson/node/ut/ya.make12
-rw-r--r--library/cpp/yson/node/ya.make25
-rw-r--r--library/cpp/yson/parser.cpp179
-rw-r--r--library/cpp/yson/parser.h83
-rw-r--r--library/cpp/yson/parser_detail.h381
-rw-r--r--library/cpp/yson/public.h30
-rw-r--r--library/cpp/yson/string-inl.h57
-rw-r--r--library/cpp/yson/token.cpp236
-rw-r--r--library/cpp/yson/token.h93
-rw-r--r--library/cpp/yson/tokenizer.cpp37
-rw-r--r--library/cpp/yson/tokenizer.h28
-rw-r--r--library/cpp/yson/varint.cpp71
-rw-r--r--library/cpp/yson/varint.h24
-rw-r--r--library/cpp/yson/writer.cpp355
-rw-r--r--library/cpp/yson/writer.h89
-rw-r--r--library/cpp/yson/ya.make23
-rw-r--r--library/cpp/yson/zigzag.h31
43 files changed, 6162 insertions, 0 deletions
diff --git a/library/cpp/yson/consumer.cpp b/library/cpp/yson/consumer.cpp
new file mode 100644
index 00000000000..40ae452978c
--- /dev/null
+++ b/library/cpp/yson/consumer.cpp
@@ -0,0 +1,15 @@
+#include "consumer.h"
+#include "string.h"
+#include "parser.h"
+
+namespace NYson {
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //! Default handling of a raw YSON fragment: hands |str| to
+ //! ParseYsonStringBuffer together with this consumer and |type|
+ //! (presumably so the parsed events are replayed into this consumer).
+ void TYsonConsumerBase::OnRaw(TStringBuf str, NYT::NYson::EYsonType type) {
+ ParseYsonStringBuffer(str, this, type);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/consumer.h b/library/cpp/yson/consumer.h
new file mode 100644
index 00000000000..d5a9d663355
--- /dev/null
+++ b/library/cpp/yson/consumer.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <library/cpp/yt/yson/consumer.h>
+
+#include <util/generic/strbuf.h>
+#include <util/system/defaults.h>
+
+namespace NYson {
+ //! Consumer base used by this library: derives virtually from
+ //! NYT::NYson::IYsonConsumer and overrides only OnRaw; all other
+ //! callbacks are left to concrete consumers.
+ struct TYsonConsumerBase
+ : public virtual NYT::NYson::IYsonConsumer {
+ //! Handles a YSON fragment of kind |type| given in serialized form.
+ void OnRaw(TStringBuf ysonNode, NYT::NYson::EYsonType type) override;
+ };
+} // namespace NYson
diff --git a/library/cpp/yson/detail.h b/library/cpp/yson/detail.h
new file mode 100644
index 00000000000..27f5e8ffff2
--- /dev/null
+++ b/library/cpp/yson/detail.h
@@ -0,0 +1,806 @@
+#pragma once
+
+#include "public.h"
+#include "zigzag.h"
+
+#include <util/generic/vector.h>
+#include <util/generic/maybe.h>
+#include <util/generic/buffer.h>
+#include <util/string/escape.h>
+#include <util/string/cast.h>
+#include <util/stream/input.h>
+
+namespace NYson {
+ namespace NDetail {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //! Indicates the beginning of a list.
+ const char BeginListSymbol = '[';
+ //! Indicates the end of a list.
+ const char EndListSymbol = ']';
+
+ //! Indicates the beginning of a map.
+ const char BeginMapSymbol = '{';
+ //! Indicates the end of a map.
+ const char EndMapSymbol = '}';
+
+ //! Indicates the beginning of an attribute map.
+ const char BeginAttributesSymbol = '<';
+ //! Indicates the end of an attribute map.
+ const char EndAttributesSymbol = '>';
+
+ //! Separates items in lists.
+ const char ListItemSeparatorSymbol = ';';
+ //! Separates items in maps, attributes.
+ const char KeyedItemSeparatorSymbol = ';';
+ //! Separates keys from values in maps.
+ const char KeyValueSeparatorSymbol = '=';
+
+ //! Indicates an entity.
+ const char EntitySymbol = '#';
+
+ //! Indicates end of stream.
+ const char EndSymbol = '\0';
+
+ //! Marks the beginning of a binary string literal.
+ const char StringMarker = '\x01';
+ //! Marks the beginning of a binary i64 literal.
+ const char Int64Marker = '\x02';
+ //! Marks the beginning of a binary double literal.
+ const char DoubleMarker = '\x03';
+ //! Marks a binary boolean literal whose value is false.
+ const char FalseMarker = '\x04';
+ //! Marks a binary boolean literal whose value is true.
+ const char TrueMarker = '\x05';
+ //! Marks the beginning of a binary ui64 literal.
+ const char Uint64Marker = '\x06';
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template <bool EnableLinePositionInfo>
+ class TPositionInfo;
+
+        //! Position tracker that maintains a byte offset plus 1-based line and
+        //! column counters for the consumed input.
+        template <>
+        class TPositionInfo<true> {
+        public:
+            TPositionInfo()
+                : Offset(0)
+                , Line(1)
+                , Column(1)
+            {
+            }
+
+            //! Accounts for the bytes in [begin, end): bumps the offset and walks
+            //! the range to keep the line/column counters in sync.
+            void OnRangeConsumed(const char* begin, const char* end) {
+                Offset += end - begin;
+                for (const char* cursor = begin; cursor != end; ++cursor) {
+                    if (*cursor == '\n') { //TODO: memchr
+                        // A newline moves to the next line and resets the column.
+                        ++Line;
+                        Column = 1;
+                    } else {
+                        ++Column;
+                    }
+                }
+            }
+
+        private:
+            int Offset;
+            int Line;
+            int Column;
+        };
+
+        //! Lightweight position tracker: only the byte offset is maintained,
+        //! no line/column bookkeeping is done.
+        template <>
+        class TPositionInfo<false> {
+        public:
+            TPositionInfo()
+                : Offset(0)
+            {
+            }
+
+            //! Accounts for the bytes in [begin, end) by advancing the offset.
+            void OnRangeConsumed(const char* begin, const char* end) {
+                const auto consumed = end - begin;
+                Offset += consumed;
+            }
+
+        private:
+            int Offset;
+        };
+
+        //! Character-level view over a block stream. Every consumed byte range is
+        //! also reported to the position-tracking base class.
+        template <class TBlockStream, class TPositionBase>
+        class TCharStream
+            : public TBlockStream,
+              public TPositionBase {
+        public:
+            TCharStream(const TBlockStream& blockStream)
+                : TBlockStream(blockStream)
+            {
+            }
+
+            //! Number of unread bytes left in the current block.
+            size_t Length() const {
+                return TBlockStream::End() - TBlockStream::Begin();
+            }
+
+            //! True when the current block has no unread bytes.
+            bool IsEmpty() const {
+                return TBlockStream::Begin() == TBlockStream::End();
+            }
+
+            //! Pulls new blocks until unread data is available or the underlying
+            //! stream reports it is finished. Hitting the end of the stream throws
+            //! TYsonException unless AllowFinish is set.
+            template <bool AllowFinish>
+            void Refresh() {
+                while (IsEmpty()) {
+                    if (TBlockStream::IsFinished()) {
+                        if (!AllowFinish) {
+                            ythrow TYsonException() << "Premature end of yson stream";
+                        }
+                        return;
+                    }
+                    TBlockStream::RefreshBlock();
+                }
+            }
+
+            //! Same as Refresh<false>(): the end of the stream is an error.
+            void Refresh() {
+                return Refresh<false>();
+            }
+
+            //! Returns the next unread character without consuming it; yields '\0'
+            //! when the stream is exhausted (only reachable with AllowFinish).
+            template <bool AllowFinish>
+            char GetChar() {
+                Refresh<AllowFinish>();
+                if (IsEmpty()) {
+                    return '\0';
+                }
+                return *TBlockStream::Begin();
+            }
+
+            //! Same as GetChar<false>(): the end of the stream is an error.
+            char GetChar() {
+                return GetChar<false>();
+            }
+
+            //! Consumes |bytes| bytes of the current block, reporting the range to
+            //! the position tracker before moving the block cursor.
+            void Advance(size_t bytes) {
+                const char* const rangeBegin = TBlockStream::Begin();
+                TPositionBase::OnRangeConsumed(rangeBegin, rangeBegin + bytes);
+                TBlockStream::Advance(bytes);
+            }
+        };
+
+ template <class TBaseStream>
+ class TCodedStream
+ : public TBaseStream {
+ private:
+ static const int MaxVarintBytes = 10;
+ static const int MaxVarint32Bytes = 5;
+
+ //! Start of the unread part of the current block, viewed as unsigned bytes.
+ const ui8* BeginByte() const {
+ return reinterpret_cast<const ui8*>(TBaseStream::Begin());
+ }
+
+ //! End of the current block, viewed as unsigned bytes.
+ const ui8* EndByte() const {
+ return reinterpret_cast<const ui8*>(TBaseStream::End());
+ }
+
+ // The following functions are adapted from the Protobuf code in coded_stream.cc.
+ //! Decodes a varint from the current block without any bounds checks and
+ //! stores its low 32 bits in |*value|.
+ //!
+ //! May dereference up to MaxVarintBytes bytes starting at BeginByte(): five
+ //! bytes for the value itself plus up to five more that are skipped when the
+ //! encoded number is wider than 32 bits. The caller must guarantee that the
+ //! varint terminates inside the current block.
+ //!
+ //! Returns false (consuming nothing) if no terminating byte is found within
+ //! MaxVarintBytes bytes; otherwise advances past the varint and returns true.
+ bool ReadVarint32FromArray(ui32* value) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+ const ui8* ptr = BeginByte();
+ ui32 b;
+ ui32 result;
+
+ // Each byte contributes its low 7 bits; a clear high bit ends the varint.
+ b = *(ptr++);
+ result = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= (b & 0x7F) << 14;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ result |= (b & 0x7F) << 21;
+ if (!(b & 0x80))
+ goto done;
+ // Fifth byte: bits that do not fit into 32 bits are shifted out.
+ b = *(ptr++);
+ result |= b << 28;
+ if (!(b & 0x80))
+ goto done;
+
+ // If the input is larger than 32 bits, we still need to read it all
+ // and discard the high-order bits.
+
+ for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) {
+ b = *(ptr++);
+ if (!(b & 0x80))
+ goto done;
+ }
+
+ // We have overrun the maximum size of a Varint (10 bytes). Assume
+ // the data is corrupt.
+ return false;
+
+ done:
+ // Consume exactly the bytes that made up the varint.
+ TBaseStream::Advance(ptr - BeginByte());
+ *value = result;
+ return true;
+ }
+
+            //! Reads a 32-bit varint when the single-byte fast path did not apply.
+            //!
+            //! ReadVarint32FromArray performs no bounds checks and may look at up to
+            //! MaxVarintBytes bytes (it skips the high-order bytes of over-long
+            //! values), so it is used only when that many bytes are available or
+            //! when the block ends with a byte that terminates a varint. Requiring
+            //! just MaxVarint32Bytes here would let an over-long varint be read past
+            //! the end of the block.
+            //!
+            //! Returns false if the encoding is malformed; on success stores the
+            //! decoded value in |*value| and consumes the varint.
+            bool ReadVarint32Fallback(ui32* value) {
+                const auto available = EndByte() - BeginByte();
+                if (available >= MaxVarintBytes ||
+                    // Optimization: If the Varint ends at exactly the end of the buffer,
+                    // we can detect that and still use the fast path.
+                    (available > 0 && !(EndByte()[-1] & 0x80)))
+                {
+                    return ReadVarint32FromArray(value);
+                }
+
+                // Really slow case: we will incur the cost of an extra function call here,
+                // but moving this out of line reduces the size of this function, which
+                // improves the common case. In micro benchmarks, this is worth about 10-15%
+                return ReadVarint32Slow(value);
+            }
+
+            //! Reads a 32-bit varint via the 64-bit decoder and keeps the low
+            //! 32 bits. Returns false if the 64-bit read fails.
+            bool ReadVarint32Slow(ui32* value) {
+                // Go straight to ReadVarint64Fallback: the one-byte case has
+                // already been tried by the caller chain.
+                ui64 wide;
+                if (!ReadVarint64Fallback(&wide)) {
+                    return false;
+                }
+                *value = static_cast<ui32>(wide);
+                return true;
+            }
+
+            //! Byte-by-byte varint decoder that tolerates the value spanning several
+            //! blocks: the stream is refreshed whenever the current block runs out.
+            //! Returns false once MaxVarintBytes bytes were consumed without seeing
+            //! a terminating byte.
+            bool ReadVarint64Slow(ui64* value) {
+                ui64 decoded = 0;
+
+                for (int index = 0; index < MaxVarintBytes; ++index) {
+                    // Refresh() throws if the stream ends prematurely.
+                    while (BeginByte() == EndByte()) {
+                        TBaseStream::Refresh();
+                    }
+
+                    const ui32 current = *BeginByte();
+                    decoded |= static_cast<ui64>(current & 0x7F) << (7 * index);
+                    TBaseStream::Advance(1);
+
+                    if (!(current & 0x80)) {
+                        // High bit clear: this was the last byte of the varint.
+                        *value = decoded;
+                        return true;
+                    }
+                }
+
+                // Too many continuation bytes: the data is corrupt.
+                return false;
+            }
+
+ //! Reads a 64-bit varint when the single-byte fast path did not apply.
+ //!
+ //! If the current block is known to contain the whole varint (at least
+ //! MaxVarintBytes bytes are left, or the block ends with a terminating byte)
+ //! the value is decoded with an unrolled, unchecked loop; otherwise the work
+ //! is delegated to ReadVarint64Slow. Returns false when more than
+ //! MaxVarintBytes bytes carry a continuation bit.
+ bool ReadVarint64Fallback(ui64* value) {
+ if (BeginByte() + MaxVarintBytes <= EndByte() ||
+ // Optimization: If the Varint ends at exactly the end of the buffer,
+ // we can detect that and still use the fast path.
+ (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80)))
+ {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+
+ const ui8* ptr = BeginByte();
+ ui32 b;
+
+ // Splitting into 32-bit pieces gives better performance on 32-bit
+ // processors.
+ ui32 part0 = 0, part1 = 0, part2 = 0;
+
+ // part0 collects bits 0..27 (bytes 1-4).
+ b = *(ptr++);
+ part0 = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part0 |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part0 |= (b & 0x7F) << 14;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part0 |= (b & 0x7F) << 21;
+ if (!(b & 0x80))
+ goto done;
+ // part1 collects bits 28..55 (bytes 5-8).
+ b = *(ptr++);
+ part1 = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 |= (b & 0x7F) << 14;
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part1 |= (b & 0x7F) << 21;
+ if (!(b & 0x80))
+ goto done;
+ // part2 collects bits 56 and up (bytes 9-10).
+ b = *(ptr++);
+ part2 = (b & 0x7F);
+ if (!(b & 0x80))
+ goto done;
+ b = *(ptr++);
+ part2 |= (b & 0x7F) << 7;
+ if (!(b & 0x80))
+ goto done;
+
+ // We have overrun the maximum size of a Varint (10 bytes). The data
+ // must be corrupt.
+ return false;
+
+ done:
+ // Consume the varint and stitch the three pieces back together.
+ TBaseStream::Advance(ptr - BeginByte());
+ *value = (static_cast<ui64>(part0)) |
+ (static_cast<ui64>(part1) << 28) |
+ (static_cast<ui64>(part2) << 56);
+ return true;
+ } else {
+ return ReadVarint64Slow(value);
+ }
+ }
+
+ public:
+ //! Copy-constructs the underlying stream from |baseStream|.
+ TCodedStream(const TBaseStream& baseStream)
+ : TBaseStream(baseStream)
+ {
+ }
+
+            //! Reads a 64-bit varint into |*value|. A value that fits into the single
+            //! next byte of the current block is decoded inline; everything else
+            //! goes through ReadVarint64Fallback. Returns false on malformed input.
+            bool ReadVarint64(ui64* value) {
+                const bool singleByte = BeginByte() < EndByte() && *BeginByte() < 0x80;
+                if (!singleByte) {
+                    return ReadVarint64Fallback(value);
+                }
+                *value = *BeginByte();
+                TBaseStream::Advance(1);
+                return true;
+            }
+
+            //! Reads a 32-bit varint into |*value|. A value that fits into the single
+            //! next byte of the current block is decoded inline; everything else
+            //! goes through ReadVarint32Fallback. Returns false on malformed input.
+            bool ReadVarint32(ui32* value) {
+                const bool singleByte = BeginByte() < EndByte() && *BeginByte() < 0x80;
+                if (!singleByte) {
+                    return ReadVarint32Fallback(value);
+                }
+                *value = *BeginByte();
+                TBaseStream::Advance(1);
+                return true;
+            }
+ };
+
+ //! Classification of a textual numeric literal as reported by
+ //! TLexerBase::ReadNumeric.
+ enum ENumericResult {
+ Int64 = 0, // default when no '.', 'e', 'E' or 'u' was seen
+ Uint64 = 1, // the literal contains 'u'
+ Double = 2 // the literal contains '.', 'e' or 'E'
+ };
+
+ template <class TBlockStream, bool EnableLinePositionInfo>
+ class TLexerBase
+ : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> {
+ private:
+ using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>;
+ TVector<char> Buffer_;
+ TMaybe<ui64> MemoryLimit_;
+
+            //! Throws TYsonException when a memory limit is configured and the
+            //! capacity of the internal buffer exceeds it.
+            void CheckMemoryLimit() {
+                if (!MemoryLimit_) {
+                    return;
+                }
+                if (Buffer_.capacity() <= *MemoryLimit_) {
+                    return;
+                }
+                ythrow TYsonException()
+                    << "Memory limit exceeded while parsing YSON stream: allocated "
+                    << Buffer_.capacity() << ", limit " << (*MemoryLimit_);
+            }
+
+ public:
+ //! |blockStream| supplies the input; |memoryLimit|, when set, bounds the
+ //! capacity of the internal token buffer (see CheckMemoryLimit).
+ TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit)
+ : TBaseStream(blockStream)
+ , MemoryLimit_(memoryLimit)
+ {
+ }
+
+ protected:
+ /// Lexer routines
+
+            //! Collects the characters of a textual numeric literal into the internal
+            //! buffer and classifies the literal.
+            //!
+            //! Digits, '+' and '-' are accepted as is; '.', 'e' and 'E' mark the
+            //! literal as Double, 'u' marks it as Uint64 (the last marker seen wins).
+            //! Any other letter throws TYsonException; any other character ends the
+            //! literal without being consumed.
+            //!
+            //! |*value| is set to the collected text; it points into the internal
+            //! buffer, which is reused by the other Read* routines.
+            template <bool AllowFinish>
+            ENumericResult ReadNumeric(TStringBuf* value) {
+                Buffer_.clear();
+                ENumericResult result = ENumericResult::Int64;
+                while (true) {
+                    const char ch = TBaseStream::template GetChar<AllowFinish>();
+                    // The <cctype> classifiers have undefined behavior for negative
+                    // arguments, which is what bytes >= 0x80 become when char is
+                    // signed; classify the unsigned value instead.
+                    const unsigned char uch = static_cast<unsigned char>(ch);
+                    if (isdigit(uch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-'
+                        Buffer_.push_back(ch);
+                    } else if (ch == '.' || ch == 'e' || ch == 'E') {
+                        Buffer_.push_back(ch);
+                        result = ENumericResult::Double;
+                    } else if (ch == 'u') {
+                        Buffer_.push_back(ch);
+                        result = ENumericResult::Uint64;
+                    } else if (isalpha(uch)) {
+                        ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal";
+                    } else {
+                        break;
+                    }
+                    CheckMemoryLimit();
+                    TBaseStream::Advance(1);
+                }
+
+                *value = TStringBuf(Buffer_.data(), Buffer_.size());
+                return result;
+            }
+
+            //! Reads the body of a %-literal ("nan", "inf", "+inf" or "-inf") and
+            //! returns the corresponding double.
+            //!
+            //! The first character selects the expected spelling; the remaining
+            //! characters must match it exactly, otherwise TYsonException is thrown.
+            //! Exactly the characters of the literal are consumed: nothing past the
+            //! literal is fetched, so a literal that ends the input does not trigger
+            //! a needless refresh or a spurious premature-end error.
+            template <bool AllowFinish>
+            double ReadNanOrInf() {
+                static const TStringBuf nanString = "nan";
+                static const TStringBuf infString = "inf";
+                static const TStringBuf plusInfString = "+inf";
+                static const TStringBuf minusInfString = "-inf";
+
+                TStringBuf expectedString;
+                double expectedValue;
+                char ch = TBaseStream::template GetChar<AllowFinish>();
+                switch (ch) {
+                    case '+':
+                        expectedString = plusInfString;
+                        expectedValue = std::numeric_limits<double>::infinity();
+                        break;
+                    case '-':
+                        expectedString = minusInfString;
+                        expectedValue = -std::numeric_limits<double>::infinity();
+                        break;
+                    case 'i':
+                        expectedString = infString;
+                        expectedValue = std::numeric_limits<double>::infinity();
+                        break;
+                    case 'n':
+                        expectedString = nanString;
+                        expectedValue = std::numeric_limits<double>::quiet_NaN();
+                        break;
+                    default:
+                        ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'";
+                }
+
+                for (size_t i = 0; i < expectedString.size(); ++i) {
+                    if (i != 0) {
+                        // The first character was already fetched for the switch above.
+                        ch = TBaseStream::template GetChar<AllowFinish>();
+                    }
+                    if (expectedString[i] != ch) {
+                        ythrow TYsonException()
+                            << "Incorrect %-literal prefix "
+                            << "'" << expectedString.SubStr(0, i) << ch << "',"
+                            << "expected " << expectedString;
+                    }
+                    TBaseStream::Advance(1);
+                }
+
+                return expectedValue;
+            }
+
+            //! Reads the remainder of a double-quoted string literal (the opening
+            //! quote is expected to be consumed already), C-unescapes it and exposes
+            //! the result through |*value|, which points into the internal buffer.
+            void ReadQuotedString(TStringBuf* value) {
+                Buffer_.clear();
+                for (;;) {
+                    if (TBaseStream::IsEmpty()) {
+                        TBaseStream::Refresh();
+                    }
+                    const char current = *TBaseStream::Begin();
+                    TBaseStream::Advance(1);
+
+                    if (current == '"') {
+                        // A quote closes the literal unless it is escaped, i.e. unless
+                        // it is preceded by an odd number of backslashes.
+                        const int collected = Buffer_.size();
+                        int trailingSlashes = 0;
+                        while (trailingSlashes < collected &&
+                               Buffer_[collected - 1 - trailingSlashes] == '\\') {
+                            ++trailingSlashes;
+                        }
+                        if (trailingSlashes % 2 == 0) {
+                            break;
+                        }
+                    }
+
+                    Buffer_.push_back(current);
+                    CheckMemoryLimit();
+                }
+
+                // Replace the raw text with its unescaped form.
+                auto unescaped = UnescapeC(Buffer_.data(), Buffer_.size());
+                Buffer_.clear();
+                Buffer_.insert(Buffer_.end(), unescaped.data(), unescaped.data() + unescaped.size());
+                CheckMemoryLimit();
+                *value = TStringBuf(Buffer_.data(), Buffer_.size());
+            }
+
+            //! Collects an unquoted string token into the internal buffer: letters,
+            //! digits and the characters '_', '-', '%' and '.' are accepted; the first
+            //! other character ends the token and is left unconsumed.
+            //! |*value| points into the internal buffer.
+            template <bool AllowFinish>
+            void ReadUnquotedString(TStringBuf* value) {
+                Buffer_.clear();
+                while (true) {
+                    const char ch = TBaseStream::template GetChar<AllowFinish>();
+                    // The <cctype> classifiers have undefined behavior for negative
+                    // arguments, which is what bytes >= 0x80 become when char is
+                    // signed; classify the unsigned value instead.
+                    const unsigned char uch = static_cast<unsigned char>(ch);
+                    if (isalpha(uch) || isdigit(uch) ||
+                        ch == '_' || ch == '-' || ch == '%' || ch == '.') {
+                        Buffer_.push_back(ch);
+                    } else {
+                        break;
+                    }
+                    CheckMemoryLimit();
+                    TBaseStream::Advance(1);
+                }
+                *value = TStringBuf(Buffer_.data(), Buffer_.size());
+            }
+
+ //! Same as ReadUnquotedString<false>(value).
+ void ReadUnquotedString(TStringBuf* value) {
+ return ReadUnquotedString<false>(value);
+ }
+
+            //! Reads a length-prefixed binary string: a zigzag-encoded varint32
+            //! length followed by that many raw bytes.
+            //!
+            //! When the payload lies entirely inside the current block, |*value|
+            //! refers to the block directly (no copy); otherwise the payload is
+            //! gathered into the internal buffer across block refreshes.
+            //! Throws TYsonException on a malformed varint, a negative length, a
+            //! premature end of stream or an exceeded memory limit.
+            void ReadBinaryString(TStringBuf* value) {
+                ui32 ulength = 0;
+                if (!TBaseStream::ReadVarint32(&ulength)) {
+                    ythrow TYsonException() << "Error parsing varint value";
+                }
+
+                i32 length = ZigZagDecode32(ulength);
+                if (length < 0) {
+                    ythrow TYsonException() << "Negative binary string literal length " << length;
+                }
+
+                // Compare sizes rather than pointers: the length comes from the input,
+                // and forming Begin() + length far beyond the block is undefined
+                // behavior that may wrap around.
+                const size_t needed = static_cast<size_t>(length);
+                if (needed <= TBaseStream::Length()) {
+                    *value = TStringBuf(TBaseStream::Begin(), needed);
+                    TBaseStream::Advance(needed);
+                } else { // reading in Buffer
+                    size_t needToRead = needed;
+                    Buffer_.clear();
+                    while (needToRead) {
+                        if (TBaseStream::IsEmpty()) {
+                            TBaseStream::Refresh();
+                            continue;
+                        }
+                        size_t readingBytes = Min(needToRead, TBaseStream::Length());
+
+                        Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes);
+                        CheckMemoryLimit();
+                        needToRead -= readingBytes;
+                        TBaseStream::Advance(readingBytes);
+                    }
+                    *value = TStringBuf(Buffer_.data(), Buffer_.size());
+                }
+            }
+
+            //! Reads a textual boolean ("true" or "false") and returns its value.
+            //!
+            //! Each character is validated before it is consumed, so a mismatch --
+            //! including the '\0' that GetChar<true> reports at the end of the
+            //! stream -- never advances the cursor past the available data.
+            //! Throws TYsonException, quoting the characters read so far, when the
+            //! input is not one of the two spellings.
+            template <bool AllowFinish>
+            bool ReadBoolean() {
+                Buffer_.clear();
+
+                static const TStringBuf trueString = "true";
+                static const TStringBuf falseString = "false";
+
+                auto throwIncorrectBoolean = [&]() {
+                    ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size());
+                };
+
+                // The first character decides which spelling is expected.
+                Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
+                const bool result = (Buffer_[0] == trueString[0]);
+                if (!result && Buffer_[0] != falseString[0]) {
+                    throwIncorrectBoolean();
+                }
+                TBaseStream::Advance(1);
+
+                const TStringBuf& expected = result ? trueString : falseString;
+                for (size_t i = 1; i < expected.size(); ++i) {
+                    Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
+                    if (Buffer_.back() != expected[i]) {
+                        throwIncorrectBoolean();
+                    }
+                    TBaseStream::Advance(1);
+                }
+                return result;
+            }
+
+            //! Reads a varint and stores its zigzag-decoded value in |*result|.
+            //! Throws TYsonException when the varint is malformed.
+            void ReadBinaryInt64(i64* result) {
+                ui64 encoded;
+                const bool parsed = TBaseStream::ReadVarint64(&encoded);
+                if (!parsed) {
+                    ythrow TYsonException() << "Error parsing varint value";
+                }
+                *result = ZigZagDecode64(encoded);
+            }
+
+            //! Reads a varint and stores it unchanged in |*result|.
+            //! Throws TYsonException when the varint is malformed.
+            void ReadBinaryUint64(ui64* result) {
+                ui64 decoded;
+                const bool parsed = TBaseStream::ReadVarint64(&decoded);
+                if (!parsed) {
+                    ythrow TYsonException() << "Error parsing varint value";
+                }
+                *result = decoded;
+            }
+
+            //! Copies sizeof(double) raw bytes from the stream into |*value|,
+            //! refreshing the stream whenever the current block runs out.
+            void ReadBinaryDouble(double* value) {
+                char* const destination = reinterpret_cast<char*>(value);
+                size_t filled = 0;
+
+                while (filled != sizeof(double)) {
+                    if (TBaseStream::IsEmpty()) {
+                        TBaseStream::Refresh();
+                        continue;
+                    }
+
+                    const size_t remaining = sizeof(double) - filled;
+                    const size_t chunkSize = Min(remaining, TBaseStream::Length());
+                    if (chunkSize == 0) {
+                        ythrow TYsonException() << "Error parsing binary double literal";
+                    }
+
+                    const char* const source = TBaseStream::Begin();
+                    std::copy(source, source + chunkSize, destination + filled);
+                    filled += chunkSize;
+                    TBaseStream::Advance(chunkSize);
+                }
+            }
+
+ /// Helpers
+            //! Skips whitespace and consumes the single character |symbol|;
+            //! throws TYsonException if a different character is found.
+            void SkipCharToken(char symbol) {
+                const char found = SkipSpaceAndGetChar();
+                if (found == symbol) {
+                    TBaseStream::Advance(1);
+                    return;
+                }
+                ythrow TYsonException() << "Expected '" << symbol << "' but found '" << found << "'";
+            }
+
+            //! Locale-independent whitespace test: true exactly for the byte values
+            //! 9..13 ('\t', '\n', '\v', '\f', '\r') and 32 (' ').
+            static bool IsSpaceFast(char ch) {
+                switch (ch) {
+                    case '\t':
+                    case '\n':
+                    case '\v':
+                    case '\f':
+                    case '\r':
+                    case ' ':
+                        return true;
+                    default:
+                        return false;
+                }
+            }
+
+            //! Returns the next non-whitespace character without consuming it.
+            //! The common case -- data is available and the next byte is not
+            //! whitespace -- is answered inline; everything else is delegated to
+            //! SkipSpaceAndGetCharFallback.
+            template <bool AllowFinish>
+            char SkipSpaceAndGetChar() {
+                const bool needsFallback =
+                    TBaseStream::IsEmpty() || IsSpaceFast(*TBaseStream::Begin());
+                if (needsFallback) {
+                    return SkipSpaceAndGetCharFallback<AllowFinish>();
+                }
+                return *TBaseStream::Begin();
+            }
+
+ //! Same as SkipSpaceAndGetChar<false>().
+ char SkipSpaceAndGetChar() {
+ return SkipSpaceAndGetChar<false>();
+ }
+
+            //! Slow path of SkipSpaceAndGetChar: consumes whitespace, refreshing the
+            //! stream as blocks run out, and returns the first non-whitespace
+            //! character without consuming it. Returns '\0' once the stream is both
+            //! empty and finished.
+            template <bool AllowFinish>
+            char SkipSpaceAndGetCharFallback() {
+                for (;;) {
+                    if (TBaseStream::IsEmpty()) {
+                        if (TBaseStream::IsFinished()) {
+                            return '\0';
+                        }
+                        TBaseStream::template Refresh<AllowFinish>();
+                    } else if (IsSpaceFast(*TBaseStream::Begin())) {
+                        TBaseStream::Advance(1);
+                    } else {
+                        return TBaseStream::template GetChar<AllowFinish>();
+                    }
+                }
+            }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+    //! Block stream over a single in-memory range [begin, end). The whole input
+    //! is one block, so the stream always reports itself as finished and must
+    //! never be asked to refresh.
+    class TStringReader {
+    public:
+        TStringReader()
+            : TStringReader(nullptr, nullptr)
+        {
+        }
+
+        TStringReader(const char* begin, const char* end)
+            : BeginPtr(begin)
+            , EndPtr(end)
+        {
+        }
+
+        //! Replaces the range being read.
+        void SetBuffer(const char* begin, const char* end) {
+            BeginPtr = begin;
+            EndPtr = end;
+        }
+
+        //! First unread byte.
+        const char* Begin() const {
+            return BeginPtr;
+        }
+
+        //! One past the last byte of the range.
+        const char* End() const {
+            return EndPtr;
+        }
+
+        //! Consumes |bytes| bytes.
+        void Advance(size_t bytes) {
+            BeginPtr += bytes;
+        }
+
+        //! There is never another block to fetch.
+        bool IsFinished() const {
+            return true;
+        }
+
+        //! Must not be called: IsFinished() is always true.
+        void RefreshBlock() {
+            Y_FAIL("unreachable");
+        }
+
+    private:
+        const char* BeginPtr;
+        const char* EndPtr;
+    };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+    //! Block stream that reads an IInputStream through a caller-provided buffer
+    //! of |bufferSize| bytes. Neither the stream nor the buffer is owned.
+    class TStreamReader {
+    public:
+        TStreamReader(
+            IInputStream* stream,
+            char* buffer,
+            size_t bufferSize)
+            : Stream(stream)
+            , Buffer(buffer)
+            , BufferSize(bufferSize)
+            , BeginPtr(buffer)
+            , EndPtr(buffer)
+            , FinishFlag(false)
+        {
+        }
+
+        //! First unread byte of the current block.
+        const char* Begin() const {
+            return BeginPtr;
+        }
+
+        //! One past the last byte of the current block.
+        const char* End() const {
+            return EndPtr;
+        }
+
+        //! Consumes |bytes| bytes of the current block.
+        void Advance(size_t bytes) {
+            BeginPtr += bytes;
+        }
+
+        //! True once a read from the stream has returned zero bytes.
+        bool IsFinished() const {
+            return FinishFlag;
+        }
+
+        //! Overwrites the buffer with the next portion of the stream.
+        void RefreshBlock() {
+            const size_t bytesRead = Stream->Read(Buffer, BufferSize);
+            BeginPtr = Buffer;
+            EndPtr = Buffer + bytesRead;
+            FinishFlag = (bytesRead == 0);
+        }
+
+    private:
+        IInputStream* Stream;
+        char* Buffer;
+        size_t BufferSize;
+
+        const char* BeginPtr;
+        const char* EndPtr;
+        bool FinishFlag;
+    };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/format.h b/library/cpp/yson/format.h
new file mode 100644
index 00000000000..2ff6dc9f6e2
--- /dev/null
+++ b/library/cpp/yson/format.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "token.h"
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Token-type aliases named after the role each token plays in the format.
+
+ //! Tokens that open and close a list.
+ const ETokenType BeginListToken = LeftBracket;
+ const ETokenType EndListToken = RightBracket;
+
+ //! Tokens that open and close a map.
+ const ETokenType BeginMapToken = LeftBrace;
+ const ETokenType EndMapToken = RightBrace;
+
+ //! Tokens that open and close an attribute map.
+ const ETokenType BeginAttributesToken = LeftAngle;
+ const ETokenType EndAttributesToken = RightAngle;
+
+ //! Separators: between list items, between keyed items, and between a key
+ //! and its value.
+ const ETokenType ListItemSeparatorToken = Semicolon;
+ const ETokenType KeyedItemSeparatorToken = Semicolon;
+ const ETokenType KeyValueSeparatorToken = Equals;
+
+ //! Token that denotes an entity.
+ const ETokenType EntityToken = Hash;
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/json/json_writer.cpp b/library/cpp/yson/json/json_writer.cpp
new file mode 100644
index 00000000000..87481256ecd
--- /dev/null
+++ b/library/cpp/yson/json/json_writer.cpp
@@ -0,0 +1,220 @@
+#include "json_writer.h"
+
+#include <library/cpp/json/json_writer.h>
+
+namespace NYT {
+ ////////////////////////////////////////////////////////////////////////////////
+
+    //! A key is "special" when it starts with '$', the prefix this writer uses
+    //! for its own service keys ("$attributes", "$value").
+    static bool IsSpecialJsonKey(const TStringBuf& key) {
+        if (key.size() == 0) {
+            return false;
+        }
+        return key[0] == '$';
+    }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //! Convenience constructor: translates |format| into a TJsonWriterConfig
+ //! (formatted output is enabled only for JF_PRETTY) and delegates to the
+ //! config-based constructor.
+ TJsonWriter::TJsonWriter(
+ IOutputStream* output,
+ ::NYson::EYsonType type,
+ EJsonFormat format,
+ EJsonAttributesMode attributesMode,
+ ESerializedBoolFormat booleanFormat)
+ : TJsonWriter(
+ output,
+ NJson::TJsonWriterConfig{}.SetFormatOutput(format == JF_PRETTY),
+ type,
+ attributesMode,
+ booleanFormat
+ )
+ {}
+
+    //! Creates a writer that emits JSON to |output| through an owned
+    //! NJson::TJsonWriter configured with |config|.
+    //! Throws TYsonException for EYsonType::MapFragment, which is not supported.
+    TJsonWriter::TJsonWriter(
+        IOutputStream* output,
+        NJson::TJsonWriterConfig config,
+        ::NYson::EYsonType type,
+        EJsonAttributesMode attributesMode,
+        ESerializedBoolFormat booleanFormat)
+        : Output(output)
+        , Type(type)
+        , AttributesMode(attributesMode)
+        , BooleanFormat(booleanFormat)
+        , InAttributesBalance(0)
+        , HasAttributes(false)
+        , Depth(0)
+    {
+        if (Type == ::NYson::EYsonType::MapFragment) {
+            ythrow ::NYson::TYsonException() << ("Map fragments are not supported by Json");
+        }
+
+        UnderlyingJsonWriter.Reset(new NJson::TJsonWriter(output, config));
+        JsonWriter = UnderlyingJsonWriter.Get();
+    }
+
+    //! Called before any node value is written. Depending on the attributes
+    //! mode it may open the {"$attributes": ..., "$value": ...} wrapper, and it
+    //! records on the stack whether such a wrapper has to be closed later.
+    void TJsonWriter::EnterNode() {
+        switch (AttributesMode) {
+            case JAM_NEVER:
+                HasAttributes = false;
+                break;
+            case JAM_ON_DEMAND:
+                // Wrap only nodes that actually came with attributes.
+                break;
+            case JAM_ALWAYS:
+                if (!HasAttributes) {
+                    // No attributes were supplied: emit an empty attribute map.
+                    JsonWriter->OpenMap();
+                    JsonWriter->Write("$attributes");
+                    JsonWriter->OpenMap();
+                    JsonWriter->CloseMap();
+                    HasAttributes = true;
+                }
+                break;
+        }
+
+        const bool unfolded = HasAttributes;
+        HasUnfoldedStructureStack.push_back(unfolded);
+        if (unfolded) {
+            JsonWriter->Write("$value");
+            HasAttributes = false;
+        }
+
+        ++Depth;
+    }
+
+    //! Called after a node value has been written: closes the attributes/value
+    //! wrapper if EnterNode opened one and, for list fragments, terminates each
+    //! top-level item with a newline.
+    void TJsonWriter::LeaveNode() {
+        Y_ASSERT(!HasUnfoldedStructureStack.empty());
+        const bool wasUnfolded = HasUnfoldedStructureStack.back();
+        if (wasUnfolded) {
+            // Close map of the {$attributes, $value}
+            JsonWriter->CloseMap();
+        }
+        HasUnfoldedStructureStack.pop_back();
+
+        --Depth;
+
+        const bool topLevelItemDone =
+            Depth == 0 &&
+            Type == ::NYson::EYsonType::ListFragment &&
+            InAttributesBalance == 0;
+        if (topLevelItemDone) {
+            JsonWriter->Flush();
+            Output->Write("\n");
+        }
+    }
+
+    //! Output is suppressed only in JAM_NEVER mode while inside an attribute map.
+    bool TJsonWriter::IsWriteAllowed() {
+        return AttributesMode != JAM_NEVER || InAttributesBalance == 0;
+    }
+
+    //! Writes a string node unless output is currently suppressed.
+    void TJsonWriter::OnStringScalar(TStringBuf value) {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        WriteStringScalar(value);
+        LeaveNode();
+    }
+
+    //! Writes a signed integer node unless output is currently suppressed.
+    void TJsonWriter::OnInt64Scalar(i64 value) {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        JsonWriter->Write(value);
+        LeaveNode();
+    }
+
+    //! Writes an unsigned integer node unless output is currently suppressed.
+    void TJsonWriter::OnUint64Scalar(ui64 value) {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        JsonWriter->Write(value);
+        LeaveNode();
+    }
+
+    //! Writes a floating-point node unless output is currently suppressed.
+    void TJsonWriter::OnDoubleScalar(double value) {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        JsonWriter->Write(value);
+        LeaveNode();
+    }
+
+    //! Writes a boolean node: as the string "true"/"false" in SBF_STRING mode,
+    //! as a JSON boolean otherwise.
+    void TJsonWriter::OnBooleanScalar(bool value) {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        if (BooleanFormat == SBF_STRING) {
+            OnStringScalar(value ? "true" : "false");
+            return;
+        }
+        EnterNode();
+        JsonWriter->Write(value);
+        LeaveNode();
+    }
+
+    //! Writes an entity as JSON null unless output is currently suppressed.
+    void TJsonWriter::OnEntity() {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        JsonWriter->WriteNull();
+        LeaveNode();
+    }
+
+    //! Starts a list node by opening a JSON array.
+    void TJsonWriter::OnBeginList() {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        JsonWriter->OpenArray();
+    }
+
+ //! Intentionally empty: nothing is written here for a list item boundary.
+ void TJsonWriter::OnListItem() {
+ }
+
+    //! Finishes a list node by closing the JSON array.
+    void TJsonWriter::OnEndList() {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        JsonWriter->CloseArray();
+        LeaveNode();
+    }
+
+    //! Starts a map node by opening a JSON object.
+    void TJsonWriter::OnBeginMap() {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        EnterNode();
+        JsonWriter->OpenMap();
+    }
+
+    //! Writes a map key. Keys that start with '$' get one more '$' prepended so
+    //! that they cannot collide with the writer's own "$attributes"/"$value".
+    void TJsonWriter::OnKeyedItem(TStringBuf name) {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        if (!IsSpecialJsonKey(name)) {
+            WriteStringScalar(name);
+            return;
+        }
+        WriteStringScalar(TString("$") + name);
+    }
+
+    //! Finishes a map node by closing the JSON object.
+    void TJsonWriter::OnEndMap() {
+        if (!IsWriteAllowed()) {
+            return;
+        }
+        JsonWriter->CloseMap();
+        LeaveNode();
+    }
+
+    //! Enters an attribute map. Unless attributes are disabled, opens the
+    //! wrapper object and the nested "$attributes" object.
+    void TJsonWriter::OnBeginAttributes() {
+        ++InAttributesBalance;
+        if (AttributesMode == JAM_NEVER) {
+            return;
+        }
+        JsonWriter->OpenMap();
+        JsonWriter->Write("$attributes");
+        JsonWriter->OpenMap();
+    }
+
+    //! Leaves an attribute map. Unless attributes are disabled, closes the
+    //! "$attributes" object and remembers that the next node needs "$value".
+    void TJsonWriter::OnEndAttributes() {
+        --InAttributesBalance;
+        if (AttributesMode == JAM_NEVER) {
+            return;
+        }
+        HasAttributes = true;
+        JsonWriter->CloseMap();
+    }
+
+ //! Passes |value| to the underlying JSON writer; used for both string values
+ //! and map keys.
+ void TJsonWriter::WriteStringScalar(const TStringBuf& value) {
+ JsonWriter->Write(value);
+ }
+
+ //! Flushes the underlying JSON writer.
+ void TJsonWriter::Flush() {
+ JsonWriter->Flush();
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+}
diff --git a/library/cpp/yson/json/json_writer.h b/library/cpp/yson/json/json_writer.h
new file mode 100644
index 00000000000..d84ac0de530
--- /dev/null
+++ b/library/cpp/yson/json/json_writer.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include <library/cpp/yson/public.h>
+#include <library/cpp/yson/consumer.h>
+
+#include <library/cpp/json/json_writer.h>
+
+#include <util/generic/vector.h>
+
+namespace NYT {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //! Output layout; TJsonWriter enables formatted output only for JF_PRETTY.
+ enum EJsonFormat {
+ JF_TEXT,
+ JF_PRETTY
+ };
+
+ //! Controls how TJsonWriter represents YSON attributes.
+ enum EJsonAttributesMode {
+ JAM_NEVER, // attributes and everything inside them are dropped
+ JAM_ON_DEMAND, // {"$attributes", "$value"} wrapper only for nodes that have attributes
+ JAM_ALWAYS // every node is wrapped; an empty "$attributes" map is written if none were given
+ };
+
+ //! Controls how TJsonWriter writes boolean values.
+ enum ESerializedBoolFormat {
+ SBF_BOOLEAN, // passed to the JSON writer as a bool
+ SBF_STRING // written as the string "true" or "false"
+ };
+
+ //! YSON consumer that renders the events it receives as JSON on an output
+ //! stream. Nodes with attributes are written as an object with the keys
+ //! "$attributes" and "$value"; user keys starting with '$' get an extra '$'.
+ class TJsonWriter
+ : public ::NYson::TYsonConsumerBase {
+ public:
+ //! Builds the JSON writer configuration from |format|.
+ TJsonWriter(
+ IOutputStream* output,
+ ::NYson::EYsonType type = ::NYson::EYsonType::Node,
+ EJsonFormat format = JF_TEXT,
+ EJsonAttributesMode attributesMode = JAM_ON_DEMAND,
+ ESerializedBoolFormat booleanFormat = SBF_STRING);
+
+ //! Uses the supplied JSON writer configuration.
+ //! Throws for EYsonType::MapFragment, which is not supported.
+ TJsonWriter(
+ IOutputStream* output,
+ NJson::TJsonWriterConfig config,
+ ::NYson::EYsonType type = ::NYson::EYsonType::Node,
+ EJsonAttributesMode attributesMode = JAM_ON_DEMAND,
+ ESerializedBoolFormat booleanFormat = SBF_STRING);
+
+ //! Flushes the underlying JSON writer.
+ void Flush();
+
+ // Scalar events.
+ void OnStringScalar(TStringBuf value) override;
+ void OnInt64Scalar(i64 value) override;
+ void OnUint64Scalar(ui64 value) override;
+ void OnDoubleScalar(double value) override;
+ void OnBooleanScalar(bool value) override;
+
+ //! Written as JSON null.
+ void OnEntity() override;
+
+ // List events (JSON array).
+ void OnBeginList() override;
+ void OnListItem() override;
+ void OnEndList() override;
+
+ // Map events (JSON object).
+ void OnBeginMap() override;
+ void OnKeyedItem(TStringBuf key) override;
+ void OnEndMap() override;
+
+ // Attribute events.
+ void OnBeginAttributes() override;
+ void OnEndAttributes() override;
+
+ private:
+ THolder<NJson::TJsonWriter> UnderlyingJsonWriter; // owns the JSON writer
+ NJson::TJsonWriter* JsonWriter; // non-owning pointer to UnderlyingJsonWriter
+ IOutputStream* Output; // also written to directly for the newline after list-fragment items
+ ::NYson::EYsonType Type;
+ EJsonAttributesMode AttributesMode;
+ ESerializedBoolFormat BooleanFormat;
+
+ void WriteStringScalar(const TStringBuf& value);
+
+ // Bracket the writing of every node value.
+ void EnterNode();
+ void LeaveNode();
+ // False only in JAM_NEVER mode while inside attributes.
+ bool IsWriteAllowed();
+
+ // One entry per open node: true if the {$attributes, $value} wrapper was opened for it.
+ TVector<bool> HasUnfoldedStructureStack;
+ // Number of OnBeginAttributes calls not yet matched by OnEndAttributes.
+ int InAttributesBalance;
+ // True when the next node has to be written under "$value".
+ bool HasAttributes;
+ // Number of nodes entered and not yet left.
+ int Depth;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+}
diff --git a/library/cpp/yson/json/ya.make b/library/cpp/yson/json/ya.make
new file mode 100644
index 00000000000..625a6b231e7
--- /dev/null
+++ b/library/cpp/yson/json/ya.make
@@ -0,0 +1,17 @@
+LIBRARY()
+
+OWNER(
+ ermolovd
+ g:yt
+)
+
+SRCS(
+ json_writer.cpp
+ yson2json_adapter.cpp
+)
+
+PEERDIR(
+ library/cpp/json
+)
+
+END()
diff --git a/library/cpp/yson/json/yson2json_adapter.cpp b/library/cpp/yson/json/yson2json_adapter.cpp
new file mode 100644
index 00000000000..b5e7c49d4d8
--- /dev/null
+++ b/library/cpp/yson/json/yson2json_adapter.cpp
@@ -0,0 +1,82 @@
+#include "yson2json_adapter.h"
+
+namespace NYT {
+ TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException)
+ : NJson::TJsonCallbacks(throwException)
+ , Impl_(impl) // consumer that receives the translated events; kept as a raw pointer and never deleted in this file
+ {
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnNull() { // JSON null is forwarded as a YSON entity
+ WrapIfListItem(); // emit OnListItem() first when the innermost open container is an array
+ Impl_->OnEntity();
+ return true; // every callback of this adapter reports success unconditionally
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnBoolean(bool val) { // JSON true/false -> OnBooleanScalar
+ WrapIfListItem();
+ Impl_->OnBooleanScalar(val);
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnInteger(long long val) { // signed JSON integer -> OnInt64Scalar
+ WrapIfListItem();
+ Impl_->OnInt64Scalar(val);
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnUInteger(unsigned long long val) { // unsigned JSON integer -> OnUint64Scalar
+ WrapIfListItem();
+ Impl_->OnUint64Scalar(val);
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnString(const TStringBuf& val) { // JSON string value -> OnStringScalar
+ WrapIfListItem();
+ Impl_->OnStringScalar(val);
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnDouble(double val) { // JSON floating point number -> OnDoubleScalar
+ WrapIfListItem();
+ Impl_->OnDoubleScalar(val);
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnOpenArray() { // '[' : opens a YSON list
+ WrapIfListItem(); // checked before the push: the new array may itself be an item of an outer array
+ State_.ContextStack.push(true); // true marks "inside a list"
+ Impl_->OnBeginList();
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnCloseArray() { // ']' : closes the current YSON list
+ State_.ContextStack.pop(); // no emptiness check: relies on a matching OnOpenArray() having pushed an entry
+ Impl_->OnEndList();
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnOpenMap() { // '{' : opens a YSON map
+ WrapIfListItem(); // checked before the push: the new map may itself be an item of an outer array
+ State_.ContextStack.push(false); // false marks "inside a map"
+ Impl_->OnBeginMap();
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnCloseMap() { // '}' : closes the current YSON map
+ State_.ContextStack.pop(); // no emptiness check: relies on a matching OnOpenMap() having pushed an entry
+ Impl_->OnEndMap();
+ return true;
+ }
+
+ bool TYson2JsonCallbacksAdapter::OnMapKey(const TStringBuf& val) { // map key -> OnKeyedItem; no list-item handling here
+ Impl_->OnKeyedItem(val);
+ return true;
+ }
+
+ void TYson2JsonCallbacksAdapter::WrapIfListItem() { // called before every value and before every container opening
+ if (!State_.ContextStack.empty() && State_.ContextStack.top()) { // some container is open and the innermost one is a list
+ Impl_->OnListItem();
+ }
+ }
+}
diff --git a/library/cpp/yson/json/yson2json_adapter.h b/library/cpp/yson/json/yson2json_adapter.h
new file mode 100644
index 00000000000..da1bf5ba709
--- /dev/null
+++ b/library/cpp/yson/json/yson2json_adapter.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <library/cpp/yson/consumer.h>
+
+#include <library/cpp/json/json_reader.h>
+
+#include <util/generic/stack.h>
+
+namespace NYT {
+ class TYson2JsonCallbacksAdapter // despite the name, events flow from JSON parser callbacks to a YSON consumer
+ : public NJson::TJsonCallbacks {
+ public:
+ class TState { // opaque snapshot of the nesting state; only the adapter (a friend) can look inside
+ private:
+ // Stack of the containers that are currently open, innermost on top:
+ // true - the container is a list
+ // false - the container is a map
+ TStack<bool> ContextStack;
+
+ friend class TYson2JsonCallbacksAdapter;
+ };
+
+ public:
+ TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException = false); // impl receives every event; throwException goes to TJsonCallbacks
+
+ bool OnNull() override; // -> OnEntity
+ bool OnBoolean(bool val) override; // -> OnBooleanScalar
+ bool OnInteger(long long val) override; // -> OnInt64Scalar
+ bool OnUInteger(unsigned long long val) override; // -> OnUint64Scalar
+ bool OnString(const TStringBuf& val) override; // -> OnStringScalar
+ bool OnDouble(double val) override; // -> OnDoubleScalar
+ bool OnOpenArray() override; // -> OnBeginList
+ bool OnCloseArray() override; // -> OnEndList
+ bool OnOpenMap() override; // -> OnBeginMap
+ bool OnCloseMap() override; // -> OnEndMap
+ bool OnMapKey(const TStringBuf& val) override; // -> OnKeyedItem
+
+ TState State() const { // returns a copy of the current nesting state
+ return State_;
+ }
+
+ void Reset(const TState& state) { // replaces the nesting state, e.g. with a snapshot taken earlier via State()
+ State_ = state;
+ }
+
+ private:
+ void WrapIfListItem(); // emits OnListItem() when the innermost open container is a list
+
+ private:
+ ::NYson::TYsonConsumerBase* Impl_; // target consumer; raw pointer, this class declares no destructor that would free it
+ TState State_;
+ };
+}
diff --git a/library/cpp/yson/lexer.cpp b/library/cpp/yson/lexer.cpp
new file mode 100644
index 00000000000..5eae94273bf
--- /dev/null
+++ b/library/cpp/yson/lexer.cpp
@@ -0,0 +1,43 @@
+#include "lexer.h"
+#include "lexer_detail.h"
+#include "token.h"
+
+#include <util/generic/ptr.h>
+
+namespace NYson {
+    ////////////////////////////////////////////////////////////////////////////////
+
+    class TStatelessLexer::TImpl { // hidden implementation: owns the concrete lexer picked at construction
+    public:
+        TImpl(bool enableLinePositionInfo = false) {
+            if (enableLinePositionInfo) { // the flag is a template argument of the real lexer, so pick an instantiation
+                Lexer_.Reset(new TStatelesYsonLexerImpl<true>());
+            } else {
+                Lexer_.Reset(new TStatelesYsonLexerImpl<false>());
+            }
+        }
+
+        size_t GetToken(const TStringBuf& data, TToken* token) {
+            return Lexer_->GetToken(data, token);
+        }
+
+    private:
+        THolder<TStatelessYsonLexerImplBase> Lexer_;
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+    TStatelessLexer::TStatelessLexer()
+        : Impl(MakeHolder<TImpl>()) // default argument of TImpl: line position info stays disabled
+    {
+    }
+
+    // Defined in this file, after TImpl, so that THolder<TImpl> can destroy a complete type.
+    TStatelessLexer::~TStatelessLexer() = default;
+
+    size_t TStatelessLexer::GetToken(const TStringBuf& data, TToken* token) {
+        return Impl->GetToken(data, token);
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/lexer.h b/library/cpp/yson/lexer.h
new file mode 100644
index 00000000000..d9d701874d4
--- /dev/null
+++ b/library/cpp/yson/lexer.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "public.h"
+#include "token.h"
+
+#include <util/generic/ptr.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TStatelessLexer { // facade over an implementation class that is only forward-declared in this header
+ public:
+ TStatelessLexer();
+
+ ~TStatelessLexer(); // declared here and defined out of line, where TImpl is a complete type
+
+ size_t GetToken(const TStringBuf& data, TToken* token); // reads one token from the start of data into *token; the size_t comes from the implementation
+
+ private:
+ class TImpl;
+ THolder<TImpl> Impl; // owning pointer to the implementation
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/lexer_detail.h b/library/cpp/yson/lexer_detail.h
new file mode 100644
index 00000000000..0bba30acdd2
--- /dev/null
+++ b/library/cpp/yson/lexer_detail.h
@@ -0,0 +1,296 @@
+#pragma once
+
+#include "detail.h"
+#include "token.h"
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ namespace NDetail {
+ /*! \internal */
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // EReadStartCase tree representation (lowest bits decide first):
+ // Root = xb
+ //   BinaryStringOrOtherSpecialToken = x0b
+ //     BinaryString = 00b
+ //     OtherSpecialToken = 10b
+ //   Other = x1b
+ //     BinaryScalar = xxx01b
+ //       BinaryInt64 = 00001b
+ //       BinaryDouble = 00101b
+ //       BinaryFalse = 01001b
+ //       BinaryTrue = 01101b
+ //       BinaryUint64 = 10001b
+ //     Other = xxx11b
+ //       Quote = 00011b
+ //       DigitOrMinus = 00111b
+ //       String = 01011b
+ //       Space = 01111b
+ //       Plus = 10011b
+ //       None = 10111b
+ //       Percent = 11011b
+ enum EReadStartCase : unsigned {
+ BinaryString = 0, // = 00b
+ OtherSpecialToken = 2, // = 10b; the lookup table stores the ETokenType value in the bits above these two
+
+ BinaryInt64 = 1, // = 00001b
+ BinaryDouble = 5, // = 00101b
+ BinaryFalse = 9, // = 01001b
+ BinaryTrue = 13, // = 01101b
+ BinaryUint64 = 17, // = 10001b
+
+ Quote = 3, // = 00011b
+ DigitOrMinus = 7, // = 00111b
+ String = 11, // = 01011b
+ Space = 15, // = 01111b
+ Plus = 19, // = 10011b
+ None = 23, // = 10111b
+ Percent = 27 // = 11011b
+ };
+
+ template <class TBlockStream, bool EnableLinePositionInfo>
+ class TLexer
+ : public TLexerBase<TBlockStream, EnableLinePositionInfo> {
+ private:
+ using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>;
+
+            // Classifies the first byte of a token through a 256-entry table; for single-character
+            // tokens (TT entries) the ETokenType value is packed into the bits above the two tag bits.
+            static EReadStartCase GetStartState(char ch) {
+#define NN EReadStartCase::None
+#define BS EReadStartCase::BinaryString
+#define BI EReadStartCase::BinaryInt64
+#define BD EReadStartCase::BinaryDouble
+#define BF EReadStartCase::BinaryFalse
+#define BT EReadStartCase::BinaryTrue
+#define BU EReadStartCase::BinaryUint64
+#define SP NN // EReadStartCase::Space
+#define DM EReadStartCase::DigitOrMinus
+#define ST EReadStartCase::String
+#define PL EReadStartCase::Plus
+#define QU EReadStartCase::Quote
+#define PC EReadStartCase::Percent
+#define TT(name) (EReadStartCase(static_cast<ui8>(ETokenType::name) << 2) | EReadStartCase::OtherSpecialToken)
+
+                static const ui8 lookupTable[] =
+                    {
+                        NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, // 0 - 15: binary markers are bytes 1..6
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        // 32
+                        SP,                   // ' '
+                        NN,                   // '!'
+                        QU,                   // '"'
+                        TT(Hash),             // '#'
+                        NN,                   // '$'
+                        PC,                   // '%'
+                        NN,                   // '&'
+                        NN,                   // "'"
+                        TT(LeftParenthesis),  // '('
+                        TT(RightParenthesis), // ')'
+                        NN,                   // '*'
+                        PL,                   // '+'
+                        TT(Comma),            // ','
+                        DM,                   // '-'
+                        NN,                   // '.'
+                        NN,                   // '/'
+                        // 48
+                        DM, DM, DM, DM, DM, DM, DM, DM, DM, DM, // '0' - '9'
+                        TT(Colon),                              // ':'
+                        TT(Semicolon),                          // ';'
+                        TT(LeftAngle),                          // '<'
+                        TT(Equals),                             // '='
+                        TT(RightAngle),                         // '>'
+                        NN,                                     // '?'
+                        // 64
+                        NN,                                                 // '@'
+                        ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M'
+                        ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z'
+                        TT(LeftBracket),                                    // '['
+                        NN,                                                 // '\'
+                        TT(RightBracket),                                   // ']'
+                        NN,                                                 // '^'
+                        ST,                                                 // '_'
+                        // 96
+                        NN,                                                 // '`'
+                        ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm'
+                        ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z'
+                        TT(LeftBrace),                                      // '{'
+                        NN,                                                 // '|'
+                        TT(RightBrace),                                     // '}'
+                        NN,                                                 // '~'
+                        NN,                                                 // '^?' non-printable
+                        // 128 - 255: no byte in this half starts a token
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN,
+                        NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN};
+
+#undef NN
+#undef BS
+#undef BI
+#undef BD
+#undef BF
+#undef BT
+#undef BU
+#undef SP
+#undef DM
+#undef ST
+#undef PL
+#undef QU
+#undef PC
+#undef TT
+                return static_cast<EReadStartCase>(lookupTable[static_cast<ui8>(ch)]); // via ui8 so bytes >= 0x80 index 128..255
+            }
+
+ public:
+ TLexer(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) // both arguments are handed unchanged to TLexerBase
+ : TBase(blockStream, memoryLimit)
+ {
+ }
+
+            // Reads the next token into *token, dispatching on the start-state class of its first character.
+            void GetToken(TToken* token) {
+                char ch1 = TBase::SkipSpaceAndGetChar();
+                auto state = GetStartState(ch1);
+                auto stateBits = static_cast<unsigned>(state);
+                if (ch1 == '\0') { // a NUL character is reported as the end of the stream
+                    *token = TToken::EndOfStream;
+                    return;
+                }
+
+                if (stateBits & 1) {          // Other = x1b
+                    if (stateBits & 1 << 1) { // Other = xxx11b
+                        if (state == EReadStartCase::Quote) {
+                            TStringBuf value;
+                            TBase::Advance(1); // skip the opening quote
+                            TBase::ReadQuotedString(&value);
+                            *token = TToken(value);
+                        } else if (state == EReadStartCase::DigitOrMinus) {
+                            ReadNumeric<true>(token);
+                        } else if (state == EReadStartCase::Plus) {
+                            TBase::Advance(1); // consume '+'; a number that follows is read without its sign
+
+                            char ch2 = TBase::template GetChar<true>();
+                            // isdigit() is undefined for negative values other than EOF, so convert the byte first.
+                            if (!isdigit(static_cast<unsigned char>(ch2))) {
+                                *token = TToken(ETokenType::Plus);
+                            } else {
+                                ReadNumeric<true>(token);
+                            }
+                        } else if (state == EReadStartCase::String) {
+                            TStringBuf value;
+                            TBase::template ReadUnquotedString<true>(&value);
+                            *token = TToken(value);
+                        } else if (state == EReadStartCase::Percent) {
+                            TBase::Advance(1); // skip '%'
+                            char ch3 = TBase::template GetChar<true>();
+                            if (ch3 == 't' || ch3 == 'f') { // a 't' or 'f' after '%' selects the boolean reader
+                                *token = TToken(TBase::template ReadBoolean<true>());
+                            } else {
+                                *token = TToken(TBase::template ReadNanOrInf<true>());
+                            }
+                        } else { // None
+                            Y_ASSERT(state == EReadStartCase::None);
+                            ythrow TYsonException() << "Unexpected " << ch1;
+                        }
+                    } else { // BinaryScalar = x01b
+                        TBase::Advance(1); // skip the binary type marker
+                        if (state == EReadStartCase::BinaryDouble) {
+                            double value;
+                            TBase::ReadBinaryDouble(&value);
+                            *token = TToken(value);
+                        } else if (state == EReadStartCase::BinaryInt64) {
+                            i64 value;
+                            TBase::ReadBinaryInt64(&value);
+                            *token = TToken(value);
+                        } else if (state == EReadStartCase::BinaryUint64) {
+                            ui64 value;
+                            TBase::ReadBinaryUint64(&value);
+                            *token = TToken(value);
+                        } else if (state == EReadStartCase::BinaryFalse) {
+                            *token = TToken(false); // the marker byte alone encodes the value
+                        } else if (state == EReadStartCase::BinaryTrue) {
+                            *token = TToken(true);
+                        } else {
+                            Y_FAIL("unreachable");
+                        }
+                    }
+                } else { // BinaryStringOrOtherSpecialToken = x0b
+                    TBase::Advance(1);
+                    if (stateBits & 1 << 1) { // OtherSpecialToken = 10b
+                        Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::OtherSpecialToken));
+                        *token = TToken(ETokenType(stateBits >> 2)); // undo the << 2 packing done by the lookup table
+                    } else { // BinaryString = 00b
+                        Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::BinaryString));
+                        TStringBuf value;
+                        TBase::ReadBinaryString(&value);
+                        *token = TToken(value);
+                    }
+                }
+            }
+
+            // Reads a textual numeric literal and stores it in *token as double, i64 or ui64.
+            template <bool AllowFinish>
+            void ReadNumeric(TToken* token) {
+                TStringBuf literal;
+                const ENumericResult kind = TBase::template ReadNumeric<AllowFinish>(&literal);
+                if (kind == ENumericResult::Double) {
+                    try {
+                        *token = TToken(FromString<double>(literal));
+                    } catch (yexception&) { // conversion failures are re-reported as TYsonException
+                        ythrow TYsonException() << "Error parsing double literal " << literal;
+                    }
+                } else if (kind == ENumericResult::Int64) {
+                    try {
+                        *token = TToken(FromString<i64>(literal));
+                    } catch (yexception&) {
+                        ythrow TYsonException() << "Error parsing int64 literal " << literal;
+                    }
+                } else if (kind == ENumericResult::Uint64) {
+                    try { // the last character is cut off before conversion (presumably the 'u' suffix - confirm)
+                        *token = TToken(FromString<ui64>(literal.SubStr(0, literal.size() - 1)));
+                    } catch (yexception&) {
+                        ythrow TYsonException() << "Error parsing uint64 literal " << literal;
+                    }
+                }
+            }
+ };
+ ////////////////////////////////////////////////////////////////////////////////
+ /*! \endinternal */
+ }
+
+    // Abstract interface implemented by TStatelesYsonLexerImpl for either value of its template flag.
+    class TStatelessYsonLexerImplBase {
+    public:
+        virtual size_t GetToken(const TStringBuf& data, TToken* token) = 0;
+
+        virtual ~TStatelessYsonLexerImplBase() = default;
+    };
+
+ template <bool EnableLinePositionInfo>
+ class TStatelesYsonLexerImpl: public TStatelessYsonLexerImplBase { // concrete lexer working on an in-memory buffer
+ private:
+ using TLexer = NDetail::TLexer<TStringReader, EnableLinePositionInfo>;
+ TLexer Lexer;
+
+ public:
+ TStatelesYsonLexerImpl()
+ : Lexer(TStringReader(), Nothing()) // empty reader for now; Nothing() means no memory limit value is supplied
+ {
+ }
+
+ size_t GetToken(const TStringBuf& data, TToken* token) override {
+ Lexer.SetBuffer(data.begin(), data.end()); // the buffer is re-pointed at data on every call
+ Lexer.GetToken(token);
+ return Lexer.Begin() - data.begin(); // distance of Begin() from the start of data (presumably the bytes consumed - confirm)
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/node/node.cpp b/library/cpp/yson/node/node.cpp
new file mode 100644
index 00000000000..b39e0707187
--- /dev/null
+++ b/library/cpp/yson/node/node.cpp
@@ -0,0 +1,915 @@
+#include "node.h"
+
+#include "node_io.h"
+
+#include <library/cpp/yson/writer.h>
+
+#include <util/generic/overloaded.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool TNode::TNull::operator==(const TNull&) const {
+ return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool TNode::TUndefined::operator==(const TUndefined&) const {
+ return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NNodeCmp {
+
+bool IsComparableType(const TNode::EType type) { // true for scalar, Null and Undefined nodes; false for List, Map and anything else
+ switch (type) {
+ case TNode::String:
+ case TNode::Int64:
+ case TNode::Uint64:
+ case TNode::Double:
+ case TNode::Bool:
+ case TNode::Null:
+ case TNode::Undefined:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool operator<(const TNode& lhs, const TNode& rhs) // ordering for attribute-free comparable nodes; throws TNode::TTypeError otherwise
+{
+ if (!lhs.GetAttributes().Empty() || !rhs.GetAttributes().Empty()) { // nodes carrying any attribute cannot be ordered
+ ythrow TNode::TTypeError() << "Unsupported attributes comparison";
+ }
+
+ if (!IsComparableType(lhs.GetType()) || !IsComparableType(rhs.GetType())) {
+ ythrow TNode::TTypeError() << "Unsupported types for comparison: " << lhs.GetType() << " with " << rhs.GetType();
+ }
+
+ if (lhs.GetType() != rhs.GetType()) {
+ return lhs.GetType() < rhs.GetType(); // nodes of different types are ordered by their EType enumerator value
+ }
+
+ switch (lhs.GetType()) { // same type on both sides: compare the payloads
+ case TNode::String:
+ return lhs.AsString() < rhs.AsString();
+ case TNode::Int64:
+ return lhs.AsInt64() < rhs.AsInt64();
+ case TNode::Uint64:
+ return lhs.AsUint64() < rhs.AsUint64();
+ case TNode::Double:
+ return lhs.AsDouble() < rhs.AsDouble();
+ case TNode::Bool:
+ return lhs.AsBool() < rhs.AsBool();
+ case TNode::Null:
+ case TNode::Undefined:
+ return false; // these types carry no payload, so neither side is ever less
+ default:
+ Y_FAIL("Unexpected type: %d", lhs.GetType()); // not reachable after the IsComparableType() check above
+ }
+}
+
+bool operator>(const TNode& lhs, const TNode& rhs) // defined through operator< with swapped operands
+{
+ return rhs < lhs;
+}
+
+bool operator<=(const TNode& lhs, const TNode& rhs) // negation of operator>
+{
+ return !(lhs > rhs);
+}
+
+bool operator>=(const TNode& lhs, const TNode& rhs) // negation of operator<
+{
+ return !(lhs < rhs);
+}
+
+} // namespace NNodeCmp
+
+////////////////////////////////////////////////////////////////////////////////
+
+TNode::TNode()
+ : Value_(TUndefined{})
+{ }
+
+TNode::TNode(const char* s)
+ : Value_(TString(s))
+{ }
+
+TNode::TNode(TStringBuf s)
+ : Value_(TString(s))
+{ }
+
+TNode::TNode(std::string_view s)
+ : Value_(TString(s))
+{ }
+
+TNode::TNode(const std::string& s)
+ : Value_(TString(s))
+{ }
+
+TNode::TNode(TString s)
+ : Value_(std::move(s))
+{ }
+
+TNode::TNode(int i)
+ : Value_(static_cast<i64>(i))
+{ }
+
+
+TNode::TNode(unsigned int ui)
+ : Value_(static_cast<ui64>(ui))
+{ }
+
+TNode::TNode(long i)
+ : Value_(static_cast<i64>(i))
+{ }
+
+TNode::TNode(unsigned long ui)
+ : Value_(static_cast<ui64>(ui))
+{ }
+
+TNode::TNode(long long i)
+ : Value_(static_cast<i64>(i))
+{ }
+
+TNode::TNode(unsigned long long ui)
+ : Value_(static_cast<ui64>(ui))
+{ }
+
+TNode::TNode(double d)
+ : Value_(d)
+{ }
+
+TNode::TNode(bool b)
+ : Value_(b)
+{ }
+
+TNode::TNode(TMapType map)
+ : Value_(std::move(map))
+{ }
+
+TNode::TNode(const TNode& rhs)
+ : TNode()
+{
+ if (rhs.Attributes_) {
+ CreateAttributes();
+ *Attributes_ = *rhs.Attributes_;
+ }
+ Value_ = rhs.Value_;
+}
+
+TNode& TNode::operator=(const TNode& rhs) // copy assignment implemented as copy-then-move
+{
+ if (this != &rhs) { // self-assignment is a no-op
+ TNode tmp = rhs; // the full copy is made before *this is touched
+ Move(std::move(tmp)); // then value and attributes are taken over from the copy
+ }
+ return *this;
+}
+
+TNode::TNode(TNode&& rhs) noexcept
+ : TNode()
+{
+ Move(std::move(rhs));
+}
+
+TNode& TNode::operator=(TNode&& rhs) noexcept // move assignment routed through a temporary
+{
+ if (this != &rhs) { // self-assignment is a no-op
+ TNode tmp = std::move(rhs); // detach the value and attributes of rhs into a local first
+ Move(std::move(tmp)); // presumably so that an rhs stored inside *this survives the overwrite of Value_ - confirm
+ }
+ return *this;
+}
+
+TNode::~TNode() = default;
+
+void TNode::Clear()
+{
+ ClearAttributes();
+ Value_ = TUndefined();
+}
+
+bool TNode::IsString() const
+{
+ return std::holds_alternative<TString>(Value_);
+}
+
+bool TNode::IsInt64() const
+{
+ return std::holds_alternative<i64>(Value_);
+}
+
+bool TNode::IsUint64() const
+{
+ return std::holds_alternative<ui64>(Value_);
+}
+
+bool TNode::IsDouble() const
+{
+ return std::holds_alternative<double>(Value_);
+}
+
+bool TNode::IsBool() const
+{
+ return std::holds_alternative<bool>(Value_);
+}
+
+bool TNode::IsList() const
+{
+ return std::holds_alternative<TListType>(Value_);
+}
+
+bool TNode::IsMap() const
+{
+ return std::holds_alternative<TMapType>(Value_);
+}
+
+bool TNode::IsEntity() const
+{
+ return IsNull();
+}
+
+bool TNode::IsNull() const
+{
+ return std::holds_alternative<TNull>(Value_);
+}
+
+bool TNode::IsUndefined() const
+{
+ return std::holds_alternative<TUndefined>(Value_);
+}
+
+bool TNode::HasValue() const
+{
+ return !IsNull() && !IsUndefined();
+}
+
+bool TNode::Empty() const // emptiness of a string, list or map payload; any other type is a TTypeError
+{
+    if (const auto* text = std::get_if<TString>(&Value_)) {
+        return text->empty();
+    }
+    if (const auto* items = std::get_if<TListType>(&Value_)) {
+        return items->empty();
+    }
+    if (const auto* entries = std::get_if<TMapType>(&Value_)) {
+        return entries->empty();
+    }
+    ythrow TTypeError() << "Empty() called for type " << GetType();
+}
+
+size_t TNode::Size() const // length of a string, or element count of a list or map; any other type is a TTypeError
+{
+    if (const auto* text = std::get_if<TString>(&Value_)) {
+        return text->size();
+    }
+    if (const auto* items = std::get_if<TListType>(&Value_)) {
+        return items->size();
+    }
+    if (const auto* entries = std::get_if<TMapType>(&Value_)) {
+        return entries->size();
+    }
+    ythrow TTypeError() << "Size() called for type " << GetType();
+}
+
+TNode::EType TNode::GetType() const
+{
+ return std::visit(TOverloaded{
+ [](const TUndefined&) { return Undefined; },
+ [](const TString&) { return String; },
+ [](i64) { return Int64; },
+ [](ui64) { return Uint64; },
+ [](double) { return Double; },
+ [](bool) { return Bool; },
+ [](const TListType&) { return List; },
+ [](const TMapType&) { return Map; },
+ [](const TNull&) { return Null; }
+ }, Value_);
+}
+
+const TString& TNode::AsString() const
+{
+ CheckType(String);
+ return std::get<TString>(Value_);
+}
+
+i64 TNode::AsInt64() const
+{
+ CheckType(Int64);
+ return std::get<i64>(Value_);
+}
+
+ui64 TNode::AsUint64() const
+{
+ CheckType(Uint64);
+ return std::get<ui64>(Value_);
+}
+
+double TNode::AsDouble() const
+{
+ CheckType(Double);
+ return std::get<double>(Value_);
+}
+
+bool TNode::AsBool() const
+{
+ CheckType(Bool);
+ return std::get<bool>(Value_);
+}
+
+const TNode::TListType& TNode::AsList() const
+{
+ CheckType(List);
+ return std::get<TListType>(Value_);
+}
+
+const TNode::TMapType& TNode::AsMap() const
+{
+ CheckType(Map);
+ return std::get<TMapType>(Value_);
+}
+
+TNode::TListType& TNode::AsList()
+{
+ CheckType(List);
+ return std::get<TListType>(Value_);
+}
+
+TNode::TMapType& TNode::AsMap()
+{
+ CheckType(Map);
+ return std::get<TMapType>(Value_);
+}
+
+const TString& TNode::UncheckedAsString() const noexcept
+{
+ return std::get<TString>(Value_);
+}
+
+i64 TNode::UncheckedAsInt64() const noexcept
+{
+ return std::get<i64>(Value_);
+}
+
+ui64 TNode::UncheckedAsUint64() const noexcept
+{
+ return std::get<ui64>(Value_);
+}
+
+double TNode::UncheckedAsDouble() const noexcept
+{
+ return std::get<double>(Value_);
+}
+
+bool TNode::UncheckedAsBool() const noexcept
+{
+ return std::get<bool>(Value_);
+}
+
+const TNode::TListType& TNode::UncheckedAsList() const noexcept
+{
+ return std::get<TListType>(Value_);
+}
+
+const TNode::TMapType& TNode::UncheckedAsMap() const noexcept
+{
+ return std::get<TMapType>(Value_);
+}
+
+TNode::TListType& TNode::UncheckedAsList() noexcept
+{
+ return std::get<TListType>(Value_);
+}
+
+TNode::TMapType& TNode::UncheckedAsMap() noexcept
+{
+ return std::get<TMapType>(Value_);
+}
+
+TNode TNode::CreateList()
+{
+ TNode node;
+ node.Value_ = TListType{};
+ return node;
+}
+
+TNode TNode::CreateList(TListType list)
+{
+ TNode node;
+ node.Value_ = std::move(list);
+ return node;
+}
+
+TNode TNode::CreateMap()
+{
+ TNode node;
+ node.Value_ = TMapType{};
+ return node;
+}
+
+TNode TNode::CreateMap(TMapType map)
+{
+ TNode node;
+ node.Value_ = std::move(map);
+ return node;
+}
+
+TNode TNode::CreateEntity()
+{
+ TNode node;
+ node.Value_ = TNull{};
+ return node;
+}
+
+const TNode& TNode::operator[](size_t index) const
+{
+ CheckType(List);
+ return std::get<TListType>(Value_)[index];
+}
+
+TNode& TNode::operator[](size_t index)
+{
+ CheckType(List);
+ return std::get<TListType>(Value_)[index];
+}
+
+const TNode& TNode::At(size_t index) const {
+ CheckType(List);
+ const auto& list = std::get<TListType>(Value_);
+ if (index >= list.size()) {
+ ythrow TLookupError() << "List out-of-range: requested index=" << index << ", but size=" << list.size();
+ }
+ return list[index];
+}
+
+TNode& TNode::At(size_t index) {
+ CheckType(List);
+ auto& list = std::get<TListType>(Value_);
+ if (index >= list.size()) {
+ ythrow TLookupError() << "List out-of-range: requested index=" << index << ", but size=" << list.size();
+ }
+ return list[index];
+}
+
+TNode& TNode::Add() &
+{
+ AssureList();
+ return std::get<TListType>(Value_).emplace_back();
+}
+
+TNode TNode::Add() && // NOTE(review): yields the freshly appended element, not the list, unlike the Add(node) && overloads - confirm intent
+{
+ return std::move(Add()); // *this is an lvalue here, so this calls Add() &, which returns a reference to the new element
+}
+
+TNode& TNode::Add(const TNode& node) &
+{
+ AssureList();
+ std::get<TListType>(Value_).emplace_back(node);
+ return *this;
+}
+
+TNode TNode::Add(const TNode& node) &&
+{
+ return std::move(Add(node));
+}
+
+TNode& TNode::Add(TNode&& node) &
+{
+ AssureList();
+ std::get<TListType>(Value_).emplace_back(std::move(node));
+ return *this;
+}
+
+TNode TNode::Add(TNode&& node) &&
+{
+ return std::move(Add(std::move(node)));
+}
+
+bool TNode::HasKey(const TStringBuf key) const
+{
+ CheckType(Map);
+ return std::get<TMapType>(Value_).contains(key);
+}
+
+TNode& TNode::operator()(const TString& key, const TNode& value) &
+{
+ AssureMap();
+ std::get<TMapType>(Value_)[key] = value;
+ return *this;
+}
+
+TNode TNode::operator()(const TString& key, const TNode& value) &&
+{
+ return std::move(operator()(key, value));
+}
+
+TNode& TNode::operator()(const TString& key, TNode&& value) &
+{
+ AssureMap();
+ std::get<TMapType>(Value_)[key] = std::move(value);
+ return *this;
+}
+
+TNode TNode::operator()(const TString& key, TNode&& value) &&
+{
+ return std::move(operator()(key, std::move(value)));
+}
+
+const TNode& TNode::operator[](const TStringBuf key) const
+{
+    CheckType(Map);
+    // Missing keys yield this shared default-constructed (Undefined) node instead of throwing.
+    static TNode notFound;
+    const auto& children = std::get<TMapType>(Value_);
+    const auto found = children.find(key);
+    if (found != children.end()) {
+        return found->second;
+    }
+    return notFound;
+}
+
+TNode& TNode::operator[](const TStringBuf key)
+{
+ AssureMap();
+ return std::get<TMapType>(Value_)[key];
+}
+
+const TNode& TNode::At(const TStringBuf key) const {
+ CheckType(Map);
+ const auto& map = std::get<TMapType>(Value_);
+ TMapType::const_iterator i = map.find(key);
+ if (i == map.end()) {
+ ythrow TLookupError() << "Cannot find key " << key;
+ } else {
+ return i->second;
+ }
+}
+
+TNode& TNode::At(const TStringBuf key) {
+ CheckType(Map);
+ auto& map = std::get<TMapType>(Value_);
+ TMapType::iterator i = map.find(key);
+ if (i == map.end()) {
+ ythrow TLookupError() << "Cannot find key " << key;
+ } else {
+ return i->second;
+ }
+}
+
+const TString& TNode::ChildAsString(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsString();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+i64 TNode::ChildAsInt64(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsInt64();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+ui64 TNode::ChildAsUint64(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsUint64();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+double TNode::ChildAsDouble(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsDouble();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+bool TNode::ChildAsBool(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsBool();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+const TNode::TListType& TNode::ChildAsList(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsList();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+const TNode::TMapType& TNode::ChildAsMap(const TStringBuf key) const {
+ const auto& node = At(key);
+ try {
+ return node.AsMap();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+TNode::TListType& TNode::ChildAsList(const TStringBuf key) {
+ auto& node = At(key);
+ try {
+ return node.AsList();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+TNode::TMapType& TNode::ChildAsMap(const TStringBuf key) {
+ auto& node = At(key);
+ try {
+ return node.AsMap();
+ } catch (TTypeError& e) {
+ e << ", during getting key=" << key;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+ }
+}
+
+const TString& TNode::ChildAsString(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsString();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+i64 TNode::ChildAsInt64(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsInt64();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+ui64 TNode::ChildAsUint64(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsUint64();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+double TNode::ChildAsDouble(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsDouble();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+bool TNode::ChildAsBool(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsBool();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+const TNode::TListType& TNode::ChildAsList(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsList();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+const TNode::TMapType& TNode::ChildAsMap(size_t index) const {
+ const auto& node = At(index);
+ try {
+ return node.AsMap();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+TNode::TListType& TNode::ChildAsList(size_t index) {
+ auto& node = At(index);
+ try {
+ return node.AsList();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+TNode::TMapType& TNode::ChildAsMap(size_t index) {
+ auto& node = At(index);
+ try {
+ return node.AsMap();
+ } catch (TTypeError& e) {
+ e << ", during getting index=" << index;
+ throw e;
+ } catch (...) {
+ ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+ }
+}
+
+bool TNode::HasAttributes() const
+{
+ return Attributes_ && !Attributes_->Empty();
+}
+
+void TNode::ClearAttributes()
+{
+ if (Attributes_) {
+ Attributes_.Destroy();
+ }
+}
+
+const TNode& TNode::GetAttributes() const // read-only access to the attributes; never creates the attribute holder
+{
+ static TNode notFound = TNode::CreateMap(); // one shared empty map node, returned for every node without attributes
+ if (!Attributes_) {
+ return notFound;
+ }
+ return *Attributes_;
+}
+
+TNode& TNode::Attributes()
+{
+ if (!Attributes_) {
+ CreateAttributes();
+ }
+ return *Attributes_;
+}
+
+void TNode::MoveWithoutAttributes(TNode&& rhs)
+{
+ Value_ = std::move(rhs.Value_);
+ rhs.Clear();
+}
+
+void TNode::Move(TNode&& rhs)
+{
+ Value_ = std::move(rhs.Value_);
+ Attributes_ = std::move(rhs.Attributes_);
+}
+
+void TNode::CheckType(EType type) const
+{
+ Y_ENSURE_EX(GetType() == type,
+ TTypeError() << "TNode type " << type << " expected, actual type " << GetType();
+ );
+}
+
+void TNode::AssureMap()
+{
+    if (IsUndefined()) { // a node that holds nothing yet is turned into an empty map
+        Value_ = TMapType();
+        return;
+    }
+    CheckType(Map); // any other node must already be a map, otherwise TTypeError is thrown
+}
+
+void TNode::AssureList()
+{
+    if (IsUndefined()) { // a node that holds nothing yet is turned into an empty list
+        Value_ = TListType();
+        return;
+    }
+    CheckType(List); // any other node must already be a list, otherwise TTypeError is thrown
+}
+
+void TNode::CreateAttributes()
+{
+ Attributes_ = MakeHolder<TNode>();
+ Attributes_->Value_ = TMapType();
+}
+
+void TNode::Save(IOutputStream* out) const
+{
+ NodeToYsonStream(*this, out, NYson::EYsonFormat::Binary);
+}
+
+void TNode::Load(IInputStream* in)
+{
+ Clear();
+ *this = NodeFromYsonStream(in, ::NYson::EYsonType::Node);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool operator==(const TNode& lhs, const TNode& rhs)
+{
+    // TODO: should try to remove this behaviour if nobody uses it.
+    // An Undefined node never compares equal, not even to another Undefined node.
+    if (lhs.IsUndefined() || rhs.IsUndefined()) {
+        return false;
+    }
+
+    // Nodes of different types are never equal (so Int64 1 differs from Uint64 1).
+    if (lhs.GetType() != rhs.GetType()) {
+        return false;
+    }
+
+    // Attribute holders must be either both present or both absent; a holder that
+    // exists but contains an empty map is therefore not equal to a missing holder.
+    const bool lhsHasHolder = static_cast<bool>(lhs.Attributes_);
+    const bool rhsHasHolder = static_cast<bool>(rhs.Attributes_);
+    if (lhsHasHolder != rhsHasHolder) {
+        return false;
+    }
+
+    // With both holders present the attribute nodes are compared recursively.
+    if (lhsHasHolder && *lhs.Attributes_ != *rhs.Attributes_) {
+        return false;
+    }
+
+    // Finally compare the payloads held in the variants.
+    return rhs.Value_ == lhs.Value_;
+}
+
+bool operator!=(const TNode& lhs, const TNode& rhs)
+{
+ return !(lhs == rhs);
+}
+
+bool GetBool(const TNode& node)
+{
+    // Accepts a real boolean node, or a string node that is compared with the literal "true".
+    if (node.IsBool()) {
+        return node.AsBool();
+    }
+    if (!node.IsString()) {
+        ythrow TNode::TTypeError() << "GetBool(): not a boolean or string type";
+    }
+    return node.AsString() == "true"; // every other string, e.g. "false" or "True", yields false
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node.h b/library/cpp/yson/node/node.h
new file mode 100644
index 00000000000..5f90f95df07
--- /dev/null
+++ b/library/cpp/yson/node/node.h
@@ -0,0 +1,523 @@
+#pragma once
+
+#include <util/generic/bt_exception.h>
+#include <util/generic/cast.h>
+#include <util/generic/hash.h>
+#include <util/generic/variant.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/generic/ylimits.h>
+#include <util/string/cast.h>
+
+#include <cmath>
+#include <variant>
+
+class IInputStream;
+class IOutputStream;
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ class TNode // Dynamically typed value: exactly one alternative of TValue plus an optional attribute node.
+ {
+ public:
+ class TLookupError
+ : public TWithBackTrace<yexception>
+ { };
+
+ class TTypeError // Thrown when a node holds a different type than the operation requires.
+ : public TWithBackTrace<yexception>
+ { };
+
+ enum EType {
+ Undefined = 0 /*"undefined"*/,
+
+ // NOTE: the string representations of all node types
+ // are compatible with the server node types (except `Undefined', which is missing on the server).
+ String = 1 /*"string_node"*/,
+ Int64 = 2 /*"int64_node"*/,
+ Uint64 = 3 /*"uint64_node"*/,
+ Double = 4 /*"double_node"*/,
+ Bool = 5 /*"boolean_node"*/,
+ List = 6 /*"list_node"*/,
+ Map = 7 /*"map_node"*/,
+ Null = 8 /*"null"*/,
+ };
+
+ using TListType = TVector<TNode>;
+ using TMapType = THashMap<TString, TNode>;
+
+ private:
+ struct TNull {
+ bool operator==(const TNull&) const;
+ };
+
+ struct TUndefined {
+ bool operator==(const TUndefined&) const;
+ };
+
+ using TValue = std::variant<
+ bool,
+ i64,
+ ui64,
+ double,
+ TString,
+ TListType,
+ TMapType,
+ TNull,
+ TUndefined
+ >;
+
+ public:
+
+ TNode();
+ TNode(const char* s);
+ TNode(TStringBuf s);
+ explicit TNode(std::string_view s);
+ explicit TNode(const std::string& s);
+ TNode(TString s);
+ TNode(int i);
+
+ // Guard overload: prevents an accidental conversion of EType into TNode through the TNode(int) constructor.
+ // Typical mistake: `SomeNode == TNode::Undefined` -- without this overload SomeNode would be compared with TNode(0).
+ // The correct form is `SomeNode.GetType() == TNode::Undefined`.
+ template<class T = EType>
+ Y_FORCE_INLINE TNode(EType)
+ {
+ static_assert(!std::is_same<T, EType>::value, "looks like a mistake, may be you forget .GetType()");
+ }
+
+ // Guard overload: prevents an accidental conversion of T* into TNode through the implicit bool constructor.
+ template<class T = int>
+ Y_FORCE_INLINE TNode(const T*) : TNode() {
+ static_assert(!std::is_same<T,T>::value, "looks like a mistake, and pointer have converted to bool");
+ }
+
+ TNode(unsigned int ui);
+ TNode(long i);
+ TNode(unsigned long ui);
+ TNode(long long i);
+ TNode(unsigned long long ui);
+ TNode(double d);
+ TNode(bool b);
+ TNode(TMapType map);
+
+ TNode(const TNode& rhs);
+ TNode& operator=(const TNode& rhs);
+
+ TNode(TNode&& rhs) noexcept;
+ TNode& operator=(TNode&& rhs) noexcept;
+
+ ~TNode();
+
+ void Clear();
+
+ bool IsString() const;
+ bool IsInt64() const;
+ bool IsUint64() const;
+ bool IsDouble() const;
+ bool IsBool() const;
+ bool IsList() const;
+ bool IsMap() const;
+
+ // `IsEntity' is deprecated; use `IsNull' instead.
+ bool IsEntity() const;
+ bool IsNull() const;
+ bool IsUndefined() const;
+ // Returns true if TNode is neither Null nor Undefined.
+ bool HasValue() const;
+
+ template<typename T>
+ bool IsOfType() const noexcept; // True when the variant currently holds alternative T.
+
+ // Int64, Uint64, Double, or Bool
+ bool IsArithmetic() const;
+
+ bool Empty() const;
+ size_t Size() const;
+
+ EType GetType() const;
+
+ const TString& AsString() const;
+ i64 AsInt64() const;
+ ui64 AsUint64() const;
+ double AsDouble() const;
+ bool AsBool() const;
+ const TListType& AsList() const;
+ const TMapType& AsMap() const;
+ TListType& AsList();
+ TMapType& AsMap();
+
+ const TString& UncheckedAsString() const noexcept;
+ i64 UncheckedAsInt64() const noexcept;
+ ui64 UncheckedAsUint64() const noexcept;
+ double UncheckedAsDouble() const noexcept;
+ bool UncheckedAsBool() const noexcept;
+ const TListType& UncheckedAsList() const noexcept;
+ const TMapType& UncheckedAsMap() const noexcept;
+ TListType& UncheckedAsList() noexcept;
+ TMapType& UncheckedAsMap() noexcept;
+
+ // Cast between integer types (Int64/Uint64 nodes only).
+ // Performs overflow checks; failures are reported as TTypeError.
+ template<typename T>
+ T IntCast() const;
+
+ // Conversion between integers <-> double <-> string.
+ // Performs overflow checks.
+ template<typename T>
+ T ConvertTo() const;
+
+ template<typename T>
+ T& As(); // Direct std::get<T> access to the stored alternative.
+
+ template<typename T>
+ const T& As() const;
+
+ static TNode CreateList();
+ static TNode CreateList(TListType list);
+ static TNode CreateMap();
+ static TNode CreateMap(TMapType map);
+ static TNode CreateEntity();
+
+ const TNode& operator[](size_t index) const;
+ TNode& operator[](size_t index);
+ const TNode& At(size_t index) const;
+ TNode& At(size_t index);
+
+ TNode& Add() &;
+ TNode Add() &&;
+ TNode& Add(const TNode& node) &;
+ TNode Add(const TNode& node) &&;
+ TNode& Add(TNode&& node) &;
+ TNode Add(TNode&& node) &&;
+
+ bool HasKey(const TStringBuf key) const;
+
+ TNode& operator()(const TString& key, const TNode& value) &;
+ TNode operator()(const TString& key, const TNode& value) &&;
+ TNode& operator()(const TString& key, TNode&& value) &;
+ TNode operator()(const TString& key, TNode&& value) &&;
+
+ const TNode& operator[](const TStringBuf key) const;
+ TNode& operator[](const TStringBuf key);
+ const TNode& At(const TStringBuf key) const;
+ TNode& At(const TStringBuf key);
+
+ // Map getters:
+ // they work the same way as the simple getters, applied to the child stored under `key`.
+ const TString& ChildAsString(const TStringBuf key) const;
+ i64 ChildAsInt64(const TStringBuf key) const;
+ ui64 ChildAsUint64(const TStringBuf key) const;
+ double ChildAsDouble(const TStringBuf key) const;
+ bool ChildAsBool(const TStringBuf key) const;
+ const TListType& ChildAsList(const TStringBuf key) const;
+ const TMapType& ChildAsMap(const TStringBuf key) const;
+ TListType& ChildAsList(const TStringBuf key);
+ TMapType& ChildAsMap(const TStringBuf key);
+
+ template<typename T>
+ T ChildIntCast(const TStringBuf key) const;
+
+ template<typename T>
+ T ChildConvertTo(const TStringBuf key) const;
+
+ template<typename T>
+ const T& ChildAs(const TStringBuf key) const;
+
+ template<typename T>
+ T& ChildAs(const TStringBuf key);
+
+ // List getters:
+ // they work the same way as the simple getters, applied to the child stored at `index`.
+ const TString& ChildAsString(size_t index) const;
+ i64 ChildAsInt64(size_t index) const;
+ ui64 ChildAsUint64(size_t index) const;
+ double ChildAsDouble(size_t index) const;
+ bool ChildAsBool(size_t index) const;
+ const TListType& ChildAsList(size_t index) const;
+ const TMapType& ChildAsMap(size_t index) const;
+ TListType& ChildAsList(size_t index);
+ TMapType& ChildAsMap(size_t index);
+
+ template<typename T>
+ T ChildIntCast(size_t index) const;
+
+ template<typename T>
+ T ChildConvertTo(size_t index) const;
+
+ template<typename T>
+ const T& ChildAs(size_t index) const;
+
+ template<typename T>
+ T& ChildAs(size_t index);
+
+
+ // Attributes
+ bool HasAttributes() const;
+ void ClearAttributes();
+ const TNode& GetAttributes() const;
+ TNode& Attributes();
+
+ void MoveWithoutAttributes(TNode&& rhs);
+
+ // Serialize TNode using the binary YSON format.
+ // These methods are the hooks used by ysaveload.
+ void Save(IOutputStream* output) const;
+ void Load(IInputStream* input);
+
+ private:
+ void Move(TNode&& rhs);
+
+ void CheckType(EType type) const;
+
+ void AssureMap(); // Undefined -> empty map; otherwise the node must already be a map.
+ void AssureList(); // Undefined -> empty list; otherwise the node must already be a list.
+
+ void CreateAttributes();
+
+ private:
+ TValue Value_;
+ THolder<TNode> Attributes_; // Null when the node has no attribute node attached.
+
+ friend bool operator==(const TNode& lhs, const TNode& rhs);
+ friend bool operator!=(const TNode& lhs, const TNode& rhs);
+ };
+
+ bool operator==(const TNode& lhs, const TNode& rhs); // Deep comparison of type, attributes and value; an Undefined node never compares equal.
+ bool operator!=(const TNode& lhs, const TNode& rhs); // Negation of operator==.
+
+ bool GetBool(const TNode& node); // Bool node -> its value; String node -> (text == "true"); any other type throws TNode::TTypeError.
+
+ // True when the node stores Int64, Uint64, Double or Bool.
+ inline bool TNode::IsArithmetic() const {
+     if (IsInt64() || IsUint64()) {
+         return true;
+     }
+     return IsDouble() || IsBool();
+ }
+
+ // Casts an Int64 or Uint64 node to the integral type T with a range check.
+ // Throws TTypeError when the node is not an integer node or when the stored
+ // value does not fit into T.
+ template<typename T>
+ inline T TNode::IntCast() const {
+     static_assert(std::is_integral<T>::value, "implemented only for std::is_integral types");
+
+     const EType type = GetType();
+     try {
+         if (type == TNode::Uint64) {
+             return SafeIntegerCast<T>(AsUint64());
+         }
+         if (type == TNode::Int64) {
+             return SafeIntegerCast<T>(AsInt64());
+         }
+         ythrow TTypeError() << "IntCast() called for type " << type;
+     } catch (const TBadCastException& exc) {
+         // Report overflow through the node's own exception type.
+         ythrow TTypeError() << "TBadCastException during IntCast(): " << exc.what();
+     }
+ }
+
+ // Converts the stored scalar to the integral type T:
+ //   String       -> parsed with ::FromString
+ //   Int64/Uint64 -> IntCast<T>() (range-checked)
+ //   Double       -> must be finite and within [Min<T>(), MaxFloor<T>()]
+ //   Bool         -> converted as a bool-to-integer cast
+ // List, Map, Null and Undefined nodes throw TTypeError.
+ // Non-integral T must be handled by an explicit specialization.
+ template<typename T>
+ inline T TNode::ConvertTo() const {
+     if constexpr (std::is_integral<T>::value) {
+         switch (GetType()) {
+             case NYT::TNode::String:
+                 return ::FromString(AsString());
+             case NYT::TNode::Int64:
+             case NYT::TNode::Uint64:
+                 return IntCast<T>();
+             case NYT::TNode::Double: {
+                 const double value = AsDouble();
+                 if (value < Min<T>() || value > MaxFloor<T>() || !std::isfinite(value)) {
+                     ythrow TTypeError() << value << " can't be converted to " << TypeName<T>();
+                 }
+                 return static_cast<T>(value);
+             }
+             case NYT::TNode::Bool:
+                 return AsBool();
+             case NYT::TNode::List:
+             case NYT::TNode::Map:
+             case NYT::TNode::Null:
+             case NYT::TNode::Undefined:
+                 ythrow TTypeError() << "ConvertTo<" << TypeName<T>() << ">() called for type " << GetType();
+         }
+         // Every enumerator returns or throws above; do not let control flow off
+         // the end of a non-void function (same guard as ConvertTo<TString>).
+         Y_UNREACHABLE();
+     } else {
+         static_assert(sizeof(T) != sizeof(T), "should have template specialization");
+     }
+ }
+
+ // String form of a scalar node: strings are returned as is, numbers and
+ // booleans go through ::ToString. Non-scalar nodes throw TTypeError.
+ template<>
+ inline TString TNode::ConvertTo<TString>() const {
+     const EType type = GetType();
+     switch (type) {
+         case NYT::TNode::List:
+         case NYT::TNode::Map:
+         case NYT::TNode::Null:
+         case NYT::TNode::Undefined:
+             ythrow TTypeError() << "ConvertTo<TString>() called for type " << type;
+         case NYT::TNode::Bool:
+             return ::ToString(AsBool());
+         case NYT::TNode::Double:
+             return ::ToString(AsDouble());
+         case NYT::TNode::Uint64:
+             return ::ToString(AsUint64());
+         case NYT::TNode::Int64:
+             return ::ToString(AsInt64());
+         case NYT::TNode::String:
+             return AsString();
+     }
+     Y_UNREACHABLE();
+ }
+
+ // Converts a scalar node to double: strings are parsed with ::FromString,
+ // integers and booleans are converted implicitly. Non-scalar nodes throw
+ // TTypeError.
+ template<>
+ inline double TNode::ConvertTo<double>() const {
+     switch (GetType()) {
+         case NYT::TNode::String:
+             return ::FromString(AsString());
+         case NYT::TNode::Int64:
+             return AsInt64();
+         case NYT::TNode::Uint64:
+             return AsUint64();
+         case NYT::TNode::Double:
+             return AsDouble();
+         case NYT::TNode::Bool:
+             return AsBool();
+         case NYT::TNode::List:
+         case NYT::TNode::Map:
+         case NYT::TNode::Null:
+         case NYT::TNode::Undefined:
+             ythrow TTypeError() << "ConvertTo<double>() called for type " << GetType();
+     }
+     // Every enumerator returns or throws above; do not let control flow off
+     // the end of a non-void function (same guard as ConvertTo<TString>).
+     Y_UNREACHABLE();
+ }
+
+ // Converts a scalar node to bool: strings are parsed with ::FromString,
+ // numbers use the built-in numeric-to-bool conversion (non-zero => true).
+ // Non-scalar nodes throw TTypeError.
+ template<>
+ inline bool TNode::ConvertTo<bool>() const {
+     switch (GetType()) {
+         case NYT::TNode::String:
+             return ::FromString(AsString());
+         case NYT::TNode::Int64:
+             return AsInt64();
+         case NYT::TNode::Uint64:
+             return AsUint64();
+         case NYT::TNode::Double:
+             return AsDouble();
+         case NYT::TNode::Bool:
+             return AsBool();
+         case NYT::TNode::List:
+         case NYT::TNode::Map:
+         case NYT::TNode::Null:
+         case NYT::TNode::Undefined:
+             ythrow TTypeError() << "ConvertTo<bool>() called for type " << GetType();
+     }
+     // Every enumerator returns or throws above; do not let control flow off
+     // the end of a non-void function (same guard as ConvertTo<TString>).
+     Y_UNREACHABLE();
+ }
+
+ // Looks up the child stored under `key` and casts it with IntCast<T>().
+ // A failing cast is reported as TTypeError with the key appended to the
+ // message; whatever At() throws for a failed lookup propagates unchanged.
+ template<typename T>
+ inline T TNode::ChildIntCast(const TStringBuf key) const {
+     const auto& node = At(key);
+     try {
+         return node.IntCast<T>();
+     } catch (TTypeError& e) {
+         e << ", during getting key=" << key;
+         // Rethrow the annotated exception object itself; `throw e` would throw
+         // a copy sliced to TTypeError.
+         throw;
+     } catch (...) {
+         ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+     }
+ }
+
+ // Looks up the child stored at `index` and casts it with IntCast<T>().
+ // A failing cast is reported as TTypeError with the index appended to the
+ // message; whatever At() throws for a failed lookup propagates unchanged.
+ template<typename T>
+ inline T TNode::ChildIntCast(size_t index) const {
+     const auto& node = At(index);
+     try {
+         return node.IntCast<T>();
+     } catch (TTypeError& e) {
+         e << ", during getting index=" << index;
+         // Rethrow the annotated exception object itself; `throw e` would throw
+         // a copy sliced to TTypeError.
+         throw;
+     } catch (...) {
+         ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+     }
+ }
+
+ // Looks up the child stored under `key` and converts it with ConvertTo<T>().
+ // A failing conversion is reported as TTypeError with the key appended to the
+ // message; whatever At() throws for a failed lookup propagates unchanged.
+ template<typename T>
+ inline T TNode::ChildConvertTo(const TStringBuf key) const {
+     const auto& node = At(key);
+     try {
+         return node.ConvertTo<T>();
+     } catch (TTypeError& e) {
+         e << ", during getting key=" << key;
+         // Rethrow the annotated exception object itself; `throw e` would throw
+         // a copy sliced to TTypeError.
+         throw;
+     } catch (...) {
+         ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+     }
+ }
+
+ // Looks up the child stored at `index` and converts it with ConvertTo<T>().
+ // A failing conversion is reported as TTypeError with the index appended to
+ // the message; whatever At() throws for a failed lookup propagates unchanged.
+ template<typename T>
+ inline T TNode::ChildConvertTo(size_t index) const {
+     const auto& node = At(index);
+     try {
+         return node.ConvertTo<T>();
+     } catch (TTypeError& e) {
+         e << ", during getting index=" << index;
+         // Rethrow the annotated exception object itself; `throw e` would throw
+         // a copy sliced to TTypeError.
+         throw;
+     } catch (...) {
+         ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+     }
+ }
+
+ // Returns a reference to the alternative T stored in the child under `key`.
+ // Any failure of As<T>() is reported as TTypeError with the key appended to
+ // the message; whatever At() throws for a failed lookup propagates unchanged.
+ template<typename T>
+ inline const T& TNode::ChildAs(const TStringBuf key) const {
+     const auto& node = At(key);
+     try {
+         return node.As<T>();
+     } catch (TTypeError& e) {
+         e << ", during getting key=" << key;
+         // Rethrow the annotated exception object itself; `throw e` would throw
+         // a copy sliced to TTypeError.
+         throw;
+     } catch (...) {
+         // As<T>() uses std::get, whose failure is not a TTypeError; wrap it.
+         ythrow TTypeError() << CurrentExceptionMessage() << ", during getting key=" << key;
+     }
+ }
+
+ // Returns a reference to the alternative T stored in the child at `index`.
+ // Any failure of As<T>() is reported as TTypeError with the index appended to
+ // the message; whatever At() throws for a failed lookup propagates unchanged.
+ template<typename T>
+ inline const T& TNode::ChildAs(size_t index) const {
+     const auto& node = At(index);
+     try {
+         return node.As<T>();
+     } catch (TTypeError& e) {
+         e << ", during getting index=" << index;
+         // Rethrow the annotated exception object itself; `throw e` would throw
+         // a copy sliced to TTypeError.
+         throw;
+     } catch (...) {
+         // As<T>() uses std::get, whose failure is not a TTypeError; wrap it.
+         ythrow TTypeError() << CurrentExceptionMessage() << ", during getting index=" << index;
+     }
+ }
+
+ // Mutable overload: forwards to the const implementation and strips the
+ // const from the returned reference.
+ template<typename T>
+ inline T& TNode::ChildAs(const TStringBuf key) {
+     const TNode& self = *this;
+     return const_cast<T&>(self.ChildAs<T>(key));
+ }
+
+ // Mutable overload: forwards to the const implementation and strips the
+ // const from the returned reference.
+ template<typename T>
+ inline T& TNode::ChildAs(size_t index) {
+     const TNode& self = *this;
+     return const_cast<T&>(self.ChildAs<T>(index));
+ }
+
+ // True when the underlying variant currently holds alternative T.
+ template<typename T>
+ inline bool TNode::IsOfType() const noexcept {
+     return std::get_if<T>(&Value_) != nullptr;
+ }
+
+ // Direct access to the stored alternative T; std::get throws when the
+ // variant holds a different alternative.
+ template<typename T>
+ inline T& TNode::As() {
+     T& stored = std::get<T>(Value_);
+     return stored;
+ }
+
+ // Read-only access to the stored alternative T; std::get throws when the
+ // variant holds a different alternative.
+ template<typename T>
+ inline const T& TNode::As() const {
+     const T& stored = std::get<T>(Value_);
+     return stored;
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+ namespace NNodeCmp { // Ordering operators for TNode live in their own namespace; callers opt in with `using namespace NYT::NNodeCmp`.
+ bool operator<(const TNode& lhs, const TNode& rhs);
+ bool operator<=(const TNode& lhs, const TNode& rhs);
+ bool operator>(const TNode& lhs, const TNode& rhs);
+ bool operator>=(const TNode& lhs, const TNode& rhs);
+ bool IsComparableType(const TNode::EType type); // NOTE(review): presumably tells whether nodes of `type` may be ordered -- confirm against node.cpp.
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_builder.cpp b/library/cpp/yson/node/node_builder.cpp
new file mode 100644
index 00000000000..b4431bc77af
--- /dev/null
+++ b/library/cpp/yson/node/node_builder.cpp
@@ -0,0 +1,96 @@
+#include "node_builder.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Starts building into `node`: it becomes the bottom (and current) entry of
+ // the target stack.
+ TNodeBuilder::TNodeBuilder(TNode* node)
+ {
+     Stack_.emplace(node);
+ }
+
+ // Stores a string value in the current target and finishes that target.
+ void TNodeBuilder::OnStringScalar(TStringBuf value)
+ {
+     AddNode(TNode(value), /*pop*/ true);
+ }
+
+ // Stores a signed integer in the current target and finishes that target.
+ void TNodeBuilder::OnInt64Scalar(i64 value)
+ {
+     AddNode(TNode(value), /*pop*/ true);
+ }
+
+ // Stores an unsigned integer in the current target and finishes that target.
+ void TNodeBuilder::OnUint64Scalar(ui64 value)
+ {
+     AddNode(TNode(value), /*pop*/ true);
+ }
+
+ // Stores a floating-point value in the current target and finishes that target.
+ void TNodeBuilder::OnDoubleScalar(double value)
+ {
+     AddNode(TNode(value), /*pop*/ true);
+ }
+
+ // Stores a boolean in the current target and finishes that target.
+ void TNodeBuilder::OnBooleanScalar(bool value)
+ {
+     AddNode(TNode(value), /*pop*/ true);
+ }
+
+ // Stores an entity (null) node in the current target and finishes that target.
+ void TNodeBuilder::OnEntity()
+ {
+     TNode entity = TNode::CreateEntity();
+     AddNode(std::move(entity), /*pop*/ true);
+ }
+
+ // Turns the current target into an empty list; the target stays on the stack
+ // until OnEndList() so that items can be appended to it.
+ void TNodeBuilder::OnBeginList()
+ {
+     TNode emptyList = TNode::CreateList();
+     AddNode(std::move(emptyList), /*pop*/ false);
+ }
+
+ // Appends a new element to the list on top of the stack and makes that
+ // element the current target.
+ void TNodeBuilder::OnListItem()
+ {
+     TNode* const list = Stack_.top();
+     TNode& item = list->Add();
+     Stack_.push(&item);
+ }
+
+ void TNodeBuilder::OnEndList() // The list left on the stack by OnBeginList() is complete.
+ {
+ Stack_.pop();
+ }
+
+ // Turns the current target into an empty map; the target stays on the stack
+ // until OnEndMap() so that keyed items can be added to it.
+ void TNodeBuilder::OnBeginMap()
+ {
+     TNode emptyMap = TNode::CreateMap();
+     AddNode(std::move(emptyMap), /*pop*/ false);
+ }
+
+ // Selects (creating it if needed) the child stored under `key` in the node on
+ // top of the stack and makes that child the current target.
+ void TNodeBuilder::OnKeyedItem(TStringBuf key)
+ {
+     // TNode::operator[] takes a TStringBuf, so the key is passed through as is;
+     // building a temporary TString first would only add an allocation.
+     TNode& child = (*Stack_.top())[key];
+     Stack_.push(&child);
+ }
+
+ void TNodeBuilder::OnEndMap() // The map left on the stack by OnBeginMap() is complete.
+ {
+ Stack_.pop();
+ }
+
+ // Makes the attribute node of the current target the new target, so that the
+ // following keyed items are written into the attributes.
+ void TNodeBuilder::OnBeginAttributes()
+ {
+     TNode* const owner = Stack_.top();
+     TNode& attributes = owner->Attributes();
+     Stack_.push(&attributes);
+ }
+
+ void TNodeBuilder::OnEndAttributes() // Leaves the attribute node pushed by OnBeginAttributes(); the owning node is the target again.
+ {
+ Stack_.pop();
+ }
+
+ // Stores a ready-made node in the current target and finishes that target.
+ void TNodeBuilder::OnNode(TNode node)
+ {
+     constexpr bool finishTarget = true;
+     AddNode(std::move(node), finishTarget);
+ }
+
+ // Moves `value` into the node on top of the stack through
+ // MoveWithoutAttributes(); when `pop` is set the target is complete and is
+ // removed from the stack.
+ void TNodeBuilder::AddNode(TNode value, bool pop)
+ {
+     TNode* const target = Stack_.top();
+     target->MoveWithoutAttributes(std::move(value));
+     if (!pop) {
+         return;
+     }
+     Stack_.pop();
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_builder.h b/library/cpp/yson/node/node_builder.h
new file mode 100644
index 00000000000..69800016e09
--- /dev/null
+++ b/library/cpp/yson/node/node_builder.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "node.h"
+
+#include <library/cpp/json/json_reader.h>
+
+#include <library/cpp/yson/consumer.h>
+
+#include <util/generic/stack.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ class TNodeBuilder // YSON consumer that assembles a TNode tree from the events it receives.
+ : public ::NYson::TYsonConsumerBase
+ {
+ public:
+ TNodeBuilder(TNode* node); // `node` receives the built value; the pointer is kept on Stack_.
+
+ void OnStringScalar(TStringBuf) override;
+ void OnInt64Scalar(i64) override;
+ void OnUint64Scalar(ui64) override;
+ void OnDoubleScalar(double) override;
+ void OnBooleanScalar(bool) override;
+ void OnEntity() override;
+ void OnBeginList() override;
+ void OnListItem() override;
+ void OnEndList() override;
+ void OnBeginMap() override;
+ void OnKeyedItem(TStringBuf) override;
+ void OnEndMap() override;
+ void OnBeginAttributes() override;
+ void OnEndAttributes() override;
+ void OnNode(TNode node); // Not an override: stores an already built node as the current value.
+
+ private:
+ TStack<TNode*> Stack_; // Targets being filled; the top receives the next value.
+
+ private:
+ inline void AddNode(TNode node, bool pop); // Moves `node` into the top target via MoveWithoutAttributes and pops the target when `pop` is true.
+ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp
new file mode 100644
index 00000000000..294a7f7217f
--- /dev/null
+++ b/library/cpp/yson/node/node_io.cpp
@@ -0,0 +1,154 @@
+#include "node_io.h"
+
+#include "node_builder.h"
+#include "node_visitor.h"
+
+#include <library/cpp/yson/json/json_writer.h>
+#include <library/cpp/yson/parser.h>
+#include <library/cpp/yson/writer.h>
+#include <library/cpp/yson/json/yson2json_adapter.h>
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_value.h>
+
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include <util/stream/str.h>
+#include <util/stream/mem.h>
+
+namespace NYT {
+
+ // Replays an in-memory JSON value as a stream of NJson callback events,
+ // recursing into maps and arrays. An undefined JSON value is rejected with
+ // yexception.
+ static void WalkJsonTree(const NJson::TJsonValue& jsonValue, NJson::TJsonCallbacks* callbacks)
+ {
+     using namespace NJson;
+     switch (jsonValue.GetType()) {
+         case JSON_UNDEFINED:
+             ythrow yexception() << "cannot consume undefined json value";
+         case JSON_NULL:
+             callbacks->OnNull();
+             return;
+         case JSON_BOOLEAN:
+             callbacks->OnBoolean(jsonValue.GetBoolean());
+             return;
+         case JSON_INTEGER:
+             callbacks->OnInteger(jsonValue.GetInteger());
+             return;
+         case JSON_UINTEGER:
+             callbacks->OnUInteger(jsonValue.GetUInteger());
+             return;
+         case JSON_DOUBLE:
+             callbacks->OnDouble(jsonValue.GetDouble());
+             return;
+         case JSON_STRING:
+             callbacks->OnString(jsonValue.GetString());
+             return;
+         case JSON_ARRAY: {
+             callbacks->OnOpenArray();
+             for (const auto& element : jsonValue.GetArray()) {
+                 WalkJsonTree(element, callbacks);
+             }
+             callbacks->OnCloseArray();
+             return;
+         }
+         case JSON_MAP: {
+             callbacks->OnOpenMap();
+             for (const auto& [key, child] : jsonValue.GetMap()) {
+                 callbacks->OnMapKey(key);
+                 WalkJsonTree(child, callbacks);
+             }
+             callbacks->OnCloseMap();
+             return;
+         }
+     }
+     Y_UNREACHABLE();
+ }
+
+ // Picks the initial root for parsing: list and map fragments need an existing
+ // container to append to, every other YSON type starts from an undefined node.
+ static TNode CreateEmptyNodeByType(::NYson::EYsonType type)
+ {
+     if (type == ::NYson::EYsonType::ListFragment) {
+         return TNode::CreateList();
+     }
+     if (type == ::NYson::EYsonType::MapFragment) {
+         return TNode::CreateMap();
+     }
+     return TNode();
+ }
+
+ // Parses YSON text held in memory by wrapping it into an input stream.
+ TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type)
+ {
+     TMemoryInput source(input);
+     TNode parsed = NodeFromYsonStream(&source, type);
+     return parsed;
+ }
+
+ // Serializes `node` into a string using the requested YSON format.
+ TString NodeToYsonString(const TNode& node, NYson::EYsonFormat format)
+ {
+     TStringStream buffer;
+     NodeToYsonStream(node, &buffer, format);
+     return buffer.Str();
+ }
+
+ // Serializes `node` into a string through the canonical (sorted map keys)
+ // stream writer.
+ TString NodeToCanonicalYsonString(const TNode& node, NYson::EYsonFormat format)
+ {
+     TStringStream buffer;
+     NodeToCanonicalYsonStream(node, &buffer, format);
+     return buffer.Str();
+ }
+
+ // Parses YSON of the given kind from `input`, feeding parser events into a
+ // TNodeBuilder that fills the returned node.
+ TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type)
+ {
+     TNode root = CreateEmptyNodeByType(type);
+
+     TNodeBuilder consumer(&root);
+     ::NYson::TYsonParser ysonParser(&consumer, input, type);
+     ysonParser.Parse();
+
+     return root;
+ }
+
+ // Writes `node` to `output` in the requested YSON format; map keys are
+ // emitted in whatever order the visitor's default produces.
+ void NodeToYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format)
+ {
+     ::NYson::TYsonWriter ysonWriter(output, format);
+     TNodeVisitor walker(&ysonWriter);
+     walker.Visit(node);
+ }
+
+ // Writes `node` to `output` in the requested YSON format with map keys sorted,
+ // which makes the output reproducible.
+ void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format)
+ {
+     ::NYson::TYsonWriter ysonWriter(output, format);
+     TNodeVisitor walker(&ysonWriter, /*sortMapKeys*/ true);
+     walker.Visit(node);
+ }
+
+ // Parses JSON text into a TNode. The JSON reader drives a callback adapter
+ // that forwards events to a TNodeBuilder; UTF-8 validation is switched off.
+ TNode NodeFromJsonString(const TStringBuf input)
+ {
+     TNode root;
+     TNodeBuilder consumer(&root);
+     TYson2JsonCallbacksAdapter adapter(&consumer, /*throwException*/ true);
+
+     NJson::TJsonReaderConfig readerConfig;
+     readerConfig.DontValidateUtf8 = true;
+
+     TMemoryInput source(input);
+     NJson::ReadJson(&source, &readerConfig, &adapter);
+     return root;
+ }
+
+ // Converts an already parsed JSON value into a TNode by replaying it through
+ // the same callback adapter that the JSON text parser uses.
+ TNode NodeFromJsonValue(const NJson::TJsonValue& input)
+ {
+     TNode root;
+     TNodeBuilder consumer(&root);
+     TYson2JsonCallbacksAdapter adapter(&consumer, /*throwException*/ true);
+     WalkJsonTree(input, &adapter);
+     return root;
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h
new file mode 100644
index 00000000000..2ad23b658f2
--- /dev/null
+++ b/library/cpp/yson/node/node_io.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "node.h"
+#include <library/cpp/yson/public.h>
+
+namespace NJson {
+ class TJsonValue;
+} // namespace NJson
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Parse a TNode from a string in YSON format.
+ TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+
+ // Serialize a TNode to a string in one of the YSON formats; the order of map keys is unspecified (don't use in tests).
+ TString NodeToYsonString(const TNode& node, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+ // Same as NodeToYsonString, but map keys are sorted lexicographically (to be used in tests).
+ TString NodeToCanonicalYsonString(const TNode& node, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+ // Parse a TNode from a stream in YSON format.
+ TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+
+ // Serialize a TNode to a stream in one of the YSON formats; the order of map keys is unspecified (don't use in tests).
+ void NodeToYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+ // Same as NodeToYsonStream, but map keys are sorted lexicographically (to be used in tests).
+ void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
+
+ // Parse a TNode from a string in JSON format.
+ TNode NodeFromJsonString(const TStringBuf input);
+
+ // Convert a TJsonValue to a TNode.
+ TNode NodeFromJsonValue(const NJson::TJsonValue& input);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_ut.cpp b/library/cpp/yson/node/node_ut.cpp
new file mode 100644
index 00000000000..448e99f5753
--- /dev/null
+++ b/library/cpp/yson/node/node_ut.cpp
@@ -0,0 +1,484 @@
+#include "node.h"
+#include "node_io.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/ysaveload.h>
+
+using namespace NYT;
+
+ // Stream output for TNode (used by the value-printing test assertions):
+ // prints the "TNode:" tag followed by the node's YSON text.
+ template<>
+ void Out<NYT::TNode>(IOutputStream& s, const NYT::TNode& node)
+ {
+     s << "TNode:";
+     s << NodeToYsonString(node);
+ }
+
+Y_UNIT_TEST_SUITE(YtNodeTest) {
+ Y_UNIT_TEST(TestConstsructors) { // Each scalar constructor must yield the matching node type and value.
+ TNode nodeEmpty;
+ UNIT_ASSERT_EQUAL(nodeEmpty.GetType(), TNode::Undefined);
+
+ TNode nodeString("foobar");
+ UNIT_ASSERT_EQUAL(nodeString.GetType(), TNode::String);
+ UNIT_ASSERT(nodeString.IsString());
+ UNIT_ASSERT_VALUES_EQUAL(nodeString.AsString(), "foobar");
+
+ TNode nodeInt(int(54)); // A signed source type must produce Int64, not Uint64.
+ UNIT_ASSERT_EQUAL(nodeInt.GetType(), TNode::Int64);
+ UNIT_ASSERT(nodeInt.IsInt64());
+ UNIT_ASSERT(!nodeInt.IsUint64());
+ UNIT_ASSERT_VALUES_EQUAL(nodeInt.AsInt64(), 54ull);
+
+ TNode nodeUint(ui64(42)); // An unsigned source type must produce Uint64, not Int64.
+ UNIT_ASSERT_EQUAL(nodeUint.GetType(), TNode::Uint64);
+ UNIT_ASSERT(nodeUint.IsUint64());
+ UNIT_ASSERT(!nodeUint.IsInt64());
+ UNIT_ASSERT_VALUES_EQUAL(nodeUint.AsUint64(), 42ull);
+
+ TNode nodeDouble(double(2.3));
+ UNIT_ASSERT_EQUAL(nodeDouble.GetType(), TNode::Double);
+ UNIT_ASSERT(nodeDouble.IsDouble());
+ UNIT_ASSERT_VALUES_EQUAL(nodeDouble.AsDouble(), double(2.3));
+
+ TNode nodeBool(true);
+ UNIT_ASSERT_EQUAL(nodeBool.GetType(), TNode::Bool);
+ UNIT_ASSERT(nodeBool.IsBool());
+ UNIT_ASSERT_VALUES_EQUAL(nodeBool.AsBool(), true);
+
+ TNode nodeEntity = TNode::CreateEntity(); // An entity is reported as the Null type.
+ UNIT_ASSERT_EQUAL(nodeEntity.GetType(), TNode::Null);
+ UNIT_ASSERT(nodeEntity.IsEntity());
+ }
+
+ Y_UNIT_TEST(TestPredicates) { // IsUndefined / IsNull / HasValue for undefined, null and ordinary nodes.
+ const TNode undefinedNode;
+ UNIT_ASSERT(undefinedNode.IsUndefined());
+ UNIT_ASSERT(!undefinedNode.IsNull());
+ UNIT_ASSERT(!undefinedNode.HasValue());
+
+ const TNode nullNode = TNode::CreateEntity();
+ UNIT_ASSERT(!nullNode.IsUndefined());
+ UNIT_ASSERT(nullNode.IsNull());
+ UNIT_ASSERT(!nullNode.HasValue());
+
+ const TNode intNode(int(64));
+ UNIT_ASSERT(!intNode.IsUndefined());
+ UNIT_ASSERT(!intNode.IsNull());
+ UNIT_ASSERT(intNode.HasValue());
+
+ const TNode stringNode("blah");
+ UNIT_ASSERT(!stringNode.IsUndefined());
+ UNIT_ASSERT(!stringNode.IsNull());
+ UNIT_ASSERT(stringNode.HasValue());
+ }
+
+ Y_UNIT_TEST(TestComplexConstructors) { // CreateList / CreateMap taking ready-made containers.
+ const TNode listNode = TNode::CreateList({"one", 2, "tree"});
+ const auto expectedListValue = std::vector<TNode>({"one", 2, "tree"});
+ UNIT_ASSERT_VALUES_EQUAL(listNode.AsList(), expectedListValue);
+
+ const TNode mapNode = TNode::CreateMap({{"one", 1}, {"two", 2u}});
+ const auto expectedMapValue = THashMap<TString, TNode>({{"one", 1}, {"two", 2u}});
+ UNIT_ASSERT_VALUES_EQUAL(mapNode.AsMap(), expectedMapValue);
+ }
+
+ Y_UNIT_TEST(TestNodeMap) { // Building a map with operator(), key lookup, and copying.
+ TNode nodeMap = TNode()("foo", "bar")("bar", "baz");
+ UNIT_ASSERT(nodeMap.IsMap());
+ UNIT_ASSERT_EQUAL(nodeMap.GetType(), TNode::Map);
+ UNIT_ASSERT_VALUES_EQUAL(nodeMap.Size(), 2);
+
+ UNIT_ASSERT(nodeMap.HasKey("foo"));
+ UNIT_ASSERT(!nodeMap.HasKey("42"));
+ UNIT_ASSERT_EQUAL(nodeMap["foo"], TNode("bar"));
+ UNIT_ASSERT_EQUAL(nodeMap["bar"], TNode("baz"));
+
+ // The const version of operator[] must not insert the missing key.
+ UNIT_ASSERT_EQUAL(static_cast<const TNode&>(nodeMap)["42"].GetType(), TNode::Undefined);
+ UNIT_ASSERT(!nodeMap.HasKey("42"));
+
+ // The non-const version of operator[] inserts the missing key as an undefined node.
+ UNIT_ASSERT_EQUAL(nodeMap["42"].GetType(), TNode::Undefined);
+ UNIT_ASSERT(nodeMap.HasKey("42"));
+
+ nodeMap("rock!!!", TNode()
+ ("Pink", "Floyd")
+ ("Purple", "Deep"));
+
+ TNode copyNode;
+ copyNode = nodeMap; // Copy assignment must copy nested maps as well.
+ UNIT_ASSERT_EQUAL(copyNode["foo"], TNode("bar"));
+ UNIT_ASSERT_EQUAL(copyNode["bar"], TNode("baz"));
+ UNIT_ASSERT(copyNode["42"].GetType() == TNode::Undefined);
+ UNIT_ASSERT_EQUAL(copyNode["rock!!!"]["Purple"], TNode("Deep"));
+ }
+
+ Y_UNIT_TEST(TestNodeList) { // Building a list with Add(), indexing, and copying.
+ TNode nodeList = TNode().Add("foo").Add(42).Add(3.14);
+ UNIT_ASSERT(nodeList.IsList());
+ UNIT_ASSERT_EQUAL(nodeList.GetType(), TNode::List);
+ UNIT_ASSERT_VALUES_EQUAL(nodeList.Size(), 3);
+
+ UNIT_ASSERT_EQUAL(nodeList[1], TNode(42));
+ nodeList.Add(TNode().Add("ls").Add("pwd")); // Nested list becomes element 3.
+
+ TNode copyNode;
+ copyNode = nodeList;
+ UNIT_ASSERT_EQUAL(copyNode[0], TNode("foo"));
+ UNIT_ASSERT_EQUAL(copyNode[3][1], TNode("pwd"));
+ }
+
+ Y_UNIT_TEST(TestInsertingMethodsFromTemporaryObjects) { // Chained builders on temporaries must return values, not dangling references.
+ // Check that .Add(...) doesn't return an lvalue reference to a temporary object.
+ {
+ const TNode& nodeList = TNode().Add(0).Add("pass").Add(0);
+ UNIT_ASSERT_EQUAL(nodeList[1], TNode("pass"));
+ }
+
+ // Check that .operator()(...) doesn't return an lvalue reference to a temporary object.
+ {
+ const TNode& nodeMap = TNode()("1", 0)("2", "pass")("3", 0);
+ UNIT_ASSERT_EQUAL(nodeMap["2"], TNode("pass"));
+ }
+ }
+
+ Y_UNIT_TEST(TestAttributes) { // Attribute creation, copying, MoveWithoutAttributes and clearing.
+ TNode node = TNode()("lee", 42)("faa", 54);
+ UNIT_ASSERT(!node.HasAttributes());
+ node.Attributes()("foo", true)("bar", false);
+ UNIT_ASSERT(node.HasAttributes());
+
+ { // Copy assignment carries the attributes over.
+ TNode copyNode;
+ UNIT_ASSERT(!copyNode.HasAttributes());
+ copyNode = node;
+ UNIT_ASSERT(copyNode.HasAttributes());
+ UNIT_ASSERT_EQUAL(copyNode.GetAttributes()["foo"], TNode(true));
+ }
+
+ { // MoveWithoutAttributes replaces the value but keeps the destination's own attributes.
+ TNode movedWithoutAttributes(42);
+ movedWithoutAttributes.Attributes()("one", 1)("two", 2);
+ movedWithoutAttributes.MoveWithoutAttributes(TNode(node));
+ UNIT_ASSERT(movedWithoutAttributes.IsMap());
+ UNIT_ASSERT_EQUAL(movedWithoutAttributes["lee"], TNode(42));
+ UNIT_ASSERT_EQUAL(movedWithoutAttributes.GetAttributes()["one"], TNode(1));
+ UNIT_ASSERT(!movedWithoutAttributes.GetAttributes().HasKey("foo"));
+ }
+
+ { // ClearAttributes removes the attributes.
+ TNode copyNode = node;
+ UNIT_ASSERT(copyNode.HasAttributes());
+ UNIT_ASSERT(copyNode.GetAttributes().HasKey("foo"));
+ copyNode.ClearAttributes();
+ UNIT_ASSERT(!copyNode.HasAttributes());
+ UNIT_ASSERT(!copyNode.GetAttributes().HasKey("foo"));
+ }
+
+ { // Clear removes the attributes as well.
+ TNode copyNode = node;
+ UNIT_ASSERT(copyNode.HasAttributes());
+ UNIT_ASSERT(copyNode.GetAttributes().HasKey("foo"));
+ copyNode.Clear();
+ UNIT_ASSERT(!copyNode.HasAttributes());
+ UNIT_ASSERT(!copyNode.GetAttributes().HasKey("foo"));
+ }
+ }
+
+ Y_UNIT_TEST(TestEq) { // Equality takes attributes into account and is symmetric.
+ TNode nodeNoAttributes = TNode()("lee", 42)("faa", 54);
+ TNode node = nodeNoAttributes;
+ node.Attributes()("foo", true)("bar", false);
+ UNIT_ASSERT(node != nodeNoAttributes);
+ UNIT_ASSERT(nodeNoAttributes != node);
+ TNode copyNode = node;
+ UNIT_ASSERT(copyNode == node);
+ UNIT_ASSERT(node == copyNode);
+ }
+
+ Y_UNIT_TEST(TestComparison) { // Ordering operators from NNodeCmp.
+ using namespace NYT::NNodeCmp;
+ { // Maps (with or without attributes) cannot be ordered.
+ TNode nodeNoAttributes = TNode()("lee", 42)("faa", 54);
+ TNode node = nodeNoAttributes;
+ node.Attributes()("foo", true)("bar", false);
+ UNIT_ASSERT_EXCEPTION(node > nodeNoAttributes, TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(node >= nodeNoAttributes, TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(nodeNoAttributes < node, TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(nodeNoAttributes <= node, TNode::TTypeError);
+ }
+ { // Lists and maps cannot be ordered, not even against themselves.
+ TNode nodeMap = TNode()("map", 23);
+ TNode nodeList = TNode::CreateList();
+ UNIT_ASSERT_EXCEPTION(nodeList > nodeMap, TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(nodeMap < nodeList, TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(nodeMap >= nodeMap, TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(nodeList <= nodeList, TNode::TTypeError);
+ }
+ { // Strings are ordered by their values.
+ TNode node1("aaa");
+ TNode node2("bbb");
+ TNode node3("ccc");
+ UNIT_ASSERT(node1 < node2);
+ UNIT_ASSERT(node1 <= node2);
+ UNIT_ASSERT(node1 < node3);
+ UNIT_ASSERT(node1 <= node3);
+ UNIT_ASSERT(!(node3 < node1));
+ UNIT_ASSERT(!(node1 > node3));
+ UNIT_ASSERT(!(node3 <= node1));
+ UNIT_ASSERT(!(node1 >= node3));
+
+ UNIT_ASSERT(node3 > node2);
+ UNIT_ASSERT(node3 >= node2);
+ UNIT_ASSERT(node3 > node1);
+ UNIT_ASSERT(node3 >= node1);
+
+ UNIT_ASSERT(node1 <= node1);
+ UNIT_ASSERT(node1 >= node1);
+ }
+ { // Mixed scalar types: the assertions below pin String < Int64 < Null.
+ TNode node1(23);
+ TNode node2("bbb");
+ TNode node3 = TNode::CreateEntity();
+
+ UNIT_ASSERT(node1 > node2);
+ UNIT_ASSERT(node1 >= node2);
+ UNIT_ASSERT(node2 < node1);
+ UNIT_ASSERT(node2 <= node1);
+
+ UNIT_ASSERT(!(node1 < node2));
+ UNIT_ASSERT(!(node1 <= node2));
+ UNIT_ASSERT(!(node2 > node1));
+ UNIT_ASSERT(!(node2 >= node1));
+
+ UNIT_ASSERT(node1 < node3);
+ UNIT_ASSERT(node2 < node3);
+ UNIT_ASSERT(node3 <= node3);
+ UNIT_ASSERT(!(node3 < node3));
+ UNIT_ASSERT(!(node3 > node3));
+ UNIT_ASSERT(!(node2 >= node3));
+ }
+ }
+
+ Y_UNIT_TEST(TestSaveLoad) { // Round trip through ysaveload must preserve the value and its attributes.
+ TNode node = TNode()("foo", "bar")("baz", 42);
+ node.Attributes()["attr_name"] = "attr_value";
+
+ TString bytes;
+ { // Scope the output stream so that it is finished before `bytes` is read.
+ TStringOutput s(bytes);
+ ::Save(&s, node);
+ }
+
+ TNode nodeCopy;
+ {
+ TStringInput s(bytes);
+ ::Load(&s, nodeCopy);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy);
+ }
+
+ Y_UNIT_TEST(TestIntCast) { // IntCast must succeed when the value fits the target type and throw TTypeError otherwise.
+ TNode node = 1ull << 31; // Does not fit into i32, fits into ui32 and wider types.
+ UNIT_ASSERT(node.IsUint64());
+ UNIT_ASSERT_EXCEPTION(node.IntCast<i32>(), TNode::TTypeError);
+ UNIT_ASSERT(node.IntCast<ui32>() == static_cast<ui32>(node.AsUint64()));
+ UNIT_ASSERT(node.IntCast<i64>() == static_cast<i64>(node.AsUint64()));
+ UNIT_ASSERT(node.IntCast<ui64>() == node.AsUint64());
+
+ node = 1ull << 63; // Does not fit into i64.
+ UNIT_ASSERT(node.IsUint64());
+ UNIT_ASSERT_EXCEPTION(node.IntCast<i64>(), TNode::TTypeError);
+ UNIT_ASSERT(node.IntCast<ui64>() == node.AsUint64());
+
+ node = 12345; // Too large for the 8-bit types only.
+ UNIT_ASSERT(node.IsInt64());
+ UNIT_ASSERT_EXCEPTION(node.IntCast<i8>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(node.IntCast<ui8>(), TNode::TTypeError);
+ UNIT_ASSERT(node.IntCast<i16>() == static_cast<i16>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<ui16>() == static_cast<ui16>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<i32>() == static_cast<i32>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<ui32>() == static_cast<ui32>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<i64>() == node.AsInt64());
+ UNIT_ASSERT(node.IntCast<ui64>() == static_cast<ui64>(node.AsInt64()));
+
+ node = -5; // Negative values fit every signed type and no unsigned type.
+ UNIT_ASSERT(node.IsInt64());
+ UNIT_ASSERT(node.IntCast<i8>() == static_cast<i8>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<i16>() == static_cast<i16>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<i32>() == static_cast<i32>(node.AsInt64()));
+ UNIT_ASSERT(node.IntCast<i64>() == node.AsInt64());
+ UNIT_ASSERT_EXCEPTION(node.IntCast<ui8>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(node.IntCast<ui16>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(node.IntCast<ui32>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(node.IntCast<ui64>(), TNode::TTypeError);
+ }
+
+ // Checks ConvertTo<TString> on scalar nodes: integers are rendered in
+ // decimal, booleans as "1" / "0", and the double 5.3 as "5.3".
+ Y_UNIT_TEST(TestConvertToString) {
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5).ConvertTo<TString>(), "5");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(123432423).ConvertTo<TString>(), "123432423");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(123456789012345678ll).ConvertTo<TString>(), "123456789012345678");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(123456789012345678ull).ConvertTo<TString>(), "123456789012345678");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(-123456789012345678ll).ConvertTo<TString>(), "-123456789012345678");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(true).ConvertTo<TString>(), "1");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(false).ConvertTo<TString>(), "0");
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<TString>(), "5.3");
+ }
+
+ // Checks ConvertTo<numeric> on string nodes: decimal text parses into the
+ // requested type, and text that does not fit the target (2^63 into i64)
+ // raises TFromStringException.
+ Y_UNIT_TEST(TestConvertFromString) {
+ UNIT_ASSERT_VALUES_EQUAL(TNode("123456789012345678").ConvertTo<ui64>(), 123456789012345678ull);
+ UNIT_ASSERT_VALUES_EQUAL(TNode("123456789012345678").ConvertTo<i64>(), 123456789012345678);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(ToString(1ull << 63)).ConvertTo<ui64>(), 1ull << 63);
+ UNIT_ASSERT_EXCEPTION(TNode(ToString(1ull << 63)).ConvertTo<i64>(), TFromStringException);
+ UNIT_ASSERT_VALUES_EQUAL(TNode("5.34").ConvertTo<double>(), 5.34);
+ }
+
+ // Checks ConvertTo<integer> on double nodes: the fractional part is
+ // dropped (5.3 -> 5, -5.3 -> -5) and values outside the target range,
+ // as well as NaN and infinity, raise TNode::TTypeError.
+ Y_UNIT_TEST(TestConvertDoubleInt) {
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<i8>(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<ui8>(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<i64>(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(5.3).ConvertTo<ui64>(), 5);
+
+ // Negative doubles convert to signed targets only.
+ UNIT_ASSERT_VALUES_EQUAL(TNode(-5.3).ConvertTo<i8>(), -5);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(-5.3).ConvertTo<i64>(), -5);
+ UNIT_ASSERT_EXCEPTION(TNode(-5.3).ConvertTo<ui8>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(TNode(-5.3).ConvertTo<ui64>(), TNode::TTypeError);
+
+ // Exact boundaries of the 8-bit targets, then far out-of-range values.
+ UNIT_ASSERT_VALUES_EQUAL(TNode(127.0).ConvertTo<i8>(), 127);
+ UNIT_ASSERT_EXCEPTION(TNode(128.0).ConvertTo<i8>(), TNode::TTypeError);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(255.0).ConvertTo<ui8>(), 255);
+ UNIT_ASSERT_EXCEPTION(TNode(256.0).ConvertTo<ui8>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(TNode(1e100).ConvertTo<i64>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(TNode(1e100).ConvertTo<ui64>(), TNode::TTypeError);
+ {
+ // 2^63 as a double: too large for i64, still valid for ui64.
+ double v = 1ull << 63;
+ TNode node = v;
+ UNIT_ASSERT(node.IsDouble());
+ UNIT_ASSERT_EXCEPTION(node.ConvertTo<i64>(), TNode::TTypeError);
+ UNIT_ASSERT_VALUES_EQUAL(node.ConvertTo<ui64>(), static_cast<ui64>(v));
+ }
+ {
+ // 2^63 + 2^63 = 2^64 as a double: too large for both 64-bit targets.
+ double v = (double)(1ull << 63) + (1ull << 63);
+ TNode node = v;
+ UNIT_ASSERT(node.IsDouble());
+ UNIT_ASSERT_EXCEPTION(node.ConvertTo<i64>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(node.ConvertTo<ui64>(), TNode::TTypeError);
+ }
+ UNIT_ASSERT_EXCEPTION(TNode(NAN).ConvertTo<ui64>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(TNode(NAN).ConvertTo<i64>(), TNode::TTypeError);
+
+ UNIT_ASSERT_EXCEPTION(TNode(INFINITY).ConvertTo<ui64>(), TNode::TTypeError);
+ UNIT_ASSERT_EXCEPTION(TNode(INFINITY).ConvertTo<i64>(), TNode::TTypeError);
+ }
+
+ // Checks ConvertTo<bool>: "true"/"false" are accepted in lower and upper
+ // case, integers 1 and 0 map to true and false, and other strings
+ // (including the empty string) raise TFromStringException.
+ Y_UNIT_TEST(TestConvertToBool) {
+ UNIT_ASSERT_VALUES_EQUAL(TNode("true").ConvertTo<bool>(), true);
+ UNIT_ASSERT_VALUES_EQUAL(TNode("TRUE").ConvertTo<bool>(), true);
+ UNIT_ASSERT_VALUES_EQUAL(TNode("false").ConvertTo<bool>(), false);
+ UNIT_ASSERT_VALUES_EQUAL(TNode("FALSE").ConvertTo<bool>(), false);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(1).ConvertTo<bool>(), true);
+ UNIT_ASSERT_VALUES_EQUAL(TNode(0).ConvertTo<bool>(), false);
+ UNIT_ASSERT_EXCEPTION(TNode("random").ConvertTo<bool>(), TFromStringException);
+ UNIT_ASSERT_EXCEPTION(TNode("").ConvertTo<bool>(), TFromStringException);
+ }
+
+ // Checks that NodeToCanonicalYsonString emits keys in sorted order at every
+ // level: in the attributes, in the top-level map and in the nested map.
+ // Keys are inserted out of order on purpose.
+ Y_UNIT_TEST(TestCanonicalSerialization) {
+ auto node = TNode()
+ ("ca", "ca")("c", "c")("a", "a")("b", "b")
+ ("bb", TNode()
+ ("ii", "ii")("i", "i")("jj", "jj"));
+ node.Attributes() = TNode()("za", "za")("z", "z")("xxx", "xxx")("xx", "xx");
+ UNIT_ASSERT_VALUES_EQUAL(NodeToCanonicalYsonString(node),
+ "<\"xx\"=\"xx\";\"xxx\"=\"xxx\";\"z\"=\"z\";\"za\"=\"za\">"
+ "{\"a\"=\"a\";\"b\"=\"b\";\"bb\"="
+ "{\"i\"=\"i\";\"ii\"=\"ii\";\"jj\"=\"jj\"};"
+ "\"c\"=\"c\";\"ca\"=\"ca\"}");
+ }
+
+ // Assigns a node from one of its own descendants, twice, and checks that
+ // the innermost value survives.
+ // NOTE(review): presumably a regression test for assignment where the
+ // source is owned by the destination -- confirm against TNode::operator=.
+ Y_UNIT_TEST(OperatorEqualSubnode) {
+ TNode node;
+ node["a"]["b"] = "c";
+
+ // Each right-hand side is a child of the node being overwritten.
+ node = node["a"];
+ node = node["b"];
+
+ UNIT_ASSERT_VALUES_EQUAL(node.AsString(), "c");
+ }
+
+ // Exercises the key-based child accessors of a map node (ChildAs<T>,
+ // ChildAsXxx, ChildIntCast<T>, ChildConvertTo<T>) for every child type
+ // stored below, and checks that ChildAs<T> yields a mutable reference.
+ Y_UNIT_TEST(TestMapGetters) {
+ auto node = TNode::CreateMap()
+ ("string", "7")
+ ("int64", 3)
+ ("uint64", 5u)
+ ("double", -3.5)
+ ("list", TNode::CreateList().Add(5))
+ ("map", TNode::CreateMap()("key", "value"));
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>("string"), "7");
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsString("string"), "7");
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<i64>("string"), 7);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<i64>("int64"), 3);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsInt64("int64"), 3);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildIntCast<ui64>("int64"), 3u);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<ui64>("uint64"), 5u);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsUint64("uint64"), 5u);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildIntCast<i64>("uint64"), 5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<TString>("uint64"), "5");
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<double>("double"), -3.5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsDouble("double"), -3.5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<TString>("double"), "-3.5");
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TNode::TListType>("list")[0].AsInt64(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsList("list")[0].AsInt64(), 5);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TNode::TMapType>("map")["key"].AsString(), "value");
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsMap("map")["key"].AsString(), "value");
+
+ // mutable accessor: writing through the returned reference must be
+ // visible on the next read of the same child.
+ auto& childString = node.ChildAs<TString>("string");
+ childString = "yaddayadda";
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>("string"), "yaddayadda");
+ }
+
+ // Exercises the index-based child accessors of a list node (ChildAs<T>,
+ // ChildAsXxx, ChildIntCast<T>, ChildConvertTo<T>) for every child type
+ // stored below, and checks that ChildAs<T> yields a mutable reference.
+ Y_UNIT_TEST(TestListGetters) {
+ // Children by index: 0 string, 1 int64, 2 uint64, 3 double, 4 list, 5 map.
+ auto node = TNode::CreateList()
+ .Add("7")
+ .Add(3)
+ .Add(5u)
+ .Add(-3.5)
+ .Add(TNode::CreateList().Add(5))
+ .Add(TNode::CreateMap()("key", "value"));
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>(0), "7");
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsString(0), "7");
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<i64>(0), 7);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<i64>(1), 3);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsInt64(1), 3);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildIntCast<ui64>(1), 3u);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<ui64>(2), 5u);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsUint64(2), 5u);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildIntCast<i64>(2), 5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<TString>(2), "5");
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<double>(3), -3.5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsDouble(3), -3.5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildConvertTo<TString>(3), "-3.5");
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TNode::TListType>(4)[0].AsInt64(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsList(4)[0].AsInt64(), 5);
+
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TNode::TMapType>(5)["key"].AsString(), "value");
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAsMap(5)["key"].AsString(), "value");
+
+ // mutable accessor: writing through the returned reference must be
+ // visible on the next read of the same child.
+ auto& childString = node.ChildAs<TString>(0);
+ childString = "yaddayadda";
+ UNIT_ASSERT_VALUES_EQUAL(node.ChildAs<TString>(0), "yaddayadda");
+ }
+}
diff --git a/library/cpp/yson/node/node_visitor.cpp b/library/cpp/yson/node/node_visitor.cpp
new file mode 100644
index 00000000000..899fbfa02aa
--- /dev/null
+++ b/library/cpp/yson/node/node_visitor.cpp
@@ -0,0 +1,152 @@
+#include "node_visitor.h"
+
+#include <util/generic/algorithm.h>
+#include <util/string/printf.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+ // Invokes `action` once for every entry of `nodeMap`.
+ //
+ // When `sortByKey` is true the entries are visited in the order produced by
+ // sorting on the key; otherwise they are visited in the map's own iteration
+ // order. `action` receives the map entry itself (`*it`), not a copy.
+ template <typename Fun>
+ void Iterate(const TNode::TMapType& nodeMap, bool sortByKey, Fun action)
+ {
+     if (sortByKey) {
+         // Sort iterators instead of entries so no key or value is copied.
+         TVector<TNode::TMapType::const_iterator> iterators;
+         iterators.reserve(nodeMap.size());
+         for (auto it = nodeMap.begin(); it != nodeMap.end(); ++it) {
+             iterators.push_back(it);
+         }
+         // Hand the key out by reference: the key function may be evaluated
+         // for every comparison, and returning by value would copy the
+         // string each time.
+         SortBy(iterators, [](const TNode::TMapType::const_iterator& it) -> const TString& {
+             return it->first;
+         });
+         for (const auto& it : iterators) {
+             action(*it);
+         }
+     } else {
+         ForEach(nodeMap.begin(), nodeMap.end(), action);
+     }
+ }
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Stores the consumer that will receive the visit events and the flag that
+ // selects key-sorted traversal of maps and attributes. The consumer pointer
+ // is kept as-is; nothing here takes ownership of it.
+ TNodeVisitor::TNodeVisitor(NYson::IYsonConsumer* consumer, bool sortMapKeys)
+ : Consumer_(consumer)
+ , SortMapKeys_(sortMapKeys)
+ { }
+
+ // Public entry point: reports `node` (attributes first, then the value) to
+ // the consumer by delegating to VisitAny.
+ void TNodeVisitor::Visit(const TNode& node)
+ {
+ VisitAny(node);
+ }
+
+ // Reports one node to the consumer: its attributes (if any) wrapped in
+ // OnBeginAttributes / OnEndAttributes, followed by the value dispatched on
+ // the node type.
+ //
+ // Throws TNode::TTypeError when the node itself, or any attribute value, is
+ // of type Undefined. For an Undefined attribute value the key has already
+ // been reported through OnKeyedItem when the exception is thrown.
+ void TNodeVisitor::VisitAny(const TNode& node)
+ {
+     if (node.HasAttributes()) {
+         Consumer_->OnBeginAttributes();
+         // Take the entry by the map's own value_type. A parameter of type
+         // `const std::pair<TString, TNode>&` does not match it (hash map
+         // entries have a const key), so every entry would be converted into
+         // a temporary pair, deep-copying the attribute subtree.
+         Iterate(node.GetAttributes().AsMap(), SortMapKeys_, [&](const TNode::TMapType::value_type& item) {
+             Consumer_->OnKeyedItem(item.first);
+             if (item.second.IsUndefined()) {
+                 ythrow TNode::TTypeError() << "unable to visit attribute value of type "
+                     << TNode::EType::Undefined << "; attribute name: `" << item.first << '\'' ;
+             }
+             VisitAny(item.second);
+         });
+         Consumer_->OnEndAttributes();
+     }
+
+     switch (node.GetType()) {
+         case TNode::String:
+             VisitString(node);
+             break;
+         case TNode::Int64:
+             VisitInt64(node);
+             break;
+         case TNode::Uint64:
+             VisitUint64(node);
+             break;
+         case TNode::Double:
+             VisitDouble(node);
+             break;
+         case TNode::Bool:
+             VisitBool(node);
+             break;
+         case TNode::List:
+             VisitList(node.AsList());
+             break;
+         case TNode::Map:
+             VisitMap(node.AsMap());
+             break;
+         case TNode::Null:
+             VisitEntity();
+             break;
+         case TNode::Undefined:
+             ythrow TNode::TTypeError() << "unable to visit TNode of type " << node.GetType();
+         default:
+             // Cast explicitly: the value goes through a printf-style
+             // varargs slot formatted with %d.
+             Y_FAIL("Unexpected type: %d", static_cast<int>(node.GetType()));
+     }
+ }
+
+ // Scalar visitors: each forwards the node's value, read through the
+ // matching AsXxx accessor, to the consumer callback for that scalar type.
+
+ // Reports a string node via OnStringScalar.
+ void TNodeVisitor::VisitString(const TNode& node)
+ {
+ Consumer_->OnStringScalar(node.AsString());
+ }
+
+ // Reports a signed integer node via OnInt64Scalar.
+ void TNodeVisitor::VisitInt64(const TNode& node)
+ {
+ Consumer_->OnInt64Scalar(node.AsInt64());
+ }
+
+ // Reports an unsigned integer node via OnUint64Scalar.
+ void TNodeVisitor::VisitUint64(const TNode& node)
+ {
+ Consumer_->OnUint64Scalar(node.AsUint64());
+ }
+
+ // Reports a double node via OnDoubleScalar.
+ void TNodeVisitor::VisitDouble(const TNode& node)
+ {
+ Consumer_->OnDoubleScalar(node.AsDouble());
+ }
+
+ // Reports a boolean node via OnBooleanScalar.
+ void TNodeVisitor::VisitBool(const TNode& node)
+ {
+ Consumer_->OnBooleanScalar(node.AsBool());
+ }
+
+ // Reports a list to the consumer: OnBeginList, then OnListItem followed by
+ // the element itself for every element in order, then OnEndList.
+ //
+ // Throws TNode::TTypeError on an Undefined element; OnListItem has already
+ // been reported for that element when the exception is thrown.
+ void TNodeVisitor::VisitList(const TNode::TListType& nodeList)
+ {
+     Consumer_->OnBeginList();
+     for (size_t position = 0; position < nodeList.size(); ++position) {
+         const TNode& child = nodeList[position];
+         Consumer_->OnListItem();
+         if (child.IsUndefined()) {
+             ythrow TNode::TTypeError() << "unable to visit list node child of type "
+                 << TNode::EType::Undefined << "; list index: " << position;
+         }
+         VisitAny(child);
+     }
+     Consumer_->OnEndList();
+ }
+
+ // Reports a map to the consumer: OnBeginMap, then OnKeyedItem followed by
+ // the value for every entry, then OnEndMap. Entries are visited in sorted
+ // key order when the visitor was created with sortMapKeys == true.
+ //
+ // Throws TNode::TTypeError on an Undefined value; the key has already been
+ // reported through OnKeyedItem when the exception is thrown.
+ void TNodeVisitor::VisitMap(const TNode::TMapType& nodeMap)
+ {
+     Consumer_->OnBeginMap();
+     // Take the entry by the map's own value_type. A parameter of type
+     // `const std::pair<TString, TNode>&` does not match it (hash map entries
+     // have a const key), so every entry would be converted into a temporary
+     // pair, deep-copying the child subtree at each nesting level.
+     Iterate(nodeMap, SortMapKeys_, [&](const TNode::TMapType::value_type& item) {
+         Consumer_->OnKeyedItem(item.first);
+         if (item.second.IsUndefined()) {
+             ythrow TNode::TTypeError() << "unable to visit map node child of type "
+                 << TNode::EType::Undefined << "; map key: `" << item.first << '\'' ;
+         }
+         VisitAny(item.second);
+     });
+     Consumer_->OnEndMap();
+ }
+
+ // Reports an entity to the consumer via OnEntity; VisitAny calls this for
+ // nodes of type TNode::Null.
+ void TNodeVisitor::VisitEntity()
+ {
+ Consumer_->OnEntity();
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/node_visitor.h b/library/cpp/yson/node/node_visitor.h
new file mode 100644
index 00000000000..db258323092
--- /dev/null
+++ b/library/cpp/yson/node/node_visitor.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "node.h"
+
+#include <library/cpp/yson/consumer.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Walks a TNode tree and reports its structure to a YSON consumer as a
+ // sequence of begin/end, key/item and scalar events.
+ class TNodeVisitor
+ {
+ public:
+     // `consumer` receives the events; it is stored as a raw pointer and is
+     // not owned by the visitor. When `sortMapKeys` is true, map entries and
+     // attributes are reported in sorted key order.
+     TNodeVisitor(NYson::IYsonConsumer* consumer, bool sortMapKeys = false);
+
+     // Reports a whole node: attributes first (if any), then the value.
+     // Throws TNode::TTypeError if an Undefined node is encountered.
+     void Visit(const TNode& node);
+     // Reports a map (without attributes) as OnBeginMap ... OnEndMap.
+     void VisitMap(const TNode::TMapType& nodeMap);
+     // Reports a list (without attributes) as OnBeginList ... OnEndList.
+     void VisitList(const TNode::TListType& nodeList);
+
+ private:
+     NYson::IYsonConsumer* Consumer_;
+     bool SortMapKeys_;
+
+ private:
+     // Dispatches on the node type after reporting its attributes.
+     void VisitAny(const TNode& node);
+
+     // One forwarding helper per scalar type, plus the entity case.
+     void VisitString(const TNode& node);
+     void VisitInt64(const TNode& node);
+     void VisitUint64(const TNode& node);
+     void VisitDouble(const TNode& node);
+     void VisitBool(const TNode& node);
+     void VisitEntity();
+ };
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/pybind/node.cpp b/library/cpp/yson/node/pybind/node.cpp
new file mode 100644
index 00000000000..79beba36471
--- /dev/null
+++ b/library/cpp/yson/node/pybind/node.cpp
@@ -0,0 +1,105 @@
+#include "node.h"
+
+#include <library/cpp/yson/node/node.h>
+
+#include <library/cpp/pybind/cast.h>
+
+#include <Python.h>
+
+namespace NYT {
+
+ // Converts a TNode into a Python object by dispatching on the node type.
+ // Scalars, lists and maps are delegated to NPyBind::BuildPyObject; a Null
+ // node becomes Python None.
+ //
+ // Throws TNode::TTypeError for an Undefined node and for any type value
+ // that is not handled by the switch.
+ PyObject* BuildPyObject(const TNode& node) {
+     // Deliberately no `default:` label, so the compiler can still warn when
+     // a new enumerator is added and not handled here.
+     switch (node.GetType()) {
+         case TNode::Bool:
+             return NPyBind::BuildPyObject(node.AsBool());
+         case TNode::Int64:
+             return NPyBind::BuildPyObject(node.AsInt64());
+         case TNode::Uint64:
+             return NPyBind::BuildPyObject(node.AsUint64());
+         case TNode::Double:
+             return NPyBind::BuildPyObject(node.AsDouble());
+         case TNode::String:
+             return NPyBind::BuildPyObject(node.AsString());
+         case TNode::List:
+             return NPyBind::BuildPyObject(node.AsList());
+         case TNode::Map:
+             return NPyBind::BuildPyObject(node.AsMap());
+         case TNode::Null:
+             Py_RETURN_NONE;
+         case TNode::Undefined:
+             ythrow TNode::TTypeError() << "BuildPyObject called for undefined TNode";
+     }
+     // Reaching this point means GetType() produced a value outside the
+     // enumerators above. Flowing off the end of a non-void function is
+     // undefined behaviour, so fail explicitly instead.
+     ythrow TNode::TTypeError() << "BuildPyObject called for TNode of unexpected type";
+ }
+
+} // namespace NYT
+
+namespace NPyBind {
+
+ // Converts a Python object into a TNode.
+ //
+ // Supported inputs: None (entity), bool, float, str/bytes, list, dict and
+ // integers. Returns true on success. Returns false when the object has an
+ // unsupported type or when a nested/underlying conversion fails; a failed
+ // integer conversion leaves the Python error it raised in place.
+ //
+ // `res` is overwritten with a node of the target type before the value is
+ // filled in, so its contents are unspecified when false is returned.
+ template <>
+ bool FromPyObject(PyObject* obj, NYT::TNode& res) {
+     if (obj == Py_None) {
+         res = NYT::TNode::CreateEntity();
+         return true;
+     }
+     // bool must be tested before the integer checks below: in Python, bool
+     // is a subclass of int.
+     if (PyBool_Check(obj)) {
+         res = false;
+         return FromPyObject(obj, res.As<bool>());
+     }
+     if (PyFloat_Check(obj)) {
+         res = 0.0;
+         return FromPyObject(obj, res.As<double>());
+     }
+ #if PY_MAJOR_VERSION < 3
+     if (PyString_Check(obj)) {
+         res = TString();
+         return FromPyObject(obj, res.As<TString>());
+     }
+ #else
+     if (PyUnicode_Check(obj)) {
+         res = TString();
+         return FromPyObject(obj, res.As<TString>());
+     }
+     if (PyBytes_Check(obj)) {
+         res = TString();
+         return FromPyObject(obj, res.As<TString>());
+     }
+ #endif
+     if (PyList_Check(obj)) {
+         res = NYT::TNode::CreateList();
+         return FromPyObject(obj, res.AsList());
+     }
+     if (PyDict_Check(obj)) {
+         res = NYT::TNode::CreateMap();
+         return FromPyObject(obj, res.AsMap());
+     }
+ #if PY_MAJOR_VERSION < 3
+     if (PyInt_Check(obj)) {
+         auto valAsLong = PyInt_AsLong(obj);
+         // -1 is also a legitimate value, so consult the error indicator.
+         if (valAsLong == -1 && PyErr_Occurred()) {
+             return false;
+         }
+         res = valAsLong;
+         return true;
+     }
+ #endif
+     if (PyLong_Check(obj)) {
+         // Use the `long long` conversions: plain `long` is only 32 bits wide
+         // on LLP64 platforms, which would reject integers that fit the
+         // node's 64-bit storage.
+         int overflow = 0;
+         auto valAsLongLong = PyLong_AsLongLongAndOverflow(obj, &overflow);
+         if (!overflow) {
+             // -1 is also a legitimate value, so consult the error indicator.
+             if (valAsLongLong == -1 && PyErr_Occurred()) {
+                 return false;
+             }
+             res = valAsLongLong;
+             return true;
+         }
+         // Out of signed range: retry as unsigned. This raises (and we return
+         // false) for negative values and for values that do not fit either.
+         auto valAsULongLong = PyLong_AsUnsignedLongLong(obj);
+         if (valAsULongLong == static_cast<decltype(valAsULongLong)>(-1) && PyErr_Occurred()) {
+             return false;
+         }
+         res = valAsULongLong;
+         return true;
+     }
+     return false;
+ }
+
+} // namespace NPyBind
diff --git a/library/cpp/yson/node/pybind/node.h b/library/cpp/yson/node/pybind/node.h
new file mode 100644
index 00000000000..65f7236de68
--- /dev/null
+++ b/library/cpp/yson/node/pybind/node.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <Python.h>
+
+#include <library/cpp/yson/node/node.h>
+
+ namespace NYT {
+ // Builds a Python object from a TNode (implemented in node.cpp).
+ // NOTE(review): reference ownership of the returned PyObject* is not
+ // visible from this declaration -- confirm against NPyBind::BuildPyObject.
+ PyObject* BuildPyObject(const TNode& val);
+ }
diff --git a/library/cpp/yson/node/pybind/ya.make b/library/cpp/yson/node/pybind/ya.make
new file mode 100644
index 00000000000..97b7583e96f
--- /dev/null
+++ b/library/cpp/yson/node/pybind/ya.make
@@ -0,0 +1,16 @@
+PY23_NATIVE_LIBRARY()
+
+OWNER(
+ inngonch
+ g:yt
+)
+
+PEERDIR(
+ library/cpp/pybind
+ library/cpp/yson/node
+)
+SRCS(
+ node.cpp
+)
+
+END()
diff --git a/library/cpp/yson/node/serialize.cpp b/library/cpp/yson/node/serialize.cpp
new file mode 100644
index 00000000000..aeb467622bd
--- /dev/null
+++ b/library/cpp/yson/node/serialize.cpp
@@ -0,0 +1,101 @@
+#include "serialize.h"
+
+#include "node_visitor.h"
+
+#include <library/cpp/yson/consumer.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // String overloads: each reports the value to the consumer as a string
+ // scalar via OnStringScalar.
+ void Serialize(const TString& value, NYson::IYsonConsumer* consumer)
+ {
+ consumer->OnStringScalar(value);
+ }
+
+ void Serialize(const TStringBuf& value, NYson::IYsonConsumer* consumer)
+ {
+ consumer->OnStringScalar(value);
+ }
+
+ // NOTE(review): `value` is passed straight through; whether a null pointer
+ // is acceptable depends on OnStringScalar's parameter type -- confirm.
+ void Serialize(const char* value, NYson::IYsonConsumer* consumer)
+ {
+ consumer->OnStringScalar(value);
+ }
+
+ // Reads the node's string value through AsString() into `value`.
+ void Deserialize(TString& value, const TNode& node)
+ {
+ value = node.AsString();
+ }
+
+ // Generates `Serialize(type, consumer)` for one built-in integer type: the
+ // value is widened to `wideType` and reported through the consumer's
+ // `scalarMethod`. Signed types go through i64 / OnInt64Scalar, unsigned
+ // types through ui64 / OnUint64Scalar.
+ #define DEFINE_INTEGER_SERIALIZE(type, wideType, scalarMethod) \
+ void Serialize(type value, NYson::IYsonConsumer* consumer) \
+ { \
+     consumer->scalarMethod(static_cast<wideType>(value)); \
+ }
+
+ DEFINE_INTEGER_SERIALIZE(signed char, i64, OnInt64Scalar)
+ DEFINE_INTEGER_SERIALIZE(short, i64, OnInt64Scalar)
+ DEFINE_INTEGER_SERIALIZE(int, i64, OnInt64Scalar)
+ DEFINE_INTEGER_SERIALIZE(long, i64, OnInt64Scalar)
+ DEFINE_INTEGER_SERIALIZE(long long, i64, OnInt64Scalar)
+
+ DEFINE_INTEGER_SERIALIZE(unsigned char, ui64, OnUint64Scalar)
+ DEFINE_INTEGER_SERIALIZE(unsigned short, ui64, OnUint64Scalar)
+ DEFINE_INTEGER_SERIALIZE(unsigned int, ui64, OnUint64Scalar)
+ DEFINE_INTEGER_SERIALIZE(unsigned long, ui64, OnUint64Scalar)
+ DEFINE_INTEGER_SERIALIZE(unsigned long long, ui64, OnUint64Scalar)
+
+ // The helper is local to this file; do not let it leak further.
+ #undef DEFINE_INTEGER_SERIALIZE
+
+ // Scalar Deserialize overloads read the node through the accessor that
+ // matches the target type exactly (no cross-type conversion is attempted
+ // here); scalar Serialize overloads forward to the matching consumer
+ // callback.
+
+ // Reads a signed integer via AsInt64().
+ void Deserialize(i64& value, const TNode& node)
+ {
+ value = node.AsInt64();
+ }
+
+ // Reads an unsigned integer via AsUint64().
+ void Deserialize(ui64& value, const TNode& node)
+ {
+ value = node.AsUint64();
+ }
+
+ // Reports a double via OnDoubleScalar.
+ void Serialize(double value, NYson::IYsonConsumer* consumer)
+ {
+ consumer->OnDoubleScalar(value);
+ }
+
+ // Reads a double via AsDouble().
+ void Deserialize(double& value, const TNode& node)
+ {
+ value = node.AsDouble();
+ }
+
+ // Reports a boolean via OnBooleanScalar.
+ void Serialize(bool value, NYson::IYsonConsumer* consumer)
+ {
+ consumer->OnBooleanScalar(value);
+ }
+
+ // Reads a boolean via AsBool().
+ void Deserialize(bool& value, const TNode& node)
+ {
+ value = node.AsBool();
+ }
+
+ // Reports a whole node tree to the consumer by walking it with a
+ // TNodeVisitor constructed with its default (unsorted) key order.
+ void Serialize(const TNode& node, NYson::IYsonConsumer* consumer)
+ {
+     TNodeVisitor(consumer).Visit(node);
+ }
+
+ // Identity case: copy-assigns the source node into `value`.
+ void Deserialize(TNode& value, const TNode& node)
+ {
+ value = node;
+ }
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/serialize.h b/library/cpp/yson/node/serialize.h
new file mode 100644
index 00000000000..99b598a44c3
--- /dev/null
+++ b/library/cpp/yson/node/serialize.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "node.h"
+
+namespace NYT {
+
+namespace NYson {
+struct IYsonConsumer;
+} // namespace NYson
+
+////////////////////////////////////////////////////////////////////////////////
+
+ // Serialize(value, consumer) reports `value` to a YSON consumer;
+ // Deserialize(value, node) reads `value` back out of a TNode.
+
+ // Strings.
+ void Serialize(const TString& value, NYson::IYsonConsumer* consumer);
+ void Serialize(const TStringBuf& value, NYson::IYsonConsumer* consumer);
+ void Serialize(const char* value, NYson::IYsonConsumer* consumer);
+ void Deserialize(TString& value, const TNode& node);
+
+ // Signed integers: one overload per built-in type so calls never become
+ // ambiguous between the signed and unsigned families.
+ void Serialize(signed char value, NYson::IYsonConsumer* consumer);
+ void Serialize(short value, NYson::IYsonConsumer* consumer);
+ void Serialize(int value, NYson::IYsonConsumer* consumer);
+ void Serialize(long value, NYson::IYsonConsumer* consumer);
+ void Serialize(long long value, NYson::IYsonConsumer* consumer);
+ void Deserialize(i64& value, const TNode& node);
+
+ // Unsigned integers.
+ void Serialize(unsigned char value, NYson::IYsonConsumer* consumer);
+ void Serialize(unsigned short value, NYson::IYsonConsumer* consumer);
+ void Serialize(unsigned int value, NYson::IYsonConsumer* consumer);
+ void Serialize(unsigned long value, NYson::IYsonConsumer* consumer);
+ void Serialize(unsigned long long value, NYson::IYsonConsumer* consumer);
+ void Deserialize(ui64& value, const TNode& node);
+
+ // Floating point.
+ void Serialize(double value, NYson::IYsonConsumer* consumer);
+ void Deserialize(double& value, const TNode& node);
+
+ // Booleans.
+ void Serialize(bool value, NYson::IYsonConsumer* consumer);
+ void Deserialize(bool& value, const TNode& node);
+
+ // Whole node trees.
+ void Serialize(const TNode& node, NYson::IYsonConsumer* consumer);
+ void Deserialize(TNode& value, const TNode& node);
+
+ // NOTE(review): no definition of this overload appears in serialize.cpp as
+ // added alongside this header -- confirm it is defined elsewhere, otherwise
+ // any use will fail at link time.
+ void Serialize(const THashMap<TString, TString>& renameColumns, NYson::IYsonConsumer* consumer);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yson/node/ut/ya.make b/library/cpp/yson/node/ut/ya.make
new file mode 100644
index 00000000000..f49a0bf7dfb
--- /dev/null
+++ b/library/cpp/yson/node/ut/ya.make
@@ -0,0 +1,12 @@
+UNITTEST_FOR(library/cpp/yson/node)
+
+OWNER(
+ ermolovd
+ g:yt
+)
+
+SRCS(
+ node_ut.cpp
+)
+
+END()
diff --git a/library/cpp/yson/node/ya.make b/library/cpp/yson/node/ya.make
new file mode 100644
index 00000000000..a082b293c46
--- /dev/null
+++ b/library/cpp/yson/node/ya.make
@@ -0,0 +1,25 @@
+LIBRARY()
+
+GENERATE_ENUM_SERIALIZATION(node.h)
+
+PEERDIR(
+ library/cpp/yson
+ library/cpp/yson/json
+)
+
+OWNER(
+ ermolovd
+ g:yt
+)
+
+SRCS(
+ node.cpp
+ node_io.cpp
+ node_builder.cpp
+ node_visitor.cpp
+ serialize.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)
diff --git a/library/cpp/yson/parser.cpp b/library/cpp/yson/parser.cpp
new file mode 100644
index 00000000000..783f9b90479
--- /dev/null
+++ b/library/cpp/yson/parser.cpp
@@ -0,0 +1,179 @@
+#include "parser.h"
+#include "consumer.h"
+#include "format.h"
+#include "parser_detail.h"
+
+#include <util/stream/input.h>
+#include <util/generic/buffer.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Private implementation of TYsonParser: remembers the parse parameters
+ // and runs the stream parser over `stream` when Parse() is called.
+ class TYsonParser::TImpl {
+ private:
+     NYT::NYson::IYsonConsumer* Consumer_;
+     IInputStream* Stream_;
+     EYsonType Type_;
+     bool EnableLinePositionInfo_;
+     TMaybe<ui64> MemoryLimit_;
+
+ public:
+     TImpl(
+         NYT::NYson::IYsonConsumer* consumer,
+         IInputStream* stream,
+         EYsonType type,
+         bool enableLinePositionInfo,
+         TMaybe<ui64> memoryLimit = Nothing())
+         : Consumer_(consumer)
+         , Stream_(stream)
+         , Type_(type)
+         , EnableLinePositionInfo_(enableLinePositionInfo)
+         , MemoryLimit_(memoryLimit)
+     {
+     }
+
+     // Parses the whole stream, reading it through a 64 KiB scratch buffer
+     // that lives only for the duration of this call.
+     void Parse() {
+         TBuffer scratch(64 << 10);
+         ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStreamReader>(
+             TStreamReader(Stream_, scratch.Data(), scratch.Capacity()),
+             Consumer_,
+             Type_,
+             EnableLinePositionInfo_,
+             MemoryLimit_);
+     }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Public TYsonParser members: thin forwarding layer over TImpl.
+
+ // Captures all arguments in a freshly allocated TImpl.
+ TYsonParser::TYsonParser(
+ NYT::NYson::IYsonConsumer* consumer,
+ IInputStream* stream,
+ EYsonType type,
+ bool enableLinePositionInfo,
+ TMaybe<ui64> memoryLimit)
+ : Impl(new TImpl(consumer, stream, type, enableLinePositionInfo, memoryLimit))
+ {
+ }
+
+ // Defined here, after TImpl's definition; the header only forward-declares
+ // TImpl (presumably the holder needs the complete type to destroy it).
+ TYsonParser::~TYsonParser() {
+ }
+
+ // Runs the parse; see TImpl::Parse.
+ void TYsonParser::Parse() {
+ Impl->Parse();
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Private implementation of TStatelessYsonParser: owns one of the two
+ // TStatelessYsonParserImpl instantiations, chosen once at construction by
+ // `enableLinePositionInfo`, and forwards Parse() to it.
+ class TStatelessYsonParser::TImpl {
+ private:
+     THolder<TStatelessYsonParserImplBase> Impl;
+
+ public:
+     TImpl(
+         NYT::NYson::IYsonConsumer* consumer,
+         bool enableLinePositionInfo,
+         TMaybe<ui64> memoryLimit)
+     {
+         // The flag is a template argument of the implementation, so the
+         // runtime value has to select between two distinct types.
+         if (enableLinePositionInfo) {
+             Impl.Reset(new TStatelessYsonParserImpl<NYT::NYson::IYsonConsumer, true>(consumer, memoryLimit));
+         } else {
+             Impl.Reset(new TStatelessYsonParserImpl<NYT::NYson::IYsonConsumer, false>(consumer, memoryLimit));
+         }
+     }
+
+     void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) {
+         Impl->Parse(data, type);
+     }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Public TStatelessYsonParser members: thin forwarding layer over TImpl.
+
+ // Captures all arguments in a freshly allocated TImpl.
+ TStatelessYsonParser::TStatelessYsonParser(
+ NYT::NYson::IYsonConsumer* consumer,
+ bool enableLinePositionInfo,
+ TMaybe<ui64> memoryLimit)
+ : Impl(new TImpl(consumer, enableLinePositionInfo, memoryLimit))
+ {
+ }
+
+ // Defined here, after TImpl's definition; the header only forward-declares
+ // TImpl (presumably the holder needs the complete type to destroy it).
+ TStatelessYsonParser::~TStatelessYsonParser() {
+ }
+
+ // Parses `data` as YSON of the given `type`; see TImpl::Parse.
+ void TStatelessYsonParser::Parse(const TStringBuf& data, EYsonType type) {
+ Impl->Parse(data, type);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Parses YSON held entirely in memory: wraps [buffer.begin(), buffer.end())
+ // in a TStringReader and runs the same stream parser used for input
+ // streams, reporting events to `consumer`.
+ void ParseYsonStringBuffer(
+ const TStringBuf& buffer,
+ NYT::NYson::IYsonConsumer* consumer,
+ EYsonType type,
+ bool enableLinePositionInfo,
+ TMaybe<ui64> memoryLimit) {
+ ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStringReader>(
+ TStringReader(buffer.begin(), buffer.end()),
+ consumer,
+ type,
+ enableLinePositionInfo,
+ memoryLimit);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Private implementation of TYsonListParser. Unlike TYsonParser::TImpl it
+ // keeps the read buffer, the stream reader and the underlying parser as
+ // members, so they persist across successive Parse() calls.
+ class TYsonListParser::TImpl {
+ public:
+     TImpl(
+         NYT::NYson::IYsonConsumer* consumer,
+         IInputStream* stream,
+         bool enableLinePositionInfo,
+         TMaybe<ui64> memoryLimit = Nothing())
+         : Consumer_(consumer)
+         , Stream_(stream)
+         , EnableLinePositionInfo_(enableLinePositionInfo)
+         , MemoryLimit_(memoryLimit)
+         , Buffer_(64 << 10)
+         , Reader_(Stream_, Buffer_.Data(), Buffer_.Capacity())
+     {
+     }
+
+     // Forwards to the underlying list parser, creating it on first use.
+     bool Parse() {
+         if (!Impl_) {
+             // Line-position tracking is a template argument of the parser,
+             // so the runtime flag selects between two distinct types.
+             if (EnableLinePositionInfo_) {
+                 Impl_.Reset(new TYsonListParserImpl<NYT::NYson::IYsonConsumer, TStreamReader, true>(Reader_, Consumer_, MemoryLimit_));
+             } else {
+                 Impl_.Reset(new TYsonListParserImpl<NYT::NYson::IYsonConsumer, TStreamReader, false>(Reader_, Consumer_, MemoryLimit_));
+             }
+         }
+         return Impl_->Parse();
+     }
+
+ private:
+     // Declaration order matters: Reader_ is initialised from Stream_ and
+     // Buffer_, so both must be declared before it.
+     NYT::NYson::IYsonConsumer* Consumer_;
+     IInputStream* Stream_;
+     bool EnableLinePositionInfo_;
+     TMaybe<ui64> MemoryLimit_;
+     TBuffer Buffer_;
+     TStreamReader Reader_;
+     THolder<TYsonListParserImplBase> Impl_;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Public TYsonListParser members: thin forwarding layer over TImpl.
+
+ // Captures all arguments in a freshly allocated TImpl.
+ TYsonListParser::TYsonListParser(
+ NYT::NYson::IYsonConsumer* consumer,
+ IInputStream* stream,
+ bool enableLinePositionInfo,
+ TMaybe<ui64> memoryLimit)
+ : Impl(new TImpl(consumer, stream, enableLinePositionInfo, memoryLimit))
+ {
+ }
+
+ // Defined here, after TImpl's definition; the header only forward-declares
+ // TImpl (presumably the holder needs the complete type to destroy it).
+ TYsonListParser::~TYsonListParser() {
+ }
+
+ // Returns whatever TImpl::Parse returns.
+ bool TYsonListParser::Parse() {
+ return Impl->Parse();
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/parser.h b/library/cpp/yson/parser.h
new file mode 100644
index 00000000000..dce35a8cd40
--- /dev/null
+++ b/library/cpp/yson/parser.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include "public.h"
+
+#include <util/generic/maybe.h>
+#include <util/generic/ptr.h>
+
+class IInputStream;
+
+namespace NYT::NYson {
+struct IYsonConsumer;
+} // namespace NYT::NYson
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Parses YSON read from an input stream and reports it to a consumer.
+ // The implementation is hidden behind a pimpl (TImpl, see parser.cpp).
+ class TYsonParser {
+ public:
+ // `consumer` receives the parse events and `stream` supplies the input;
+ // both are taken as raw pointers. `type` selects node / list fragment /
+ // map fragment parsing. `memoryLimit` is optional (Nothing() by default).
+ TYsonParser(
+ NYT::NYson::IYsonConsumer* consumer,
+ IInputStream* stream,
+ EYsonType type = ::NYson::EYsonType::Node,
+ bool enableLinePositionInfo = false,
+ TMaybe<ui64> memoryLimit = Nothing());
+
+ ~TYsonParser();
+
+ // Parses the stream given at construction.
+ void Parse();
+
+ private:
+ class TImpl;
+ THolder<TImpl> Impl;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Parses YSON from in-memory buffers passed to Parse(), reporting it to the
+ // consumer given at construction. The implementation is hidden behind a
+ // pimpl (TImpl, see parser.cpp).
+ class TStatelessYsonParser {
+ public:
+ // `consumer` receives the parse events. `memoryLimit` is optional
+ // (Nothing() by default).
+ TStatelessYsonParser(
+ NYT::NYson::IYsonConsumer* consumer,
+ bool enableLinePositionInfo = false,
+ TMaybe<ui64> memoryLimit = Nothing());
+
+ ~TStatelessYsonParser();
+
+ // Parses `data` as YSON of the given `type`.
+ void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node);
+
+ private:
+ class TImpl;
+ THolder<TImpl> Impl;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Parses a YSON list from an input stream across repeated Parse() calls,
+ // reporting events to the consumer. The implementation is hidden behind a
+ // pimpl (TImpl, see parser.cpp).
+ class TYsonListParser {
+ public:
+ // `consumer` receives the parse events and `stream` supplies the input;
+ // both are taken as raw pointers. `memoryLimit` is optional (Nothing()
+ // by default).
+ TYsonListParser(
+ NYT::NYson::IYsonConsumer* consumer,
+ IInputStream* stream,
+ bool enableLinePositionInfo = false,
+ TMaybe<ui64> memoryLimit = Nothing());
+
+ ~TYsonListParser();
+
+ bool Parse(); // Returns false, if there is no more list items
+
+ private:
+ class TImpl;
+ THolder<TImpl> Impl;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Parses YSON held entirely in `buffer` and reports it to `consumer`.
+ // `type` selects node / list fragment / map fragment parsing; `memoryLimit`
+ // is optional (Nothing() by default).
+ void ParseYsonStringBuffer(
+ const TStringBuf& buffer,
+ NYT::NYson::IYsonConsumer* consumer,
+ EYsonType type = ::NYson::EYsonType::Node,
+ bool enableLinePositionInfo = false,
+ TMaybe<ui64> memoryLimit = Nothing());
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/parser_detail.h b/library/cpp/yson/parser_detail.h
new file mode 100644
index 00000000000..44223caf125
--- /dev/null
+++ b/library/cpp/yson/parser_detail.h
@@ -0,0 +1,381 @@
+#pragma once
+
+#include "detail.h"
+
+namespace NYson {
+ namespace NDetail {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo>
+ class TParser
+ : public TLexerBase<TBlockStream, EnableLinePositionInfo> {
+ private:
+ using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>;
+ TConsumer* Consumer;
+
+ public:
+ TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
+ : TBase(blockStream, memoryLimit)
+ , Consumer(consumer)
+ {
+ }
+
+ // Top-level entry point: parses one entity of the requested kind and then
+ // checks that nothing but skippable input follows it.
+ // Throws TYsonException ("Stray ...") when any other character remains.
+ void DoParse(EYsonType ysonType) {
+ // Top-level parsing uses the AllowFinish=true variants; fragments run
+ // until EndSymbol instead of a closing bracket.
+ switch (ysonType) {
+ case ::NYson::EYsonType::Node:
+ ParseNode<true>();
+ break;
+
+ case ::NYson::EYsonType::ListFragment:
+ ParseListFragment<true>(EndSymbol);
+ break;
+
+ case ::NYson::EYsonType::MapFragment:
+ ParseMapFragment<true>(EndSymbol);
+ break;
+
+ default:
+ Y_FAIL("unreachable");
+ }
+
+ // Drain the rest of the input: anything that is not EndSymbol after
+ // skipping spaces is an error; an EndSymbol that is still in the buffer
+ // is stepped over so the loop can make progress.
+ while (!(TBase::IsFinished() && TBase::IsEmpty())) {
+ if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) {
+ ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found";
+ } else if (!TBase::IsEmpty()) {
+ TBase::Advance(1);
+ }
+ }
+ }
+
+ // Parses the next item of a top-level list fragment.
+ // For every item but the first, a list separator must precede it.
+ // Returns false when no separator or no item is found.
+ bool DoParseListFragment(bool first) {
+     if (!first && !ParseListSeparator<true>(EndSymbol)) {
+         return false;
+     }
+     return ParseListItem<true>(EndSymbol);
+ }
+
+ void ParseAttributes() {
+ Consumer->OnBeginAttributes();
+ ParseMapFragment(EndAttributesSymbol);
+ TBase::SkipCharToken(EndAttributesSymbol);
+ Consumer->OnEndAttributes();
+ }
+
+ void ParseMap() {
+ Consumer->OnBeginMap();
+ ParseMapFragment(EndMapSymbol);
+ TBase::SkipCharToken(EndMapSymbol);
+ Consumer->OnEndMap();
+ }
+
+ void ParseList() {
+ Consumer->OnBeginList();
+ ParseListFragment(EndListSymbol);
+ TBase::SkipCharToken(EndListSymbol);
+ Consumer->OnEndList();
+ }
+
+ template <bool AllowFinish>
+ void ParseNode() {
+ return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar());
+ }
+
+ template <bool AllowFinish>
+ void ParseNode(char ch) {
+ if (ch == BeginAttributesSymbol) {
+ TBase::Advance(1);
+ ParseAttributes();
+ ch = TBase::SkipSpaceAndGetChar();
+ }
+
+ switch (ch) {
+ case BeginMapSymbol:
+ TBase::Advance(1);
+ ParseMap();
+ break;
+
+ case BeginListSymbol:
+ TBase::Advance(1);
+ ParseList();
+ break;
+
+ case '"': {
+ TBase::Advance(1);
+ TStringBuf value;
+ TBase::ReadQuotedString(&value);
+ Consumer->OnStringScalar(value);
+ break;
+ }
+ case StringMarker: {
+ TBase::Advance(1);
+ TStringBuf value;
+ TBase::ReadBinaryString(&value);
+ Consumer->OnStringScalar(value);
+ break;
+ }
+ case Int64Marker: {
+ TBase::Advance(1);
+ i64 value;
+ TBase::ReadBinaryInt64(&value);
+ Consumer->OnInt64Scalar(value);
+ break;
+ }
+ case Uint64Marker: {
+ TBase::Advance(1);
+ ui64 value;
+ TBase::ReadBinaryUint64(&value);
+ Consumer->OnUint64Scalar(value);
+ break;
+ }
+ case DoubleMarker: {
+ TBase::Advance(1);
+ double value;
+ TBase::ReadBinaryDouble(&value);
+ Consumer->OnDoubleScalar(value);
+ break;
+ }
+ case FalseMarker: {
+ TBase::Advance(1);
+ Consumer->OnBooleanScalar(false);
+ break;
+ }
+ case TrueMarker: {
+ TBase::Advance(1);
+ Consumer->OnBooleanScalar(true);
+ break;
+ }
+ case EntitySymbol:
+ TBase::Advance(1);
+ Consumer->OnEntity();
+ break;
+
+ default: {
+ if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state
+ ReadNumeric<AllowFinish>();
+ } else if (isalpha((unsigned char)ch) || ch == '_') {
+ TStringBuf value;
+ TBase::template ReadUnquotedString<AllowFinish>(&value);
+ Consumer->OnStringScalar(value);
+ } else if (ch == '%') {
+ TBase::Advance(1);
+ ch = TBase::template GetChar<AllowFinish>();
+ if (ch == 't' || ch == 'f') {
+ Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>());
+ } else {
+ Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>());
+ }
+ } else {
+ ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node";
+ }
+ }
+ }
+ }
+
+ void ParseKey() {
+ return ParseKey(TBase::SkipSpaceAndGetChar());
+ }
+
+ // Parses a map key whose first character `ch` has already been peeked and
+ // reports it through Consumer->OnKeyedItem().
+ // Accepted forms: a quoted string, a binary string (StringMarker), or an
+ // unquoted string starting with a letter or '_'.
+ // Throws TYsonException for any other leading character.
+ void ParseKey(char ch) {
+     switch (ch) {
+         case '"': {
+             TBase::Advance(1);
+             TStringBuf value;
+             TBase::ReadQuotedString(&value);
+             Consumer->OnKeyedItem(value);
+             break;
+         }
+         case StringMarker: {
+             TBase::Advance(1);
+             TStringBuf value;
+             TBase::ReadBinaryString(&value);
+             Consumer->OnKeyedItem(value);
+             break;
+         }
+         default: {
+             // isalpha() is undefined for negative arguments other than EOF,
+             // so bytes >= 0x80 must go through unsigned char first (the
+             // same cast ParseNode applies).
+             if (isalpha(static_cast<unsigned char>(ch)) || ch == '_') {
+                 TStringBuf value;
+                 TBase::ReadUnquotedString(&value);
+                 Consumer->OnKeyedItem(value);
+             } else {
+                 ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key";
+             }
+         }
+     }
+ }
+
+ // Parses a sequence of `key <KeyValueSeparatorSymbol> value` pairs separated
+ // by KeyedItemSeparatorSymbol, stopping (without consuming it) at endSymbol.
+ // A separator directly before endSymbol is accepted.
+ // Throws TYsonException when the key/value separator is missing or when a
+ // pair is followed by something other than the item separator or endSymbol.
+ template <bool AllowFinish>
+ void ParseMapFragment(char endSymbol) {
+ char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
+ while (ch != endSymbol) {
+ // `ch` is the peeked first character of the key.
+ ParseKey(ch);
+ ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
+ if (ch == KeyValueSeparatorSymbol) {
+ TBase::Advance(1);
+ } else {
+ ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found";
+ }
+ ParseNode<AllowFinish>();
+ ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
+ if (ch == KeyedItemSeparatorSymbol) {
+ TBase::Advance(1);
+ // Peek the start of the next key (or endSymbol) for the loop test.
+ ch = TBase::template SkipSpaceAndGetChar<AllowFinish>();
+ } else if (ch != endSymbol) {
+ ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol
+ << "' or '" << endSymbol << "' but '" << ch << "' found";
+ }
+ }
+ }
+
+ void ParseMapFragment(char endSymbol) {
+ ParseMapFragment<false>(endSymbol);
+ }
+
+ // Parses one list item unless the next significant character is endSymbol.
+ // Returns true when an item was parsed and reported to the consumer,
+ // false when endSymbol was reached (endSymbol itself is not consumed).
+ template <bool AllowFinish>
+ bool ParseListItem(char endSymbol) {
+     const char next = TBase::template SkipSpaceAndGetChar<AllowFinish>();
+     if (next == endSymbol) {
+         return false;
+     }
+     Consumer->OnListItem();
+     ParseNode<AllowFinish>(next);
+     return true;
+ }
+
+ // Consumes a list item separator if one comes next.
+ // Returns true when a separator was consumed, false when endSymbol comes
+ // next (it is left in place). Throws TYsonException for anything else.
+ template <bool AllowFinish>
+ bool ParseListSeparator(char endSymbol) {
+     const char next = TBase::template SkipSpaceAndGetChar<AllowFinish>();
+     if (next == ListItemSeparatorSymbol) {
+         TBase::Advance(1);
+         return true;
+     }
+     if (next == endSymbol) {
+         return false;
+     }
+     ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol
+                             << "' or '" << endSymbol << "' but '" << next << "' found";
+ }
+
+ template <bool AllowFinish>
+ void ParseListFragment(char endSymbol) {
+ while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) {
+ }
+ }
+
+ void ParseListFragment(char endSymbol) {
+ ParseListFragment<false>(endSymbol);
+ }
+
+ // Reads a textual numeric literal through the lexer, converts it with
+ // FromString according to the kind the lexer reported, and passes the value
+ // to the matching consumer callback.
+ // Conversion failures are rethrown as TYsonException.
+ template <bool AllowFinish>
+ void ReadNumeric() {
+     TStringBuf literal;
+     switch (TBase::template ReadNumeric<AllowFinish>(&literal)) {
+         case ENumericResult::Double: {
+             double parsed;
+             try {
+                 parsed = FromString<double>(literal);
+             } catch (yexception& e) {
+                 // Re-wrap so callers only ever see the parser's exception type.
+                 ythrow TYsonException() << "Failed to parse double literal '" << literal << "'" << e;
+             }
+             Consumer->OnDoubleScalar(parsed);
+             break;
+         }
+         case ENumericResult::Int64: {
+             i64 parsed;
+             try {
+                 parsed = FromString<i64>(literal);
+             } catch (yexception& e) {
+                 // Re-wrap so callers only ever see the parser's exception type.
+                 ythrow TYsonException() << "Failed to parse int64 literal '" << literal << "'" << e;
+             }
+             Consumer->OnInt64Scalar(parsed);
+             break;
+         }
+         case ENumericResult::Uint64: {
+             ui64 parsed;
+             try {
+                 // The last character of the literal is not part of the
+                 // digits and is dropped before conversion.
+                 parsed = FromString<ui64>(literal.SubStr(0, literal.size() - 1));
+             } catch (yexception& e) {
+                 // Re-wrap so callers only ever see the parser's exception type.
+                 ythrow TYsonException() << "Failed to parse uint64 literal '" << literal << "'" << e;
+             }
+             Consumer->OnUint64Scalar(parsed);
+             break;
+         }
+         default:
+             break;
+     }
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ }
+
+ // Builds a TParser over `blockStream` and runs DoParse(parsingMode) on it.
+ // Line position tracking is a compile-time parameter of TParser, so the
+ // runtime flag selects between the two instantiations.
+ template <class TConsumer, class TBlockStream>
+ void ParseYsonStreamImpl(
+     const TBlockStream& blockStream,
+     NYT::NYson::IYsonConsumer* consumer,
+     EYsonType parsingMode,
+     bool enableLinePositionInfo,
+     TMaybe<ui64> memoryLimit) {
+     if (!enableLinePositionInfo) {
+         NDetail::TParser<TConsumer, TBlockStream, false> parser(blockStream, consumer, memoryLimit);
+         parser.DoParse(parsingMode);
+         return;
+     }
+     NDetail::TParser<TConsumer, TBlockStream, true> parser(blockStream, consumer, memoryLimit);
+     parser.DoParse(parsingMode);
+ }
+
+ class TStatelessYsonParserImplBase {
+ public:
+ virtual void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) = 0;
+
+ virtual ~TStatelessYsonParserImplBase() {
+ }
+ };
+
+ template <class TConsumer, bool EnableLinePositionInfo>
+ class TStatelessYsonParserImpl
+ : public TStatelessYsonParserImplBase {
+ private:
+ using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>;
+ TParser Parser;
+
+ public:
+ TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit)
+ : Parser(TStringReader(), consumer, memoryLimit)
+ {
+ }
+
+ void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override {
+ Parser.SetBuffer(data.begin(), data.end());
+ Parser.DoParse(type);
+ }
+ };
+
+ class TYsonListParserImplBase {
+ public:
+ virtual bool Parse() = 0;
+
+ virtual ~TYsonListParserImplBase() {
+ }
+ };
+
+ template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo>
+ class TYsonListParserImpl
+ : public TYsonListParserImplBase {
+ private:
+ using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>;
+ TParser Parser;
+ bool First = true;
+
+ public:
+ TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
+ : Parser(blockStream, consumer, memoryLimit)
+ {
+ }
+
+ bool Parse() override {
+ bool ret = Parser.DoParseListFragment(First);
+ First = false;
+ return ret;
+ }
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/public.h b/library/cpp/yson/public.h
new file mode 100644
index 00000000000..1ed793592ba
--- /dev/null
+++ b/library/cpp/yson/public.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <library/cpp/yt/misc/enum.h>
+#include <util/generic/yexception.h>
+
+#include <library/cpp/yt/yson_string/public.h>
+#include <library/cpp/yt/yson/public.h>
+
+namespace NYson {
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ using NYT::NYson::EYsonFormat;
+ using NYT::NYson::EYsonType;
+
+ class TYsonStringBuf;
+
+ struct TYsonConsumerBase;
+
+ class TYsonWriter;
+ class TYsonParser;
+ class TStatelessYsonParser;
+ class TYsonListParser;
+
+ // Exception type of this library. It adds nothing to yexception, so it can
+ // be caught either as TYsonException or as yexception.
+ class TYsonException
+ : public yexception {};
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/string-inl.h b/library/cpp/yson/string-inl.h
new file mode 100644
index 00000000000..92e9ab4531e
--- /dev/null
+++ b/library/cpp/yson/string-inl.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#ifndef STRING_INL_H_
+#error "Direct inclusion of this file is not allowed, include string.h"
+// For the sake of sane code completion.
+#include "string.h"
+#endif
+
+#include <util/str_stl.h>
+
+namespace NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+// Compares two YSON string-like values.
+// Two empty values (explicit bool conversion yields false) are equal; an
+// empty and a non-empty value are not; otherwise both the string data and
+// the type must match.
+template <typename TLeft, typename TRight>
+bool Equals(const TLeft& lhs, const TRight& rhs)
+{
+    const bool lhsEmpty = !lhs.operator bool();
+    const bool rhsEmpty = !rhs.operator bool();
+    if (lhsEmpty || rhsEmpty) {
+        // Equal only when both sides are empty.
+        return lhsEmpty && rhsEmpty;
+    }
+    return lhs.AsStringBuf() == rhs.AsStringBuf() && lhs.GetType() == rhs.GetType();
+}
+
+} // namespace NDetail
+
+inline bool operator == (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs)
+{
+ return NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator != (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs)
+{
+ return !(lhs == rhs);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
+
+//! A hasher for TYsonStringBuf
+template <>
+struct THash<NYson::TYsonStringBuf>
+{
+ size_t operator () (const NYson::TYsonStringBuf& str) const
+ {
+ return THash<TStringBuf>()(str.AsStringBuf());
+ }
+};
diff --git a/library/cpp/yson/token.cpp b/library/cpp/yson/token.cpp
new file mode 100644
index 00000000000..c8584c8c2ea
--- /dev/null
+++ b/library/cpp/yson/token.cpp
@@ -0,0 +1,236 @@
+#include "token.h"
+
+#include <util/string/vector.h>
+#include <util/string/printf.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ ETokenType CharToTokenType(char ch) {
+ switch (ch) {
+ case ';':
+ return ETokenType::Semicolon;
+ case '=':
+ return ETokenType::Equals;
+ case '{':
+ return ETokenType::LeftBrace;
+ case '}':
+ return ETokenType::RightBrace;
+ case '#':
+ return ETokenType::Hash;
+ case '[':
+ return ETokenType::LeftBracket;
+ case ']':
+ return ETokenType::RightBracket;
+ case '<':
+ return ETokenType::LeftAngle;
+ case '>':
+ return ETokenType::RightAngle;
+ case '(':
+ return ETokenType::LeftParenthesis;
+ case ')':
+ return ETokenType::RightParenthesis;
+ case '+':
+ return ETokenType::Plus;
+ case ':':
+ return ETokenType::Colon;
+ case ',':
+ return ETokenType::Comma;
+ default:
+ return ETokenType::EndOfStream;
+ }
+ }
+
+ char TokenTypeToChar(ETokenType type) {
+ switch (type) {
+ case ETokenType::Semicolon:
+ return ';';
+ case ETokenType::Equals:
+ return '=';
+ case ETokenType::Hash:
+ return '#';
+ case ETokenType::LeftBracket:
+ return '[';
+ case ETokenType::RightBracket:
+ return ']';
+ case ETokenType::LeftBrace:
+ return '{';
+ case ETokenType::RightBrace:
+ return '}';
+ case ETokenType::LeftAngle:
+ return '<';
+ case ETokenType::RightAngle:
+ return '>';
+ case ETokenType::LeftParenthesis:
+ return '(';
+ case ETokenType::RightParenthesis:
+ return ')';
+ case ETokenType::Plus:
+ return '+';
+ case ETokenType::Colon:
+ return ':';
+ case ETokenType::Comma:
+ return ',';
+ default:
+ Y_FAIL("unreachable");
+ }
+ }
+
+ // Returns a printable representation of a token type.
+ // Single-character tokens are rendered as that character. Token types that
+ // have no character form (EndOfStream and the value types) are rendered by
+ // name: TokenTypeToChar() hits Y_FAIL for them, and TToken::CheckType()
+ // passes exactly those types here while building its error message, so
+ // without this a type mismatch would abort instead of throwing.
+ TString TokenTypeToString(ETokenType type) {
+     switch (type) {
+         case ETokenType::EndOfStream:
+             return "EndOfStream";
+         case ETokenType::String:
+             return "String";
+         case ETokenType::Int64:
+             return "Int64";
+         case ETokenType::Uint64:
+             return "Uint64";
+         case ETokenType::Double:
+             return "Double";
+         case ETokenType::Boolean:
+             return "Boolean";
+         default:
+             return TString(1, TokenTypeToChar(type));
+     }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ const TToken TToken::EndOfStream;
+
+ TToken::TToken()
+ : Type_(ETokenType::EndOfStream)
+ , Int64Value(0)
+ , Uint64Value(0)
+ , DoubleValue(0.0)
+ , BooleanValue(false)
+ {
+ }
+
+ TToken::TToken(ETokenType type)
+ : Type_(type)
+ , Int64Value(0)
+ , Uint64Value(0)
+ , DoubleValue(0.0)
+ , BooleanValue(false)
+ {
+ switch (type) {
+ case ETokenType::String:
+ case ETokenType::Int64:
+ case ETokenType::Uint64:
+ case ETokenType::Double:
+ case ETokenType::Boolean:
+ Y_FAIL("unreachable");
+ default:
+ break;
+ }
+ }
+
+ TToken::TToken(const TStringBuf& stringValue)
+ : Type_(ETokenType::String)
+ , StringValue(stringValue)
+ , Int64Value(0)
+ , Uint64Value(0)
+ , DoubleValue(0.0)
+ , BooleanValue(false)
+ {
+ }
+
+ // Builds an Int64 token carrying int64Value; every other payload slot is
+ // zero-initialized.
+ TToken::TToken(i64 int64Value)
+     : Type_(ETokenType::Int64)
+     , Int64Value(int64Value)
+     , Uint64Value(0)
+     , DoubleValue(0.0)
+     , BooleanValue(false) // previously left uninitialized, unlike the sibling constructors
+ {
+ }
+
+ TToken::TToken(ui64 uint64Value)
+ : Type_(ETokenType::Uint64)
+ , Int64Value(0)
+ , Uint64Value(uint64Value)
+ , DoubleValue(0.0)
+ , BooleanValue(false)
+ {
+ }
+
+ TToken::TToken(double doubleValue)
+ : Type_(ETokenType::Double)
+ , Int64Value(0)
+ , Uint64Value(0)
+ , DoubleValue(doubleValue)
+ , BooleanValue(false)
+ {
+ }
+
+ // Builds a Boolean token carrying booleanValue; every other payload slot is
+ // zero-initialized.
+ TToken::TToken(bool booleanValue)
+     : Type_(ETokenType::Boolean)
+     , Int64Value(0)
+     , Uint64Value(0) // previously left uninitialized, unlike the sibling constructors
+     , DoubleValue(0.0)
+     , BooleanValue(booleanValue)
+ {
+ }
+
+ bool TToken::IsEmpty() const {
+ return Type_ == ETokenType::EndOfStream;
+ }
+
+ const TStringBuf& TToken::GetStringValue() const {
+ CheckType(ETokenType::String);
+ return StringValue;
+ }
+
+ i64 TToken::GetInt64Value() const {
+ CheckType(ETokenType::Int64);
+ return Int64Value;
+ }
+
+ ui64 TToken::GetUint64Value() const {
+ CheckType(ETokenType::Uint64);
+ return Uint64Value;
+ }
+
+ double TToken::GetDoubleValue() const {
+ CheckType(ETokenType::Double);
+ return DoubleValue;
+ }
+
+ bool TToken::GetBooleanValue() const {
+ CheckType(ETokenType::Boolean);
+ return BooleanValue;
+ }
+
+ // Verifies that this token has the expected type.
+ // Throws TYsonException otherwise, with a dedicated message for the case
+ // where the token is EndOfStream.
+ void TToken::CheckType(ETokenType expectedType) const {
+     if (Type_ == expectedType) {
+         return;
+     }
+     if (Type_ == ETokenType::EndOfStream) {
+         ythrow TYsonException() << "Unexpected end of stream (ExpectedType: " << TokenTypeToString(expectedType) << ")";
+     }
+     ythrow TYsonException() << "Unexpected token (Token: '" << ToString(*this)
+                             << "', Type: " << TokenTypeToString(Type_)
+                             << ", ExpectedType: " << TokenTypeToString(expectedType) << ")";
+ }
+
+ void TToken::Reset() {
+ Type_ = ETokenType::EndOfStream;
+ Int64Value = 0;
+ Uint64Value = 0;
+ DoubleValue = 0.0;
+ StringValue = TStringBuf();
+ BooleanValue = false;
+ }
+
+ TString ToString(const TToken& token) {
+ switch (token.GetType()) {
+ case ETokenType::EndOfStream:
+ return TString();
+
+ case ETokenType::String:
+ return TString(token.GetStringValue());
+
+ case ETokenType::Int64:
+ return ::ToString(token.GetInt64Value());
+
+ case ETokenType::Uint64:
+ return ::ToString(token.GetUint64Value());
+
+ case ETokenType::Double:
+ return ::ToString(token.GetDoubleValue());
+
+ case ETokenType::Boolean:
+ return token.GetBooleanValue() ? "true" : "false";
+
+ default:
+ return TokenTypeToString(token.GetType());
+ }
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/token.h b/library/cpp/yson/token.h
new file mode 100644
index 00000000000..7283e569504
--- /dev/null
+++ b/library/cpp/yson/token.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include "public.h"
+
+#include <util/generic/strbuf.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ enum ETokenType {
+ EndOfStream,
+
+ String,
+ Int64,
+ Uint64,
+ Double,
+ Boolean,
+
+ // Special values:
+ // YSON
+ Semicolon, // ;
+ Equals, // =
+ Hash, // #
+ LeftBracket, // [
+ RightBracket, // ]
+ LeftBrace, // {
+ RightBrace, // }
+ LeftAngle, // <
+ RightAngle, // >
+
+ // Table ranges
+ LeftParenthesis, // (
+ RightParenthesis, // )
+ Plus, // +
+ Colon, // :
+ Comma, // ,
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ ETokenType CharToTokenType(char ch);
+ char TokenTypeToChar(ETokenType type);
+ TString TokenTypeToString(ETokenType type);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TLexerImpl;
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // A single YSON token: a type tag plus one payload slot per value type.
+ // Only the slot matching the type carries a meaningful value.
+ class TToken {
+ public:
+     // Shared constant token of type EndOfStream.
+     static const TToken EndOfStream;
+
+     // Creates an EndOfStream token.
+     TToken();
+     // Creates a token without payload. `type` must not be one of the value
+     // types (String, Int64, Uint64, Double, Boolean).
+     TToken(ETokenType type);
+     // Value-token constructors; the token type follows from the argument type.
+     explicit TToken(const TStringBuf& stringValue);
+     explicit TToken(i64 int64Value);
+     explicit TToken(ui64 uint64Value);
+     explicit TToken(double doubleValue);
+     explicit TToken(bool booleanValue);
+
+     ETokenType GetType() const {
+         return Type_;
+     }
+
+     // True when the token type is EndOfStream.
+     bool IsEmpty() const;
+
+     // Typed accessors; each one checks the token type through CheckType()
+     // before returning the payload.
+     const TStringBuf& GetStringValue() const;
+     i64 GetInt64Value() const;
+     ui64 GetUint64Value() const;
+     double GetDoubleValue() const;
+     bool GetBooleanValue() const;
+
+     // Throws TYsonException when the token type differs from expectedType.
+     void CheckType(ETokenType expectedType) const;
+     // Turns the token back into an EndOfStream token with cleared payload.
+     void Reset();
+
+ private:
+     friend class TLexerImpl;
+
+     ETokenType Type_;
+
+     TStringBuf StringValue;
+     i64 Int64Value;
+     ui64 Uint64Value;
+     double DoubleValue;
+     bool BooleanValue;
+ };
+
+ TString ToString(const TToken& token);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/tokenizer.cpp b/library/cpp/yson/tokenizer.cpp
new file mode 100644
index 00000000000..06760170d48
--- /dev/null
+++ b/library/cpp/yson/tokenizer.cpp
@@ -0,0 +1,37 @@
+#include "tokenizer.h"
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ TTokenizer::TTokenizer(const TStringBuf& input)
+ : Input(input)
+ , Parsed(0)
+ {
+ }
+
+ // Advances to the next token.
+ // Returns true when a token was read, false when the new current token is
+ // empty (EndOfStream).
+ bool TTokenizer::ParseNext() {
+ // Drop the bytes consumed by the previous call (Parsed starts at 0).
+ Input = Input.Tail(Parsed);
+ Token.Reset();
+ // Remember how many bytes this token took, for the next call and for
+ // GetCurrentSuffix().
+ Parsed = Lexer.GetToken(Input, &Token);
+ return !CurrentToken().IsEmpty();
+ }
+
+ const TToken& TTokenizer::CurrentToken() const {
+ return Token;
+ }
+
+ ETokenType TTokenizer::GetCurrentType() const {
+ return CurrentToken().GetType();
+ }
+
+ TStringBuf TTokenizer::GetCurrentSuffix() const {
+ return Input.Tail(Parsed);
+ }
+
+ const TStringBuf& TTokenizer::CurrentInput() const {
+ return Input;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/tokenizer.h b/library/cpp/yson/tokenizer.h
new file mode 100644
index 00000000000..0576aace95c
--- /dev/null
+++ b/library/cpp/yson/tokenizer.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "public.h"
+#include "lexer.h"
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TTokenizer {
+ public:
+ explicit TTokenizer(const TStringBuf& input);
+
+ bool ParseNext();
+ const TToken& CurrentToken() const;
+ ETokenType GetCurrentType() const;
+ TStringBuf GetCurrentSuffix() const;
+ const TStringBuf& CurrentInput() const;
+
+ private:
+ TStringBuf Input;
+ TToken Token;
+ TStatelessLexer Lexer;
+ size_t Parsed;
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/varint.cpp b/library/cpp/yson/varint.cpp
new file mode 100644
index 00000000000..d538ee3cffa
--- /dev/null
+++ b/library/cpp/yson/varint.cpp
@@ -0,0 +1,71 @@
+#include "varint.h"
+
+#include "zigzag.h"
+
+#include <util/generic/yexception.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Writes `value` as a varint: seven payload bits per byte, least significant
+ // group first, with the high bit set on every byte except the last.
+ // Returns the number of bytes written (at least one, even for zero).
+ int WriteVarUInt64(IOutputStream* output, ui64 value) {
+     int written = 0;
+     do {
+         ui8 chunk = static_cast<ui8>(value & 0x7F);
+         value >>= 7;
+         if (value != 0) {
+             // More groups follow: mark the continuation bit.
+             chunk |= 0x80;
+         }
+         output->Write(chunk);
+         ++written;
+     } while (value != 0);
+     return written;
+ }
+
+ int WriteVarInt32(IOutputStream* output, i32 value) {
+ return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode32(value)));
+ }
+
+ int WriteVarInt64(IOutputStream* output, i64 value) {
+ return WriteVarUInt64(output, static_cast<ui64>(ZigZagEncode64(value)));
+ }
+
+ // Reads a varint (seven payload bits per byte, least significant group
+ // first, high bit = continuation) from `input` into `*value`.
+ // Returns the number of bytes consumed.
+ // Throws yexception when the encoding runs longer than a ui64 allows or
+ // when the stream ends in the middle of the value.
+ int ReadVarUInt64(IInputStream* input, ui64* value) {
+     size_t count = 0;
+     ui64 result = 0;
+
+     ui8 byte = 0;
+     do {
+         // Checked before each read: fires once the shift for the next
+         // group would exceed the width of ui64.
+         if (7 * count > 8 * sizeof(ui64)) {
+             ythrow yexception() << "The data is too long to read ui64";
+         }
+         // A short read means truncated input, not over-long data, so it
+         // gets its own message.
+         if (input->Read(&byte, 1) != 1) {
+             ythrow yexception() << "Premature end of stream while reading ui64";
+         }
+         result |= (static_cast<ui64>(byte & 0x7F)) << (7 * count);
+         ++count;
+     } while (byte & 0x80);
+
+     *value = result;
+     return static_cast<int>(count);
+ }
+
+ // Reads a zigzag-encoded 32-bit signed varint from `input` into `*value`.
+ // Returns the number of bytes consumed.
+ // Throws yexception when the decoded varint does not fit into 32 bits, in
+ // addition to whatever ReadVarUInt64 throws.
+ int ReadVarInt32(IInputStream* input, i32* value) {
+     ui64 varInt;
+     int bytesRead = ReadVarUInt64(input, &varInt);
+     if (varInt > Max<ui32>()) {
+         // The message used to mention ui64, although this is the 32-bit
+         // range check.
+         ythrow yexception() << "The data is too long to read i32";
+     }
+     *value = ZigZagDecode32(static_cast<ui32>(varInt));
+     return bytesRead;
+ }
+
+ int ReadVarInt64(IInputStream* input, i64* value) {
+ ui64 varInt;
+ int bytesRead = ReadVarUInt64(input, &varInt);
+ *value = ZigZagDecode64(varInt);
+ return bytesRead;
+ }
+
+} // namespace NYson
diff --git a/library/cpp/yson/varint.h b/library/cpp/yson/varint.h
new file mode 100644
index 00000000000..80b1184e57f
--- /dev/null
+++ b/library/cpp/yson/varint.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include <util/system/defaults.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Various functions that read/write varints from/to a stream.
+
+ // Returns the number of bytes written.
+ int WriteVarUInt64(IOutputStream* output, ui64 value);
+ int WriteVarInt32(IOutputStream* output, i32 value);
+ int WriteVarInt64(IOutputStream* output, i64 value);
+
+ // Returns the number of bytes read.
+ int ReadVarUInt64(IInputStream* input, ui64* value);
+ int ReadVarInt32(IInputStream* input, i32* value);
+ int ReadVarInt64(IInputStream* input, i64* value);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/writer.cpp b/library/cpp/yson/writer.cpp
new file mode 100644
index 00000000000..054459f9f5f
--- /dev/null
+++ b/library/cpp/yson/writer.cpp
@@ -0,0 +1,355 @@
+#include "writer.h"
+
+#include "detail.h"
+#include "format.h"
+#include "parser.h"
+#include "varint.h"
+#include "zigzag.h"
+
+#include <util/string/cast.h>
+
+#include <cmath>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Copied from <util/string/escape.cpp>
+ namespace {
+ inline char HexDigit(char value) {
+ Y_ASSERT(value < 16);
+ if (value < 10)
+ return '0' + value;
+ else
+ return 'A' + value - 10;
+ }
+
+ inline char OctDigit(char value) {
+ Y_ASSERT(value < 8);
+ return '0' + value;
+ }
+
+ inline bool IsPrintable(char c) {
+ return c >= 32 && c <= 126;
+ }
+
+ inline bool IsHexDigit(char c) {
+ return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
+ }
+
+ inline bool IsOctDigit(char c) {
+ return c >= '0' && c <= '7';
+ }
+
+ const size_t ESCAPE_C_BUFFER_SIZE = 4;
+
+ // Escapes one character into `r` and returns the number of bytes written
+ // (1, 2 or 4). `next` is the character that follows `c` in the input
+ // (0 at the end); it decides which numeric escape is safe to use.
+ //  (1) Double quote and backslash get a backslash prefix.
+ //  (2) Other printable characters are copied as-is.
+ //  (3) \r, \n and \t use their simple escapes; codes 0..7 use a one-digit
+ //      octal escape unless the next character is an octal digit.
+ //  (4) Everything else is written as \xHH, or as three-digit octal when
+ //      the next character is a hex digit.
+ inline size_t EscapeC(unsigned char c, char next, char r[ESCAPE_C_BUFFER_SIZE]) {
+     // Emits a two-byte escape: a backslash followed by `marker`.
+     const auto twoByteEscape = [r](char marker) -> size_t {
+         r[0] = '\\';
+         r[1] = marker;
+         return 2;
+     };
+
+     switch (c) {
+         case '\"':
+             return twoByteEscape('\"');
+         case '\\':
+             return twoByteEscape('\\');
+         case '\r':
+             return twoByteEscape('r');
+         case '\n':
+             return twoByteEscape('n');
+         case '\t':
+             return twoByteEscape('t');
+         default:
+             break;
+     }
+
+     if (IsPrintable(c)) {
+         r[0] = c;
+         return 1;
+     }
+
+     if (c < 8 && !IsOctDigit(next)) {
+         return twoByteEscape(OctDigit(c));
+     }
+
+     // Four-byte numeric escapes.
+     r[0] = '\\';
+     if (!IsHexDigit(next)) {
+         r[1] = 'x';
+         r[2] = HexDigit((c & 0xF0) >> 4);
+         r[3] = HexDigit((c & 0x0F) >> 0);
+     } else {
+         r[1] = OctDigit((c & 0700) >> 6);
+         r[2] = OctDigit((c & 0070) >> 3);
+         r[3] = OctDigit((c & 0007) >> 0);
+     }
+     return 4;
+ }
+
+ // Writes the escaped form of str[0..len) to `output`.
+ // Runs of characters that need no escaping are written in one call; each
+ // escaped character is written from a small scratch buffer.
+ void EscapeC(const char* str, size_t len, IOutputStream& output) {
+     char escaped[ESCAPE_C_BUFFER_SIZE];
+
+     // Index of the first input byte that has not been written yet.
+     size_t flushed = 0;
+     for (size_t pos = 0; pos < len; ++pos) {
+         const char lookahead = (pos + 1 < len) ? str[pos + 1] : 0;
+         const size_t escapedLen = EscapeC(str[pos], lookahead, escaped);
+         if (escapedLen <= 1) {
+             // Copied verbatim: stays part of the pending run.
+             continue;
+         }
+         output.Write(str + flushed, pos - flushed);
+         flushed = pos + 1;
+         output.Write(escaped, escapedLen);
+     }
+
+     // Trailing run (the whole input when nothing was escaped).
+     output.Write(str + flushed, len - flushed);
+ }
+
+ // Formats a double as text. Finite values go through ::ToString;
+ // NaN and the infinities are rendered as the literals %nan, %inf and %-inf.
+ TString FloatToStringWithNanInf(double value) {
+     if (std::isfinite(value)) {
+         return ::ToString(value);
+     }
+
+     const TStringBuf literal(
+         std::isnan(value)
+             ? "%nan"
+             : (value > 0 ? "%inf" : "%-inf"));
+     return TString(literal.data(), literal.size());
+ }
+
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ TYsonWriter::TYsonWriter(
+ IOutputStream* stream,
+ EYsonFormat format,
+ EYsonType type,
+ bool enableRaw)
+ : Stream(stream)
+ , Format(format)
+ , Type(type)
+ , EnableRaw(enableRaw)
+ , Depth(0)
+ , BeforeFirstItem(true)
+ {
+ Y_ASSERT(stream);
+ }
+
+ // Writes IndentSize spaces for every level of the current nesting depth.
+ void TYsonWriter::WriteIndent() {
+     int remaining = IndentSize * Depth;
+     while (remaining-- > 0) {
+         Stream->Write(' ');
+     }
+ }
+
+ bool TYsonWriter::IsTopLevelFragmentContext() const {
+ return Depth == 0 && (Type == ::NYson::EYsonType::ListFragment || Type == ::NYson::EYsonType::MapFragment);
+ }
+
+ // Called after a complete node has been written. At the top level of a list
+ // or map fragment every node is followed by the item separator, plus a
+ // newline in the Text and Pretty formats; elsewhere this does nothing.
+ void TYsonWriter::EndNode() {
+     if (!IsTopLevelFragmentContext()) {
+         return;
+     }
+
+     const bool isListFragment = (Type == ::NYson::EYsonType::ListFragment);
+     const ETokenType separator = isListFragment ? ListItemSeparatorToken : KeyedItemSeparatorToken;
+     Stream->Write(TokenTypeToChar(separator));
+
+     const bool textual = (Format == EYsonFormat::Text || Format == EYsonFormat::Pretty);
+     if (textual) {
+         Stream->Write('\n');
+     }
+ }
+
+ void TYsonWriter::BeginCollection(ETokenType beginToken) {
+ Stream->Write(TokenTypeToChar(beginToken));
+ ++Depth;
+ BeforeFirstItem = true;
+ }
+
+ // Starts a new item inside a collection. Inside a nested collection this
+ // writes the separator before every item but the first and, in the Pretty
+ // format, a newline plus indentation. At the top level of a fragment
+ // nothing is written here (EndNode() emits the separators there).
+ void TYsonWriter::CollectionItem(ETokenType separatorToken) {
+     const bool nested = !IsTopLevelFragmentContext();
+
+     if (nested && !BeforeFirstItem) {
+         Stream->Write(TokenTypeToChar(separatorToken));
+     }
+     if (nested && Format == EYsonFormat::Pretty) {
+         Stream->Write('\n');
+         WriteIndent();
+     }
+
+     BeforeFirstItem = false;
+ }
+
+ void TYsonWriter::EndCollection(ETokenType endToken) {
+ --Depth;
+ if (Format == EYsonFormat::Pretty && !BeforeFirstItem) {
+ Stream->Write('\n');
+ WriteIndent();
+ }
+ Stream->Write(TokenTypeToChar(endToken));
+ BeforeFirstItem = false;
+ }
+
+ void TYsonWriter::WriteStringScalar(const TStringBuf& value) {
+ if (Format == EYsonFormat::Binary) {
+ Stream->Write(NDetail::StringMarker);
+ WriteVarInt32(Stream, static_cast<i32>(value.length()));
+ Stream->Write(value.begin(), value.length());
+ } else {
+ Stream->Write('"');
+ EscapeC(value.data(), value.length(), *Stream);
+ Stream->Write('"');
+ }
+ }
+
+ void TYsonWriter::OnStringScalar(TStringBuf value) {
+ WriteStringScalar(value);
+ EndNode();
+ }
+
+ void TYsonWriter::OnInt64Scalar(i64 value) {
+ if (Format == EYsonFormat::Binary) {
+ Stream->Write(NDetail::Int64Marker);
+ WriteVarInt64(Stream, value);
+ } else {
+ Stream->Write(::ToString(value));
+ }
+ EndNode();
+ }
+
+ void TYsonWriter::OnUint64Scalar(ui64 value) {
+ if (Format == EYsonFormat::Binary) {
+ Stream->Write(NDetail::Uint64Marker);
+ WriteVarUInt64(Stream, value);
+ } else {
+ Stream->Write(::ToString(value));
+ Stream->Write("u");
+ }
+ EndNode();
+ }
+
+ // Writes a double scalar. Binary format: the double marker followed by the
+ // raw bytes of the value. Text formats: the textual form, with a trailing
+ // '.' appended to finite values whose text has neither '.' nor 'e'.
+ void TYsonWriter::OnDoubleScalar(double value) {
+     if (Format == EYsonFormat::Binary) {
+         Stream->Write(NDetail::DoubleMarker);
+         Stream->Write(&value, sizeof(double));
+         EndNode();
+         return;
+     }
+
+     const auto text = FloatToStringWithNanInf(value);
+     Stream->Write(text);
+
+     const bool looksLikeInteger =
+         std::isfinite(value) &&
+         text.find('.') == TString::npos &&
+         text.find('e') == TString::npos;
+     if (looksLikeInteger) {
+         Stream->Write(".");
+     }
+     EndNode();
+ }
+
+ void TYsonWriter::OnBooleanScalar(bool value) {
+ if (Format == EYsonFormat::Binary) {
+ Stream->Write(value ? NDetail::TrueMarker : NDetail::FalseMarker);
+ } else {
+ Stream->Write(value ? "%true" : "%false");
+ }
+ EndNode();
+ }
+
+ void TYsonWriter::OnEntity() {
+ Stream->Write(TokenTypeToChar(EntityToken));
+ EndNode();
+ }
+
+ void TYsonWriter::OnBeginList() {
+ BeginCollection(BeginListToken);
+ }
+
+ void TYsonWriter::OnListItem() {
+ CollectionItem(ListItemSeparatorToken);
+ }
+
+ void TYsonWriter::OnEndList() {
+ EndCollection(EndListToken);
+ EndNode();
+ }
+
+ void TYsonWriter::OnBeginMap() {
+ BeginCollection(BeginMapToken);
+ }
+
+ // Writes a map key followed by the key/value separator; the Pretty format
+ // surrounds the separator with single spaces.
+ void TYsonWriter::OnKeyedItem(TStringBuf key) {
+     CollectionItem(KeyedItemSeparatorToken);
+     WriteStringScalar(key);
+
+     const bool pretty = (Format == NYson::EYsonFormat::Pretty);
+     if (pretty) {
+         Stream->Write(' ');
+     }
+     Stream->Write(TokenTypeToChar(KeyValueSeparatorToken));
+     if (pretty) {
+         Stream->Write(' ');
+     }
+
+     BeforeFirstItem = false;
+ }
+
+ void TYsonWriter::OnEndMap() {
+ EndCollection(EndMapToken);
+ EndNode();
+ }
+
+ void TYsonWriter::OnBeginAttributes() {
+ BeginCollection(BeginAttributesToken);
+ }
+
+ void TYsonWriter::OnEndAttributes() {
+ EndCollection(EndAttributesToken);
+ if (Format == NYson::EYsonFormat::Pretty) {
+ Stream->Write(' ');
+ }
+ }
+
+ void TYsonWriter::OnRaw(TStringBuf yson, EYsonType type) {
+ if (EnableRaw) {
+ Stream->Write(yson);
+ BeforeFirstItem = false;
+ } else {
+ TYsonConsumerBase::OnRaw(yson, type);
+ }
+ }
+
+ TYsonWriter::TState TYsonWriter::State() const {
+ TState state;
+ state.Depth = Depth;
+ state.BeforeFirstItem = BeforeFirstItem;
+ return state;
+ }
+
+ void TYsonWriter::Reset(const TState& state) {
+ Depth = state.Depth;
+ BeforeFirstItem = state.BeforeFirstItem;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ void ReformatYsonStream(
+ IInputStream* input,
+ IOutputStream* output,
+ EYsonFormat format,
+ EYsonType type) {
+ TYsonWriter writer(output, format, type);
+ TYsonParser parser(&writer, input, type);
+ parser.Parse();
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/writer.h b/library/cpp/yson/writer.h
new file mode 100644
index 00000000000..40f5d7d5014
--- /dev/null
+++ b/library/cpp/yson/writer.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include "public.h"
+#include "token.h"
+#include "consumer.h"
+
+#include <util/generic/noncopyable.h>
+
+class IOutputStream;
+class IZeroCopyInput;
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ class TYsonWriter // YSON consumer that writes the events it receives to an IOutputStream.
+ : public TYsonConsumerBase,
+ private TNonCopyable {
+ public:
+ class TState { // Opaque snapshot of the writer's Depth and BeforeFirstItem; see State() and Reset().
+ private:
+ int Depth;
+ bool BeforeFirstItem;
+ // Only TYsonWriter may fill in or read the snapshot fields.
+ friend class TYsonWriter;
+ };
+
+ public:
+ TYsonWriter( // Defaults: Binary format, Node type, raw output disabled.
+ IOutputStream* stream,
+ EYsonFormat format = EYsonFormat::Binary,
+ EYsonType type = ::NYson::EYsonType::Node,
+ bool enableRaw = false);
+ // Scalar and entity events.
+ void OnStringScalar(TStringBuf value) override;
+ void OnInt64Scalar(i64 value) override;
+ void OnUint64Scalar(ui64 value) override;
+ void OnDoubleScalar(double value) override;
+ void OnBooleanScalar(bool value) override;
+ void OnEntity() override;
+ // List events.
+ void OnBeginList() override;
+ void OnListItem() override;
+ void OnEndList() override;
+ // Map events; OnKeyedItem writes the key followed by the key/value separator.
+ void OnBeginMap() override;
+ void OnKeyedItem(TStringBuf key) override;
+ void OnEndMap() override;
+ // Attribute events.
+ void OnBeginAttributes() override;
+ void OnEndAttributes() override;
+ // Writes `yson` to the stream unchanged when EnableRaw is set; otherwise defers to TYsonConsumerBase::OnRaw.
+ void OnRaw(TStringBuf yson, EYsonType type = ::NYson::EYsonType::Node) override;
+ // State() copies Depth/BeforeFirstItem into a TState; Reset() copies them back.
+ TState State() const;
+ void Reset(const TState& state);
+
+ protected:
+ IOutputStream* Stream; // destination of all output
+ EYsonFormat Format; // Pretty adds extra spaces around separators
+ EYsonType Type;
+ bool EnableRaw; // presumably initialised from the constructor's enableRaw -- confirm in writer.cpp
+ // Bookkeeping that State()/Reset() save and restore.
+ int Depth;
+ bool BeforeFirstItem;
+
+ static const int IndentSize = 4; // presumably spaces per nesting level used by WriteIndent() -- confirm in writer.cpp
+
+ void WriteIndent();
+ void WriteStringScalar(const TStringBuf& value);
+ // Shared helpers behind the list/map/attributes events.
+ void BeginCollection(ETokenType beginToken);
+ void CollectionItem(ETokenType separatorToken);
+ void EndCollection(ETokenType endToken);
+
+ bool IsTopLevelFragmentContext() const;
+ void EndNode(); // called after a list or map has been closed
+ };
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ void ReformatYsonStream(
+ IInputStream* input,
+ IOutputStream* output,
+ EYsonFormat format = EYsonFormat::Binary,
+ EYsonType type = ::NYson::EYsonType::Node);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson
diff --git a/library/cpp/yson/ya.make b/library/cpp/yson/ya.make
new file mode 100644
index 00000000000..c55a189b105
--- /dev/null
+++ b/library/cpp/yson/ya.make
@@ -0,0 +1,23 @@
+LIBRARY()
+# Owners of this library.
+OWNER(
+ ermolovd
+ g:yt
+)
+# Other libraries this one depends on.
+PEERDIR(
+ library/cpp/yt/misc
+ library/cpp/yt/yson
+)
+# Source files compiled into the library.
+SRCS(
+ consumer.cpp
+ lexer.cpp
+ parser.cpp
+ token.cpp
+ tokenizer.cpp
+ varint.cpp
+ writer.cpp
+)
+
+END()
diff --git a/library/cpp/yson/zigzag.h b/library/cpp/yson/zigzag.h
new file mode 100644
index 00000000000..2f1190508fb
--- /dev/null
+++ b/library/cpp/yson/zigzag.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <util/system/defaults.h>
+
+namespace NYson {
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //! Functions that provide coding of integers with property: 0 <= f(x) <= 2 * |x|
+ //! Actually taken 'as is' from protobuf/wire_format_lite.h
+
+ inline ui32 ZigZagEncode32(i32 n) {
+     // Magnitude bits move up by one, leaving bit 0 free for the sign.
+     const ui32 shifted = static_cast<ui32>(n) << 1;
+     // Note: the right-shift must be arithmetic, so this is all ones for
+     // negative n and zero otherwise.
+     const ui32 signMask = static_cast<ui32>(n >> 31);
+     return shifted ^ signMask;
+ }
+
+ inline i32 ZigZagDecode32(ui32 n) {
+     // Bit 0 carries the sign; the remaining bits carry the magnitude.
+     const ui32 magnitude = n >> 1;
+     // All ones when the sign bit is set, zero otherwise.
+     const ui32 signMask = static_cast<ui32>(-static_cast<i32>(n & 1));
+     return static_cast<i32>(magnitude ^ signMask);
+ }
+
+ inline ui64 ZigZagEncode64(i64 n) {
+     // Magnitude bits move up by one, leaving bit 0 free for the sign.
+     const ui64 shifted = static_cast<ui64>(n) << 1;
+     // Note: the right-shift must be arithmetic, so this is all ones for
+     // negative n and zero otherwise.
+     const ui64 signMask = static_cast<ui64>(n >> 63);
+     return shifted ^ signMask;
+ }
+
+ inline i64 ZigZagDecode64(ui64 n) {
+     // Bit 0 carries the sign; the remaining bits carry the magnitude.
+     const ui64 magnitude = n >> 1;
+     // All ones when the sign bit is set, zero otherwise.
+     const ui64 signMask = static_cast<ui64>(-static_cast<i64>(n & 1));
+     return static_cast<i64>(magnitude ^ signMask);
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYson