aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yt/yson_string
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yt/yson_string
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yt/yson_string')
-rw-r--r--library/cpp/yt/yson_string/convert.cpp381
-rw-r--r--library/cpp/yt/yson_string/convert.h114
-rw-r--r--library/cpp/yt/yson_string/format.h44
-rw-r--r--library/cpp/yt/yson_string/public.h39
-rw-r--r--library/cpp/yt/yson_string/string-inl.h93
-rw-r--r--library/cpp/yt/yson_string/string.cpp185
-rw-r--r--library/cpp/yt/yson_string/string.h140
-rw-r--r--library/cpp/yt/yson_string/unittests/convert_ut.cpp79
-rw-r--r--library/cpp/yt/yson_string/unittests/ya.make15
-rw-r--r--library/cpp/yt/yson_string/ya.make21
10 files changed, 1111 insertions, 0 deletions
diff --git a/library/cpp/yt/yson_string/convert.cpp b/library/cpp/yt/yson_string/convert.cpp
new file mode 100644
index 0000000000..27f5c30d01
--- /dev/null
+++ b/library/cpp/yt/yson_string/convert.cpp
@@ -0,0 +1,381 @@
+#include "convert.h"
+#include "format.h"
+
+#include <library/cpp/yt/assert/assert.h>
+
+#include <library/cpp/yt/string/format.h>
+
+#include <library/cpp/yt/coding/varint.h>
+
+#include <library/cpp/yt/misc/cast.h>
+
+#include <array>
+
+#include <util/stream/mem.h>
+
+namespace NYT::NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Serializes an |i8| by widening it to |i64| and reusing the |i64| specialization.
+template <>
+TYsonString ConvertToYsonString<i8>(const i8& value)
+{
+    const auto widened = static_cast<i64>(value);
+    return ConvertToYsonString(widened);
+}
+
+//! Serializes an |i32| by widening it to |i64| and reusing the |i64| specialization.
+template <>
+TYsonString ConvertToYsonString<i32>(const i32& value)
+{
+    const auto widened = static_cast<i64>(value);
+    return ConvertToYsonString(widened);
+}
+
+//! Serializes an |i64| as the binary int64 marker followed by its varint encoding.
+template <>
+TYsonString ConvertToYsonString<i64>(const i64& value)
+{
+    // One byte for the marker plus MaxVarInt64Size bytes for the encoded value.
+    std::array<char, 1 + MaxVarInt64Size> storage;
+    auto* cursor = storage.data();
+    *cursor = NDetail::Int64Marker;
+    ++cursor;
+    cursor += WriteVarInt64(cursor, value);
+    const auto writtenSize = cursor - storage.data();
+    return TYsonString(TStringBuf(storage.data(), writtenSize));
+}
+
+//! Serializes a |ui8| by widening it to |ui64| and reusing the |ui64| specialization.
+template <>
+TYsonString ConvertToYsonString<ui8>(const ui8& value)
+{
+    const auto widened = static_cast<ui64>(value);
+    return ConvertToYsonString(widened);
+}
+
+//! Serializes a |ui32| by widening it to |ui64| and reusing the |ui64| specialization.
+template <>
+TYsonString ConvertToYsonString<ui32>(const ui32& value)
+{
+    const auto widened = static_cast<ui64>(value);
+    return ConvertToYsonString(widened);
+}
+
+//! Serializes a |ui64| as the binary uint64 marker followed by its varint encoding.
+template <>
+TYsonString ConvertToYsonString<ui64>(const ui64& value)
+{
+    // One byte for the marker plus MaxVarInt64Size bytes for the encoded value.
+    std::array<char, 1 + MaxVarInt64Size> storage;
+    auto* cursor = storage.data();
+    *cursor = NDetail::Uint64Marker;
+    ++cursor;
+    cursor += WriteVarUint64(cursor, value);
+    const auto writtenSize = cursor - storage.data();
+    return TYsonString(TStringBuf(storage.data(), writtenSize));
+}
+
+//! Serializes a TString by viewing it as a TStringBuf and reusing that specialization.
+template <>
+TYsonString ConvertToYsonString<TString>(const TString& value)
+{
+    const TStringBuf view(value);
+    return ConvertToYsonString(view);
+}
+
+//! Empty tag type; used as the template argument of TSharedMutableRef::Allocate
+//! when the TStringBuf specialization of ConvertToYsonString allocates its buffer.
+struct TConvertStringToYsonStringTag
+{ };
+
+//! Serializes a string as the binary string marker, the varint-encoded length
+//! and then the raw bytes. The result is backed by a TSharedMutableRef buffer.
+template <>
+TYsonString ConvertToYsonString<TStringBuf>(const TStringBuf& value)
+{
+    const auto payloadSize = value.length();
+    // Room for the marker, MaxVarInt64Size bytes of length prefix and the payload.
+    auto storage = TSharedMutableRef::Allocate<TConvertStringToYsonStringTag>(
+        1 + MaxVarInt64Size + payloadSize,
+        /*initializeStorage*/ false);
+    auto* cursor = storage.Begin();
+    *cursor = NDetail::StringMarker;
+    ++cursor;
+    cursor += WriteVarInt64(cursor, static_cast<i64>(payloadSize));
+    ::memcpy(cursor, value.data(), payloadSize);
+    cursor += payloadSize;
+    // Only the bytes actually written become part of the resulting string.
+    return TYsonString(storage.Slice(storage.Begin(), cursor));
+}
+
+//! Serializes a C string; wraps it into a TStringBuf and reuses that specialization.
+TYsonString ConvertToYsonString(const char* value)
+{
+    const TStringBuf view(value);
+    return ConvertToYsonString(view);
+}
+
+//! Serializes a |float| by widening it to |double| and reusing the |double| specialization.
+template <>
+TYsonString ConvertToYsonString<float>(const float& value)
+{
+    const auto widened = static_cast<double>(value);
+    return ConvertToYsonString(widened);
+}
+
+//! Serializes a |double| as the binary double marker followed by the raw
+//! in-memory bytes of the value.
+template <>
+TYsonString ConvertToYsonString<double>(const double& value)
+{
+    std::array<char, 1 + sizeof(double)> storage;
+    storage[0] = NDetail::DoubleMarker;
+    // The payload is copied byte-for-byte right after the marker.
+    ::memcpy(storage.data() + 1, &value, sizeof(value));
+    return TYsonString(TStringBuf(storage.data(), storage.size()));
+}
+
+//! Serializes a boolean as a single marker byte: the "true" or the "false" marker.
+template <>
+TYsonString ConvertToYsonString<bool>(const bool& value)
+{
+    char marker;
+    if (value) {
+        marker = NDetail::TrueMarker;
+    } else {
+        marker = NDetail::FalseMarker;
+    }
+    return TYsonString(TStringBuf(&marker, 1));
+}
+
+//! Serializes an instant as a YSON string holding the result of TInstant::ToString().
+template <>
+TYsonString ConvertToYsonString<TInstant>(const TInstant& value)
+{
+    const auto text = value.ToString();
+    return ConvertToYsonString(text);
+}
+
+//! Serializes a duration as the integer returned by TDuration::MilliSeconds().
+template <>
+TYsonString ConvertToYsonString<TDuration>(const TDuration& value)
+{
+    const auto milliseconds = value.MilliSeconds();
+    return ConvertToYsonString(milliseconds);
+}
+
+//! Serializes a guid as a binary YSON string holding its textual form.
+//! Both the text and the YSON encoding are built in stack buffers.
+template <>
+TYsonString ConvertToYsonString<TGuid>(const TGuid& value)
+{
+    // Step 1: render the guid text into a scratch buffer.
+    std::array<char, MaxGuidStringSize> text;
+    const auto textLength = WriteGuidToBuffer(text.data(), value) - text.data();
+
+    // Step 2: emit marker, varint length and the text itself.
+    std::array<char, 1 + MaxVarInt64Size + MaxGuidStringSize> encoded;
+    auto* cursor = encoded.data();
+    *cursor = NDetail::StringMarker;
+    ++cursor;
+    cursor += WriteVarInt64(cursor, static_cast<i64>(textLength));
+    ::memcpy(cursor, text.data(), textLength);
+    cursor += textLength;
+
+    const auto encodedLength = cursor - encoded.data();
+    return TYsonString(TStringBuf(encoded.data(), encodedLength));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+//! Describes the YSON construct introduced by the character |ch|.
+/*!
+ *  The result is a noun phrase meant to be substituted into an
+ *  "Unexpected %v" message, which is how every parser in this file uses it.
+ */
+TString FormatUnexpectedMarker(char ch)
+{
+    switch (ch) {
+        case NDetail::BeginListSymbol:
+            return "list";
+        case NDetail::BeginMapSymbol:
+            return "map";
+        case NDetail::BeginAttributesSymbol:
+            return "attributes";
+        case NDetail::EntitySymbol:
+            return "\"entity\" literal";
+        case NDetail::StringMarker:
+            return "\"string\" literal";
+        case NDetail::Int64Marker:
+            return "\"int64\" literal";
+        case NDetail::DoubleMarker:
+            return "\"double\" literal";
+        case NDetail::FalseMarker:
+        case NDetail::TrueMarker:
+            return "\"boolean\" literal";
+        case NDetail::Uint64Marker:
+            return "\"uint64\" literal";
+        default:
+            // Callers already prepend "Unexpected"; repeating the word here
+            // used to yield "Unexpected unexpected symbol ...".
+            return Format("symbol %qv", ch);
+    }
+}
+
+//! Decodes a binary int64 literal: the int64 marker followed by a varint.
+/*!
+ *  Throws TYsonLiteralParseException if the input is empty, starts with a
+ *  different marker, or the varint cannot be read.
+ */
+i64 ParseInt64FromYsonString(const TYsonStringBuf& str)
+{
+    YT_ASSERT(str.GetType() == EYsonType::Node);
+    const auto bytes = str.AsStringBuf();
+    TMemoryInput stream(bytes.data(), bytes.length());
+
+    char marker;
+    const bool hasMarker = stream.ReadChar(marker);
+    if (!hasMarker) {
+        throw TYsonLiteralParseException("Missing type marker");
+    }
+    if (marker != NDetail::Int64Marker) {
+        const auto description = FormatUnexpectedMarker(marker);
+        throw TYsonLiteralParseException(Format("Unexpected %v", description));
+    }
+
+    i64 decoded;
+    try {
+        ReadVarInt64(&stream, &decoded);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Failed to decode \"int64\" value");
+    }
+    return decoded;
+}
+
+//! Decodes a binary uint64 literal: the uint64 marker followed by a varint.
+/*!
+ *  Throws TYsonLiteralParseException if the input is empty, starts with a
+ *  different marker, or the varint cannot be read.
+ */
+ui64 ParseUint64FromYsonString(const TYsonStringBuf& str)
+{
+    YT_ASSERT(str.GetType() == EYsonType::Node);
+    const auto bytes = str.AsStringBuf();
+    TMemoryInput stream(bytes.data(), bytes.length());
+
+    char marker;
+    const bool hasMarker = stream.ReadChar(marker);
+    if (!hasMarker) {
+        throw TYsonLiteralParseException("Missing type marker");
+    }
+    if (marker != NDetail::Uint64Marker) {
+        const auto description = FormatUnexpectedMarker(marker);
+        throw TYsonLiteralParseException(Format("Unexpected %v", description));
+    }
+
+    ui64 decoded;
+    try {
+        ReadVarUint64(&stream, &decoded);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Failed to decode \"uint64\" value");
+    }
+    return decoded;
+}
+
+//! Decodes a binary string literal: the string marker, a varint length and
+//! exactly that many payload bytes.
+/*!
+ *  Throws TYsonLiteralParseException if the input is empty, starts with a
+ *  different marker, the length cannot be read or is negative, or the number
+ *  of remaining bytes differs from the declared length.
+ */
+TString ParseStringFromYsonString(const TYsonStringBuf& str)
+{
+    YT_ASSERT(str.GetType() == EYsonType::Node);
+    auto strBuf = str.AsStringBuf();
+    TMemoryInput input(strBuf.data(), strBuf.length());
+    char ch;
+    if (!input.ReadChar(ch)) {
+        throw TYsonLiteralParseException("Missing type marker");
+    }
+    if (ch != NDetail::StringMarker) {
+        throw TYsonLiteralParseException(Format("Unexpected %v",
+            FormatUnexpectedMarker(ch)));
+    }
+    i64 length;
+    try {
+        ReadVarInt64(&input, &length);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Failed to decode string length");
+    }
+    if (length < 0) {
+        // The format string previously lacked a placeholder, so the offending
+        // length was silently dropped from the message.
+        throw TYsonLiteralParseException(Format("Negative string length %v",
+            length));
+    }
+    // The payload must occupy the rest of the input exactly: neither truncated
+    // nor followed by trailing bytes.
+    if (static_cast<i64>(input.Avail()) != length) {
+        throw TYsonLiteralParseException(Format("Incorrect remaining string length: expected %v, got %v",
+            length,
+            input.Avail()));
+    }
+    TString result;
+    result.ReserveAndResize(length);
+    YT_VERIFY(static_cast<i64>(input.Read(result.Detach(), length)) == length);
+    return result;
+}
+
+//! Decodes a binary double literal: the double marker followed by exactly
+//! sizeof(double) raw bytes.
+/*!
+ *  Throws TYsonLiteralParseException if the input is empty, starts with a
+ *  different marker, or the payload size is not sizeof(double).
+ */
+double ParseDoubleFromYsonString(const TYsonStringBuf& str)
+{
+    YT_ASSERT(str.GetType() == EYsonType::Node);
+    const auto bytes = str.AsStringBuf();
+    TMemoryInput stream(bytes.data(), bytes.length());
+
+    char marker;
+    const bool hasMarker = stream.ReadChar(marker);
+    if (!hasMarker) {
+        throw TYsonLiteralParseException("Missing type marker");
+    }
+    if (marker != NDetail::DoubleMarker) {
+        const auto description = FormatUnexpectedMarker(marker);
+        throw TYsonLiteralParseException(Format("Unexpected %v", description));
+    }
+
+    if (stream.Avail() != sizeof(double)) {
+        throw TYsonLiteralParseException(Format("Incorrect remaining string length: expected %v, got %v",
+            sizeof(double),
+            stream.Avail()));
+    }
+
+    double decoded;
+    YT_VERIFY(stream.Read(&decoded, sizeof(decoded)));
+    return decoded;
+}
+
+} // namespace
+
+// Defines the ConvertFromYsonString specialization for the integral |type|.
+// The value is parsed with Parse<underlyingType>FromYsonString (the name is
+// assembled via token pasting) and then converted with CheckedIntegralCast.
+// Any std::exception raised on the way is rethrown as TYsonLiteralParseException
+// whose message names |type|.
+#define PARSE(type, underlyingType) \
+ template <> \
+ type ConvertFromYsonString<type>(const TYsonStringBuf& str) \
+ { \
+ try { \
+ return CheckedIntegralCast<type>(Parse ## underlyingType ## FromYsonString(str)); \
+ } catch (const std::exception& ex) { \
+ throw TYsonLiteralParseException(ex, "Error parsing \"" #type "\" value from YSON"); \
+ } \
+ }
+
+// Signed types are parsed via the int64 parser, unsigned ones via the uint64 parser.
+PARSE(i8, Int64 )
+PARSE(i16, Int64 )
+PARSE(i32, Int64 )
+PARSE(i64, Int64 )
+PARSE(ui8, Uint64)
+PARSE(ui16, Uint64)
+PARSE(ui32, Uint64)
+PARSE(ui64, Uint64)
+
+#undef PARSE
+
+//! Parses a binary string literal; any failure is rethrown as
+//! TYsonLiteralParseException mentioning the "string" type.
+template <>
+TString ConvertFromYsonString<TString>(const TYsonStringBuf& str)
+{
+    try {
+        auto parsed = ParseStringFromYsonString(str);
+        return parsed;
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"string\" value from YSON");
+    }
+}
+
+//! Parses a binary double literal and narrows it to |float|; any failure is
+//! rethrown as TYsonLiteralParseException mentioning the "float" type.
+template <>
+float ConvertFromYsonString<float>(const TYsonStringBuf& str)
+{
+    try {
+        const double wide = ParseDoubleFromYsonString(str);
+        return static_cast<float>(wide);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"float\" value from YSON");
+    }
+}
+
+//! Parses a binary double literal; any failure is rethrown as
+//! TYsonLiteralParseException mentioning the "double" type.
+template <>
+double ConvertFromYsonString<double>(const TYsonStringBuf& str)
+{
+    try {
+        const double parsed = ParseDoubleFromYsonString(str);
+        return parsed;
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"double\" value from YSON");
+    }
+}
+
+//! Parses a boolean from its single-byte marker; any failure is rethrown as
+//! TYsonLiteralParseException mentioning the "boolean" type.
+template <>
+bool ConvertFromYsonString<bool>(const TYsonStringBuf& str)
+{
+    try {
+        YT_ASSERT(str.GetType() == EYsonType::Node);
+        const auto bytes = str.AsStringBuf();
+        TMemoryInput stream(bytes.data(), bytes.length());
+
+        char marker;
+        if (!stream.ReadChar(marker)) {
+            throw TYsonLiteralParseException("Missing type marker");
+        }
+        if (marker == NDetail::TrueMarker) {
+            return true;
+        }
+        if (marker == NDetail::FalseMarker) {
+            return false;
+        }
+        // Anything else is not a boolean literal.
+        const auto description = FormatUnexpectedMarker(marker);
+        throw TYsonLiteralParseException(Format("Unexpected %v", description));
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"boolean\" value from YSON");
+    }
+}
+
+//! Parses a binary string literal and interprets it via TInstant::ParseIso8601;
+//! any failure is rethrown as TYsonLiteralParseException mentioning "instant".
+template <>
+TInstant ConvertFromYsonString<TInstant>(const TYsonStringBuf& str)
+{
+    try {
+        const auto text = ParseStringFromYsonString(str);
+        return TInstant::ParseIso8601(text);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"instant\" value from YSON");
+    }
+}
+
+//! Parses a binary uint64 literal and interprets it as a number of milliseconds;
+//! any failure is rethrown as TYsonLiteralParseException mentioning "duration".
+template <>
+TDuration ConvertFromYsonString<TDuration>(const TYsonStringBuf& str)
+{
+    try {
+        const auto milliseconds = ParseUint64FromYsonString(str);
+        return TDuration::MilliSeconds(milliseconds);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"duration\" value from YSON");
+    }
+}
+
+//! Parses a binary string literal and interprets it via TGuid::FromString;
+//! any failure is rethrown as TYsonLiteralParseException mentioning "guid".
+template <>
+TGuid ConvertFromYsonString<TGuid>(const TYsonStringBuf& str)
+{
+    try {
+        const auto text = ParseStringFromYsonString(str);
+        return TGuid::FromString(text);
+    } catch (const std::exception& ex) {
+        throw TYsonLiteralParseException(ex, "Error parsing \"guid\" value from YSON");
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
diff --git a/library/cpp/yt/yson_string/convert.h b/library/cpp/yt/yson_string/convert.h
new file mode 100644
index 0000000000..3c2cc7d284
--- /dev/null
+++ b/library/cpp/yt/yson_string/convert.h
@@ -0,0 +1,114 @@
+#pragma once
+
+#include "string.h"
+
+#include <library/cpp/yt/misc/guid.h>
+
+#include <library/cpp/yt/exception/exception.h>
+
+#include <util/generic/string.h>
+
+#include <util/datetime/base.h>
+
+namespace NYT::NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+// Generic forward declarations.
+
+//! Converts |value| to a YSON string.
+//! Only declared here; specializations for basic types follow in this header.
+template <class T>
+TYsonString ConvertToYsonString(const T& value);
+
+//! Converts |value| to a YSON string in the requested |format|.
+// NOTE(review): no definition of this overload is visible in this library; presumably provided elsewhere.
+template <class T>
+TYsonString ConvertToYsonString(const T& value, EYsonFormat format);
+
+//! Converts a YSON string to a value of type |T|.
+//! Only declared here; specializations for basic types follow in this header.
+template <class T>
+T ConvertFromYsonString(const TYsonStringBuf& str);
+
+////////////////////////////////////////////////////////////////////////////////
+// Basic specializations for ConvertToYsonString.
+
+// Signed integers.
+template <>
+TYsonString ConvertToYsonString<i8>(const i8& value);
+template <>
+TYsonString ConvertToYsonString<i32>(const i32& value);
+template <>
+TYsonString ConvertToYsonString<i64>(const i64& value);
+
+// Unsigned integers.
+template <>
+TYsonString ConvertToYsonString<ui8>(const ui8& value);
+template <>
+TYsonString ConvertToYsonString<ui32>(const ui32& value);
+template <>
+TYsonString ConvertToYsonString<ui64>(const ui64& value);
+
+// Strings; the |const char*| variant is a plain overload rather than a specialization.
+template <>
+TYsonString ConvertToYsonString<TString>(const TString& value);
+template <>
+TYsonString ConvertToYsonString<TStringBuf>(const TStringBuf& value);
+TYsonString ConvertToYsonString(const char* value);
+
+// Floating-point numbers.
+template <>
+TYsonString ConvertToYsonString<float>(const float& value);
+template <>
+TYsonString ConvertToYsonString<double>(const double& value);
+
+// Booleans.
+template <>
+TYsonString ConvertToYsonString<bool>(const bool& value);
+
+// Time points.
+template <>
+TYsonString ConvertToYsonString<TInstant>(const TInstant& value);
+
+// Time intervals.
+template <>
+TYsonString ConvertToYsonString<TDuration>(const TDuration& value);
+
+// Guids.
+template <>
+TYsonString ConvertToYsonString<TGuid>(const TGuid& value);
+
+////////////////////////////////////////////////////////////////////////////////
+// Basic specializations for ConvertFromYsonString.
+// Note: these currently support a subset of NYT::NYTree::Convert features.
+
+//! The exception type declared for failures of the ConvertFromYsonString
+//! specializations below. Inherits all constructors of TCompositeException.
+class TYsonLiteralParseException
+ : public TCompositeException
+{
+public:
+ using TCompositeException::TCompositeException;
+};
+
+// Signed integers.
+// NB: every explicit specialization defined in convert.cpp must be declared
+// here; using an undeclared one would implicitly instantiate the primary
+// template, which makes the program ill-formed (no diagnostic required).
+template <>
+i8 ConvertFromYsonString<i8>(const TYsonStringBuf& str);
+template <>
+i16 ConvertFromYsonString<i16>(const TYsonStringBuf& str);
+template <>
+i32 ConvertFromYsonString<i32>(const TYsonStringBuf& str);
+template <>
+i64 ConvertFromYsonString<i64>(const TYsonStringBuf& str);
+
+// Unsigned integers.
+template <>
+ui8 ConvertFromYsonString<ui8>(const TYsonStringBuf& str);
+template <>
+ui16 ConvertFromYsonString<ui16>(const TYsonStringBuf& str);
+template <>
+ui32 ConvertFromYsonString<ui32>(const TYsonStringBuf& str);
+template <>
+ui64 ConvertFromYsonString<ui64>(const TYsonStringBuf& str);
+
+// Strings.
+template <>
+TString ConvertFromYsonString<TString>(const TYsonStringBuf& str);
+
+// Floating-point numbers.
+template <>
+float ConvertFromYsonString<float>(const TYsonStringBuf& str);
+template <>
+double ConvertFromYsonString<double>(const TYsonStringBuf& str);
+
+// Booleans.
+template <>
+bool ConvertFromYsonString<bool>(const TYsonStringBuf& str);
+
+// Time points.
+template <>
+TInstant ConvertFromYsonString<TInstant>(const TYsonStringBuf& str);
+
+// Time intervals.
+template <>
+TDuration ConvertFromYsonString<TDuration>(const TYsonStringBuf& str);
+
+// Guids.
+template <>
+TGuid ConvertFromYsonString<TGuid>(const TYsonStringBuf& str);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
diff --git a/library/cpp/yt/yson_string/format.h b/library/cpp/yt/yson_string/format.h
new file mode 100644
index 0000000000..2efd4fa39a
--- /dev/null
+++ b/library/cpp/yt/yson_string/format.h
@@ -0,0 +1,44 @@
+#pragma once
+
+namespace NYT::NYson::NDetail {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Structural symbols.
+
+//! Indicates the beginning of a list.
+constexpr char BeginListSymbol = '[';
+//! Indicates the end of a list.
+constexpr char EndListSymbol = ']';
+
+//! Indicates the beginning of a map.
+constexpr char BeginMapSymbol = '{';
+//! Indicates the end of a map.
+constexpr char EndMapSymbol = '}';
+
+//! Indicates the beginning of an attribute map.
+constexpr char BeginAttributesSymbol = '<';
+//! Indicates the end of an attribute map.
+constexpr char EndAttributesSymbol = '>';
+
+//! Separates items in lists, maps, attributes.
+constexpr char ItemSeparatorSymbol = ';';
+//! Separates keys from values in maps.
+constexpr char KeyValueSeparatorSymbol = '=';
+
+//! Indicates an entity.
+constexpr char EntitySymbol = '#';
+
+// Single-byte markers that introduce binary scalar literals.
+
+//! Marks the beginning of a binary string literal.
+constexpr char StringMarker = '\x01';
+//! Marks the beginning of a binary i64 literal.
+constexpr char Int64Marker = '\x02';
+//! Marks the beginning of a binary double literal.
+constexpr char DoubleMarker = '\x03';
+//! Marks |false| boolean value.
+constexpr char FalseMarker = '\x04';
+//! Marks |true| boolean value.
+constexpr char TrueMarker = '\x05';
+//! Marks the beginning of a binary ui64 literal.
+constexpr char Uint64Marker = '\x06';
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson::NDetail
diff --git a/library/cpp/yt/yson_string/public.h b/library/cpp/yt/yson_string/public.h
new file mode 100644
index 0000000000..42c1ce80bb
--- /dev/null
+++ b/library/cpp/yt/yson_string/public.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <library/cpp/yt/misc/enum.h>
+
+namespace NYT::NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! The data format: one binary and two textual representations.
+DEFINE_ENUM(EYsonFormat,
+ // Binary.
+ // Most compact but not human-readable.
+ (Binary)
+
+ // Text.
+ // Not so compact but human-readable.
+ // Does not use indentation.
+ // Uses escaping for non-text characters.
+ (Text)
+
+ // Text with indentation.
+ // Extremely verbose but human-readable.
+ // Uses escaping for non-text characters.
+ (Pretty)
+);
+
+//! Describes what a YSON byte sequence contains; stored as |i8|.
+// NOTE(review): presumably Node is a single complete value while the fragment
+// kinds are bare item sequences of a list or a map -- confirm against the parser.
+// NB: -1 is used for serializing null TYsonString.
+DEFINE_ENUM_WITH_UNDERLYING_TYPE(EYsonType, i8,
+ ((Node) (0))
+ ((ListFragment) (1))
+ ((MapFragment) (2))
+);
+
+class TYsonString;
+class TYsonStringBuf;
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
diff --git a/library/cpp/yt/yson_string/string-inl.h b/library/cpp/yt/yson_string/string-inl.h
new file mode 100644
index 0000000000..5c41629cc0
--- /dev/null
+++ b/library/cpp/yt/yson_string/string-inl.h
@@ -0,0 +1,93 @@
+#ifndef STRING_INL_H_
+#error "Direct inclusion of this file is not allowed, include string.h"
+// For the sake of sane code completion.
+#include "string.h"
+#endif
+
+namespace NYT::NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NDetail {
+
+//! Compares two YSON string-like objects.
+/*!
+ *  Two null instances are equal; a null instance never equals a non-null one;
+ *  non-null instances are equal iff both their types and their bytes match.
+ */
+template <typename TLeft, typename TRight>
+bool Equals(const TLeft& lhs, const TRight& rhs)
+{
+    const bool lhsHasValue = static_cast<bool>(lhs);
+    const bool rhsHasValue = static_cast<bool>(rhs);
+    // As soon as either side is null the answer depends on nullness alone.
+    if (!lhsHasValue || !rhsHasValue) {
+        return lhsHasValue == rhsHasValue;
+    }
+    if (lhs.GetType() != rhs.GetType()) {
+        return false;
+    }
+    return lhs.AsStringBuf() == rhs.AsStringBuf();
+}
+
+} // namespace NDetail
+
+// Equality for every combination of TYsonString and TYsonStringBuf.
+// All four overloads forward to NDetail::Equals.
+
+inline bool operator == (const TYsonString& lhs, const TYsonString& rhs)
+{
+ return NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator == (const TYsonString& lhs, const TYsonStringBuf& rhs)
+{
+ return NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator == (const TYsonStringBuf& lhs, const TYsonString& rhs)
+{
+ return NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator == (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs)
+{
+ return NDetail::Equals(lhs, rhs);
+}
+
+// Inequality for every combination of TYsonString and TYsonStringBuf.
+// Each overload is the negation of NDetail::Equals.
+
+inline bool operator != (const TYsonString& lhs, const TYsonString& rhs)
+{
+    return !NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator != (const TYsonString& lhs, const TYsonStringBuf& rhs)
+{
+    return !NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator != (const TYsonStringBuf& lhs, const TYsonString& rhs)
+{
+    return !NDetail::Equals(lhs, rhs);
+}
+
+inline bool operator != (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs)
+{
+    return !NDetail::Equals(lhs, rhs);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
+
+//! A hasher for TYsonString; forwards to TYsonString::ComputeHash.
+template <>
+struct THash<NYT::NYson::TYsonString>
+{
+ size_t operator () (const NYT::NYson::TYsonString& str) const
+ {
+ return str.ComputeHash();
+ }
+};
+
+//! A hasher for TYsonStringBuf.
+//! Hashes the underlying bytes; a null instance hashes like an empty byte sequence.
+template <>
+struct THash<NYT::NYson::TYsonStringBuf>
+{
+    size_t operator () (const NYT::NYson::TYsonStringBuf& str) const
+    {
+        // AsStringBuf() may only be called on non-null instances, yet operator ==
+        // is defined for null ones, so null must be hashable as well. The empty
+        // range matches what TYsonString::ComputeHash hashes for a null string.
+        const auto bytes = str ? str.AsStringBuf() : TStringBuf();
+        return THash<TStringBuf>()(bytes);
+    }
+};
diff --git a/library/cpp/yt/yson_string/string.cpp b/library/cpp/yt/yson_string/string.cpp
new file mode 100644
index 0000000000..99d45e8616
--- /dev/null
+++ b/library/cpp/yt/yson_string/string.cpp
@@ -0,0 +1,185 @@
+#include "string.h"
+
+#include <library/cpp/yt/assert/assert.h>
+
+#include <library/cpp/yt/misc/variant.h>
+
+#include <library/cpp/yt/memory/new.h>
+
+namespace NYT::NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Constructs a null instance; the stored type is a placeholder.
+TYsonStringBuf::TYsonStringBuf()
+    : Type_(EYsonType::Node) // fake
+    , Null_(true)
+{ }
+
+//! Constructs a view over |ysonString|; a null string yields a null view.
+TYsonStringBuf::TYsonStringBuf(const TYsonString& ysonString)
+{
+    if (!ysonString) {
+        Type_ = EYsonType::Node; // fake
+        Null_ = true;
+        return;
+    }
+
+    Data_ = ysonString.AsStringBuf();
+    Type_ = ysonString.GetType();
+    Null_ = false;
+}
+
+//! Constructs a non-null view over the bytes of a TString; delegates to the TStringBuf constructor.
+TYsonStringBuf::TYsonStringBuf(const TString& data, EYsonType type)
+ : TYsonStringBuf(TStringBuf(data), type)
+{ }
+
+//! Constructs a non-null view over |data| with the given |type|. No bytes are copied.
+TYsonStringBuf::TYsonStringBuf(TStringBuf data, EYsonType type)
+ : Data_(data)
+ , Type_(type)
+ , Null_(false)
+{ }
+
+//! Constructs a non-null view over a C string; delegates to the TStringBuf constructor.
+TYsonStringBuf::TYsonStringBuf(const char* data, EYsonType type)
+ : TYsonStringBuf(TStringBuf(data), type)
+{ }
+
+//! Returns |true| iff the instance is not null.
+TYsonStringBuf::operator bool() const
+{
+ return !Null_;
+}
+
+//! Returns the viewed bytes. Verifies (YT_VERIFY) that the instance is non-null.
+TStringBuf TYsonStringBuf::AsStringBuf() const
+{
+ YT_VERIFY(*this);
+ return Data_;
+}
+
+//! Returns the YSON type. Verifies (YT_VERIFY) that the instance is non-null.
+EYsonType TYsonStringBuf::GetType() const
+{
+ YT_VERIFY(*this);
+ return Type_;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Constructs a null instance: no payload, empty range, placeholder type.
+TYsonString::TYsonString()
+    : Begin_(nullptr)
+    , Size_(0)
+    , Type_(EYsonType::Node) // fake
+{ }
+
+//! Constructs an owning copy of |ysonStringBuf|.
+/*!
+ *  For a non-null argument the bytes are copied into the extra space of a
+ *  freshly allocated ref-counted payload; a null argument yields a null instance.
+ */
+TYsonString::TYsonString(const TYsonStringBuf& ysonStringBuf)
+{
+    if (!ysonStringBuf) {
+        Begin_ = nullptr;
+        Size_ = 0;
+        Type_ = EYsonType::Node; // fake
+        return;
+    }
+
+    // Ref-counted holder whose trailing extra space stores the copied bytes.
+    struct TCapturedYsonStringPayload
+        : public TRefCounted
+        , public TWithExtraSpace<TCapturedYsonStringPayload>
+    {
+        char* GetData()
+        {
+            return static_cast<char*>(GetExtraSpacePtr());
+        }
+    };
+
+    const auto source = ysonStringBuf.AsStringBuf();
+    auto holder = NewWithExtraSpace<TCapturedYsonStringPayload>(source.length());
+    ::memcpy(holder->GetData(), source.data(), source.length());
+
+    Payload_ = holder;
+    Begin_ = holder->GetData();
+    Size_ = source.Size();
+    Type_ = ysonStringBuf.GetType();
+}
+
+//! Constructs an owning copy of |data|; delegates to the TYsonStringBuf constructor.
+TYsonString::TYsonString(
+ TStringBuf data,
+ EYsonType type)
+ : TYsonString(TYsonStringBuf(data, type))
+{ }
+
+// Two build-dependent implementations of the TString constructor.
+#ifdef TSTRING_IS_STD_STRING
+//! With TSTRING_IS_STD_STRING defined the data goes through the TYsonStringBuf
+//! constructor instead of being stored as a TString payload.
+TYsonString::TYsonString(
+ const TString& data,
+ EYsonType type)
+ : TYsonString(TYsonStringBuf(data, type))
+{ }
+#else
+//! Otherwise |data| itself is stored as the payload, and Begin_/Size_ are taken
+//! from the argument's buffer.
+TYsonString::TYsonString(
+ const TString& data,
+ EYsonType type)
+{
+ // NOTE: CoW TString implementation is assumed
+ // Moving the payload MUST NOT invalidate its internal pointers
+ Payload_ = data;
+ Begin_ = data.data();
+ Size_ = data.length();
+ Type_ = type;
+}
+#endif
+
+//! Constructs an instance over |data| without copying its bytes:
+//! the ref's holder is retained as the payload.
+TYsonString::TYsonString(
+    const TSharedRef& data,
+    EYsonType type)
+    : Payload_(data.GetHolder())
+    , Begin_(data.Begin())
+    , Size_(data.Size())
+    , Type_(type)
+{ }
+
+//! Returns |true| iff the payload is anything other than the null payload.
+TYsonString::operator bool() const
+{
+    const auto* nullPayload = std::get_if<TNullPayload>(&Payload_);
+    return nullPayload == nullptr;
+}
+
+//! Returns the YSON type. Verifies (YT_VERIFY) that the instance is non-null.
+EYsonType TYsonString::GetType() const
+{
+ YT_VERIFY(*this);
+ return Type_;
+}
+
+//! Returns a non-owning view of the stored bytes.
+//! Verifies (YT_VERIFY) that the instance is non-null.
+TStringBuf TYsonString::AsStringBuf() const
+{
+    YT_VERIFY(*this);
+    const char* const end = Begin_ + Size_;
+    return TStringBuf(Begin_, end);
+}
+
+//! Returns the stored bytes as a TString, dispatching on the payload kind.
+TString TYsonString::ToString() const
+{
+ return Visit(
+ Payload_,
+ // Null instance: calling ToString is a programming error.
+ [] (const TNullPayload&) -> TString {
+ YT_ABORT();
+ },
+ // Ref-counted holder: build a new TString from the viewed bytes.
+ [&] (const TRefCountedPtr&) {
+ return TString(AsStringBuf());
+ },
+ // TString payload: return the stored string itself.
+ [] (const TString& payload) {
+ return payload;
+ });
+}
+
+//! Hashes the stored bytes with THash<TStringBuf>.
+//! Unlike AsStringBuf(), performs no null check: a null instance hashes an empty range.
+size_t TYsonString::ComputeHash() const
+{
+    const TStringBuf bytes(Begin_, Begin_ + Size_);
+    return THash<TStringBuf>()(bytes);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Free-function form of TYsonString::ToString.
+TString ToString(const TYsonString& yson)
+{
+ return yson.ToString();
+}
+
+//! Copies the bytes viewed by |yson| into a new TString.
+//! AsStringBuf() verifies that |yson| is non-null.
+TString ToString(const TYsonStringBuf& yson)
+{
+    const auto bytes = yson.AsStringBuf();
+    return TString(bytes);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
diff --git a/library/cpp/yt/yson_string/string.h b/library/cpp/yt/yson_string/string.h
new file mode 100644
index 0000000000..e13af37a6d
--- /dev/null
+++ b/library/cpp/yt/yson_string/string.h
@@ -0,0 +1,140 @@
+#pragma once
+
+#include "public.h"
+
+#include <library/cpp/yt/memory/ref.h>
+
+#include <variant>
+
+namespace NYT::NYson {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Contains a sequence of bytes in YSON encoding annotated with EYsonType describing
+//! the content. Could be null. Non-owning.
+class TYsonStringBuf
+{
+public:
+ //! Constructs a null instance.
+ TYsonStringBuf();
+
+ //! Constructs an instance from TYsonString.
+ //! The result is null iff |ysonString| is null.
+ TYsonStringBuf(const TYsonString& ysonString);
+
+ //! Constructs a non-null instance with given type and content.
+ explicit TYsonStringBuf(
+ const TString& data,
+ EYsonType type = EYsonType::Node);
+
+ //! Constructs a non-null instance with given type and content.
+ explicit TYsonStringBuf(
+ TStringBuf data,
+ EYsonType type = EYsonType::Node);
+
+ //! Constructs a non-null instance with given type and content
+ //! (without this overload there is no way to construct TYsonStringBuf from
+ //! string literal).
+ explicit TYsonStringBuf(
+ const char* data,
+ EYsonType type = EYsonType::Node);
+
+ //! Returns |true| if the instance is not null.
+ explicit operator bool() const;
+
+ //! Returns the underlying YSON bytes. The instance must be non-null.
+ TStringBuf AsStringBuf() const;
+
+ //! Returns type of YSON contained here. The instance must be non-null.
+ EYsonType GetType() const;
+
+protected:
+ // The viewed bytes; not owned by this object.
+ TStringBuf Data_;
+ // Type of the content; a placeholder value for null instances.
+ EYsonType Type_;
+ // |true| for null instances.
+ bool Null_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! An owning version of TYsonStringBuf.
+/*!
+ * Internally captures the data either via TString or a polymorphic ref-counted holder.
+ */
+class TYsonString
+{
+public:
+ //! Constructs a null instance.
+ TYsonString();
+
+ //! Constructs an instance from TYsonStringBuf.
+ //! Copies the data into a ref-counted payload.
+ explicit TYsonString(const TYsonStringBuf& ysonStringBuf);
+
+ //! Constructs an instance from TStringBuf.
+ //! Copies the data into a ref-counted payload.
+ explicit TYsonString(
+ TStringBuf data,
+ EYsonType type = EYsonType::Node);
+
+ //! Constructs an instance from TString.
+ //! Zero-copy for CoW TString: retains the reference to TString in payload.
+ explicit TYsonString(
+ const TString& data,
+ EYsonType type = EYsonType::Node);
+
+ //! Constructs an instance from TSharedRef.
+ //! Zero-copy; retains the reference to TSharedRef holder in payload.
+ explicit TYsonString(
+ const TSharedRef& ref,
+ EYsonType type = EYsonType::Node);
+
+ //! Returns |true| if the instance is not null.
+ explicit operator bool() const;
+
+ //! Returns type of YSON contained here. The instance must be non-null.
+ EYsonType GetType() const;
+
+ //! Returns the non-owning data. The instance must be non-null.
+ TStringBuf AsStringBuf() const;
+
+ //! Returns the data represented by TString. The instance must be non-null.
+ //! Copies the data in case the payload is not TString.
+ TString ToString() const;
+
+ //! Computes the hash code.
+ size_t ComputeHash() const;
+
+private:
+ // Payload alternative that denotes a null instance.
+ struct TNullPayload
+ { };
+
+ using THolder = TRefCountedPtr;
+
+ // Keeps the bytes alive: nothing (null), a ref-counted holder, or a TString.
+ std::variant<TNullPayload, THolder, TString> Payload_;
+
+ // Start of the YSON bytes; nullptr for null instances.
+ const char* Begin_;
+ // Length in bytes and content type are bit-fields whose widths add up to 64 bits.
+ ui64 Size_ : 56;
+ EYsonType Type_ : 8;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Equality comparisons; defined inline in string-inl.h.
+bool operator == (const TYsonString& lhs, const TYsonString& rhs);
+bool operator == (const TYsonString& lhs, const TYsonStringBuf& rhs);
+bool operator == (const TYsonStringBuf& lhs, const TYsonString& rhs);
+bool operator == (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs);
+
+// Inequality comparisons; defined inline in string-inl.h.
+bool operator != (const TYsonString& lhs, const TYsonString& rhs);
+bool operator != (const TYsonString& lhs, const TYsonStringBuf& rhs);
+bool operator != (const TYsonStringBuf& lhs, const TYsonString& rhs);
+bool operator != (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs);
+
+// Conversions of the YSON bytes to TString; the argument must be non-null.
+TString ToString(const TYsonString& yson);
+TString ToString(const TYsonStringBuf& yson);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYson
+
+#define STRING_INL_H_
+#include "string-inl.h"
+#undef STRING_INL_H_
diff --git a/library/cpp/yt/yson_string/unittests/convert_ut.cpp b/library/cpp/yt/yson_string/unittests/convert_ut.cpp
new file mode 100644
index 0000000000..3a64f63896
--- /dev/null
+++ b/library/cpp/yt/yson_string/unittests/convert_ut.cpp
@@ -0,0 +1,79 @@
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <library/cpp/testing/gtest_extensions/assertions.h>
+
+#include <library/cpp/yt/yson_string/convert.h>
+
+#include <thread>
+
+namespace NYT::NYson {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! Round-trip helper: casts |value| to |T|, serializes it, parses the result
+//! back as |R| and expects the parsed value to equal the original.
+template <class T, class R = T, class U>
+void Check(const U& value)
+{
+    const auto original = static_cast<T>(value);
+    const auto serialized = ConvertToYsonString(original);
+    const auto restored = ConvertFromYsonString<R>(serialized);
+    EXPECT_EQ(original, restored);
+}
+
+// Round-trips one or more values of every supported basic type.
+TEST(TConvertTest, Basic)
+{
+ Check<i8>(13);
+ Check<i32>(13);
+ Check<i64>(13);
+ Check<i8>(-13);
+ Check<i32>(-13);
+ Check<i64>(-13);
+ Check<ui8>(13);
+ Check<ui32>(13);
+ Check<ui64>(13);
+ Check<TString>("");
+ Check<TString>("hello");
+ // String views and C strings are serialized as strings and read back as TString.
+ Check<TStringBuf, TString>("hello");
+ Check<const char*, TString>("hello");
+ Check<float>(3.14);
+ Check<double>(3.14);
+ Check<bool>(true);
+ Check<bool>(false);
+ Check<TInstant>(TInstant::Now());
+ Check<TDuration>(TDuration::Seconds(123));
+ Check<TGuid>(TGuid::FromString("12345678-12345678-abcdabcd-fefefefe"));
+}
+
+// Values serialized as 64-bit integers can be read back as 16-bit ones when they fit.
+TEST(TConvertTest, InRange)
+{
+ EXPECT_EQ(ConvertFromYsonString<i16>(ConvertToYsonString(static_cast<i64>(-123))), -123);
+ EXPECT_EQ(ConvertFromYsonString<ui16>(ConvertToYsonString(static_cast<ui64>(123))), 123U);
+}
+
+// Reading a value that does not fit into the requested 8-bit type must throw
+// TYsonLiteralParseException with an "out of expected range" message.
+TEST(TConvertTest, OutOfRange)
+{
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(
+ ConvertFromYsonString<i8>(ConvertToYsonString(static_cast<i64>(128))),
+ TYsonLiteralParseException,
+ "is out of expected range");
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(
+ ConvertFromYsonString<ui8>(ConvertToYsonString(static_cast<ui64>(256))),
+ TYsonLiteralParseException,
+ "is out of expected range");
+}
+
+// Well-formed YSON strings whose text is not a valid instant or guid must be
+// rejected with TYsonLiteralParseException naming the target type.
+TEST(TConvertTest, MalformedValues)
+{
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(
+ ConvertFromYsonString<TInstant>(ConvertToYsonString(TStringBuf("sometime"))),
+ TYsonLiteralParseException,
+ "Error parsing \"instant\" value");
+ EXPECT_THROW_MESSAGE_HAS_SUBSTR(
+ ConvertFromYsonString<TGuid>(ConvertToYsonString(TStringBuf("1-2-3-g"))),
+ TYsonLiteralParseException,
+ "Error parsing \"guid\" value");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT::NYson
diff --git a/library/cpp/yt/yson_string/unittests/ya.make b/library/cpp/yt/yson_string/unittests/ya.make
new file mode 100644
index 0000000000..f327d298f1
--- /dev/null
+++ b/library/cpp/yt/yson_string/unittests/ya.make
@@ -0,0 +1,15 @@
+# GTest target with the unit tests of library/cpp/yt/yson_string.
+GTEST()
+
+OWNER(g:yt)
+
+SRCS(
+ convert_ut.cpp
+)
+
+# The library under test plus the gtest framework and its assertion extensions.
+PEERDIR(
+ library/cpp/yt/yson_string
+ library/cpp/testing/gtest
+ library/cpp/testing/gtest_extensions
+)
+
+END()
diff --git a/library/cpp/yt/yson_string/ya.make b/library/cpp/yt/yson_string/ya.make
new file mode 100644
index 0000000000..b7447d89ff
--- /dev/null
+++ b/library/cpp/yt/yson_string/ya.make
@@ -0,0 +1,21 @@
+# Library target for library/cpp/yt/yson_string.
+LIBRARY()
+
+SRCS(
+ convert.cpp
+ string.cpp
+)
+
+# Other library/cpp/yt components the sources include headers from.
+PEERDIR(
+ library/cpp/yt/assert
+ library/cpp/yt/coding
+ library/cpp/yt/exception
+ library/cpp/yt/string
+ library/cpp/yt/memory
+ library/cpp/yt/misc
+)
+
+END()
+
+# Unit tests live in the unittests subdirectory.
+RECURSE_FOR_TESTS(
+ unittests
+)