diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yt/yson_string | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yt/yson_string')
-rw-r--r-- | library/cpp/yt/yson_string/convert.cpp | 381 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/convert.h | 114 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/format.h | 44 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/public.h | 39 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/string-inl.h | 93 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/string.cpp | 185 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/string.h | 140 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/unittests/convert_ut.cpp | 79 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/unittests/ya.make | 15 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/ya.make | 21 |
10 files changed, 1111 insertions, 0 deletions
diff --git a/library/cpp/yt/yson_string/convert.cpp b/library/cpp/yt/yson_string/convert.cpp new file mode 100644 index 0000000000..27f5c30d01 --- /dev/null +++ b/library/cpp/yt/yson_string/convert.cpp @@ -0,0 +1,381 @@ +#include "convert.h" +#include "format.h" + +#include <library/cpp/yt/assert/assert.h> + +#include <library/cpp/yt/string/format.h> + +#include <library/cpp/yt/coding/varint.h> + +#include <library/cpp/yt/misc/cast.h> + +#include <array> + +#include <util/stream/mem.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +template <> +TYsonString ConvertToYsonString<i8>(const i8& value) +{ + return ConvertToYsonString(static_cast<i64>(value)); +} + +template <> +TYsonString ConvertToYsonString<i32>(const i32& value) +{ + return ConvertToYsonString(static_cast<i64>(value)); +} + +template <> +TYsonString ConvertToYsonString<i64>(const i64& value) +{ + std::array<char, 1 + MaxVarInt64Size> buffer; + auto* ptr = buffer.data(); + *ptr++ = NDetail::Int64Marker; + ptr += WriteVarInt64(ptr, value); + return TYsonString(TStringBuf(buffer.data(), ptr - buffer.data())); +} + +template <> +TYsonString ConvertToYsonString<ui8>(const ui8& value) +{ + return ConvertToYsonString(static_cast<ui64>(value)); +} + +template <> +TYsonString ConvertToYsonString<ui32>(const ui32& value) +{ + return ConvertToYsonString(static_cast<ui64>(value)); +} + +template <> +TYsonString ConvertToYsonString<ui64>(const ui64& value) +{ + std::array<char, 1 + MaxVarInt64Size> buffer; + auto* ptr = buffer.data(); + *ptr++ = NDetail::Uint64Marker; + ptr += WriteVarUint64(ptr, value); + return TYsonString(TStringBuf(buffer.data(), ptr - buffer.data())); +} + +template <> +TYsonString ConvertToYsonString<TString>(const TString& value) +{ + return ConvertToYsonString(static_cast<TStringBuf>(value)); +} + +struct TConvertStringToYsonStringTag +{ }; + +template <> +TYsonString ConvertToYsonString<TStringBuf>(const TStringBuf& value) 
+{ + auto buffer = TSharedMutableRef::Allocate<TConvertStringToYsonStringTag>( + 1 + MaxVarInt64Size + value.length(), + /*initializeStorage*/ false); + auto* ptr = buffer.Begin(); + *ptr++ = NDetail::StringMarker; + ptr += WriteVarInt64(ptr, static_cast<i64>(value.length())); + ::memcpy(ptr, value.data(), value.length()); + ptr += value.length(); + return TYsonString(buffer.Slice(buffer.Begin(), ptr)); +} + +TYsonString ConvertToYsonString(const char* value) +{ + return ConvertToYsonString(TStringBuf(value)); +} + +template <> +TYsonString ConvertToYsonString<float>(const float& value) +{ + return ConvertToYsonString(static_cast<double>(value)); +} + +template <> +TYsonString ConvertToYsonString<double>(const double& value) +{ + std::array<char, 1 + sizeof(double)> buffer; + auto* ptr = buffer.data(); + *ptr++ = NDetail::DoubleMarker; + ::memcpy(ptr, &value, sizeof(value)); + ptr += sizeof(value); + return TYsonString(TStringBuf(buffer.data(), ptr - buffer.data())); +} + +template <> +TYsonString ConvertToYsonString<bool>(const bool& value) +{ + char ch = value ? 
NDetail::TrueMarker : NDetail::FalseMarker; + return TYsonString(TStringBuf(&ch, 1)); +} + +template <> +TYsonString ConvertToYsonString<TInstant>(const TInstant& value) +{ + return ConvertToYsonString(value.ToString()); +} + +template <> +TYsonString ConvertToYsonString<TDuration>(const TDuration& value) +{ + return ConvertToYsonString(value.MilliSeconds()); +} + +template <> +TYsonString ConvertToYsonString<TGuid>(const TGuid& value) +{ + std::array<char, MaxGuidStringSize> guidBuffer; + auto guidLength = WriteGuidToBuffer(guidBuffer.data(), value) - guidBuffer.data(); + std::array<char, 1 + MaxVarInt64Size + MaxGuidStringSize> ysonBuffer; + auto* ptr = ysonBuffer.data(); + *ptr++ = NDetail::StringMarker; + ptr += WriteVarInt64(ptr, static_cast<i64>(guidLength)); + ::memcpy(ptr, guidBuffer.data(), guidLength); + ptr += guidLength; + return TYsonString(TStringBuf(ysonBuffer.data(), ptr - ysonBuffer.data())); +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +TString FormatUnexpectedMarker(char ch) +{ + switch (ch) { + case NDetail::BeginListSymbol: + return "list"; + case NDetail::BeginMapSymbol: + return "map"; + case NDetail::BeginAttributesSymbol: + return "attributes"; + case NDetail::EntitySymbol: + return "\"entity\" literal"; + case NDetail::StringMarker: + return "\"string\" literal"; + case NDetail::Int64Marker: + return "\"int64\" literal"; + case NDetail::DoubleMarker: + return "\"double\" literal"; + case NDetail::FalseMarker: + case NDetail::TrueMarker: + return "\"boolean\" literal"; + case NDetail::Uint64Marker: + return "\"uint64\" literal"; + default: + return Format("unexpected symbol %qv", ch); + } +} + +i64 ParseInt64FromYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + TMemoryInput input(strBuf.data(), strBuf.length()); + char ch; + if (!input.ReadChar(ch)) { + throw TYsonLiteralParseException("Missing type marker"); 
+ } + if (ch != NDetail::Int64Marker) { + throw TYsonLiteralParseException(Format("Unexpected %v", + FormatUnexpectedMarker(ch))); + } + i64 result; + try { + ReadVarInt64(&input, &result); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Failed to decode \"int64\" value"); + } + return result; +} + +ui64 ParseUint64FromYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + TMemoryInput input(strBuf.data(), strBuf.length()); + char ch; + if (!input.ReadChar(ch)) { + throw TYsonLiteralParseException("Missing type marker"); + } + if (ch != NDetail::Uint64Marker) { + throw TYsonLiteralParseException(Format("Unexpected %v", + FormatUnexpectedMarker(ch))); + } + ui64 result; + try { + ReadVarUint64(&input, &result); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Failed to decode \"uint64\" value"); + } + return result; +} + +TString ParseStringFromYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + TMemoryInput input(strBuf.data(), strBuf.length()); + char ch; + if (!input.ReadChar(ch)) { + throw TYsonLiteralParseException("Missing type marker"); + } + if (ch != NDetail::StringMarker) { + throw TYsonLiteralParseException(Format("Unexpected %v", + FormatUnexpectedMarker(ch))); + } + i64 length; + try { + ReadVarInt64(&input, &length); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Failed to decode string length"); + } + if (length < 0) { // The length is decoded as a signed i64, so a negative value must be rejected explicitly; report it in the message. + throw TYsonLiteralParseException(Format("Negative string length %v", + length)); + } + if (static_cast<i64>(input.Avail()) != length) { + throw TYsonLiteralParseException(Format("Incorrect remaining string length: expected %v, got %v", + length, + input.Avail())); + } + TString result; + result.ReserveAndResize(length); + YT_VERIFY(static_cast<i64>(input.Read(result.Detach(), length)) == length); + return 
result; +} + +double ParseDoubleFromYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + TMemoryInput input(strBuf.data(), strBuf.length()); + char ch; + if (!input.ReadChar(ch)) { + throw TYsonLiteralParseException("Missing type marker"); + } + if (ch != NDetail::DoubleMarker) { + throw TYsonLiteralParseException(Format("Unexpected %v", + FormatUnexpectedMarker(ch))); + } + if (input.Avail() != sizeof(double)) { + throw TYsonLiteralParseException(Format("Incorrect remaining string length: expected %v, got %v", + sizeof(double), + input.Avail())); + } + double result; + YT_VERIFY(input.Read(&result, sizeof(result))); + return result; +} + +} // namespace + +#define PARSE(type, underlyingType) \ + template <> \ + type ConvertFromYsonString<type>(const TYsonStringBuf& str) \ + { \ + try { \ + return CheckedIntegralCast<type>(Parse ## underlyingType ## FromYsonString(str)); \ + } catch (const std::exception& ex) { \ + throw TYsonLiteralParseException(ex, "Error parsing \"" #type "\" value from YSON"); \ + } \ + } + +PARSE(i8, Int64 ) +PARSE(i16, Int64 ) +PARSE(i32, Int64 ) +PARSE(i64, Int64 ) +PARSE(ui8, Uint64) +PARSE(ui16, Uint64) +PARSE(ui32, Uint64) +PARSE(ui64, Uint64) + +#undef PARSE + +template <> +TString ConvertFromYsonString<TString>(const TYsonStringBuf& str) +{ + try { + return ParseStringFromYsonString(str); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"string\" value from YSON"); + } +} + +template <> +float ConvertFromYsonString<float>(const TYsonStringBuf& str) +{ + try { + return static_cast<float>(ParseDoubleFromYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"float\" value from YSON"); + } +} + +template <> +double ConvertFromYsonString<double>(const TYsonStringBuf& str) +{ + try { + return ParseDoubleFromYsonString(str); + } catch (const std::exception& ex) { + throw 
TYsonLiteralParseException(ex, "Error parsing \"double\" value from YSON"); + } +} + +template <> +bool ConvertFromYsonString<bool>(const TYsonStringBuf& str) +{ + try { + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + TMemoryInput input(strBuf.data(), strBuf.length()); + char ch; + if (!input.ReadChar(ch)) { + throw TYsonLiteralParseException("Missing type marker"); + } + if (ch != NDetail::TrueMarker && ch != NDetail::FalseMarker) { + throw TYsonLiteralParseException(Format("Unexpected %v", + FormatUnexpectedMarker(ch))); + } + return ch == NDetail::TrueMarker; + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"boolean\" value from YSON"); + } +} + +template <> +TInstant ConvertFromYsonString<TInstant>(const TYsonStringBuf& str) +{ + try { + return TInstant::ParseIso8601(ParseStringFromYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"instant\" value from YSON"); + } +} + +template <> +TDuration ConvertFromYsonString<TDuration>(const TYsonStringBuf& str) +{ + try { + return TDuration::MilliSeconds(ParseUint64FromYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"duration\" value from YSON"); + } +} + +template <> +TGuid ConvertFromYsonString<TGuid>(const TYsonStringBuf& str) +{ + try { + return TGuid::FromString(ParseStringFromYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"guid\" value from YSON"); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/library/cpp/yt/yson_string/convert.h b/library/cpp/yt/yson_string/convert.h new file mode 100644 index 0000000000..3c2cc7d284 --- /dev/null +++ b/library/cpp/yt/yson_string/convert.h @@ -0,0 +1,114 @@ +#pragma once + +#include "string.h" + +#include 
<library/cpp/yt/misc/guid.h> + +#include <library/cpp/yt/exception/exception.h> + +#include <util/generic/string.h> + +#include <util/datetime/base.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// +// Generic forward declarations. + +template <class T> +TYsonString ConvertToYsonString(const T& value); + +template <class T> +TYsonString ConvertToYsonString(const T& value, EYsonFormat format); + +template <class T> +T ConvertFromYsonString(const TYsonStringBuf& str); + +//////////////////////////////////////////////////////////////////////////////// +// Basic specializations for ConvertToYsonString. + +template <> +TYsonString ConvertToYsonString<i8>(const i8& value); +template <> +TYsonString ConvertToYsonString<i32>(const i32& value); +template <> +TYsonString ConvertToYsonString<i64>(const i64& value); + +template <> +TYsonString ConvertToYsonString<ui8>(const ui8& value); +template <> +TYsonString ConvertToYsonString<ui32>(const ui32& value); +template <> +TYsonString ConvertToYsonString<ui64>(const ui64& value); + +template <> +TYsonString ConvertToYsonString<TString>(const TString& value); +template <> +TYsonString ConvertToYsonString<TStringBuf>(const TStringBuf& value); +TYsonString ConvertToYsonString(const char* value); + +template <> +TYsonString ConvertToYsonString<float>(const float& value); +template <> +TYsonString ConvertToYsonString<double>(const double& value); + +template <> +TYsonString ConvertToYsonString<bool>(const bool& value); + +template <> +TYsonString ConvertToYsonString<TInstant>(const TInstant& value); + +template <> +TYsonString ConvertToYsonString<TDuration>(const TDuration& value); + +template <> +TYsonString ConvertToYsonString<TGuid>(const TGuid& value); + +//////////////////////////////////////////////////////////////////////////////// +// Basic specializations for ConvertFromYsonString. +// Note: these currently support a subset of NYT::NYTree::Convert features. 
+ +class TYsonLiteralParseException + : public TCompositeException +{ +public: + using TCompositeException::TCompositeException; +}; + +template <> +i8 ConvertFromYsonString<i8>(const TYsonStringBuf& str); +template <> +i32 ConvertFromYsonString<i32>(const TYsonStringBuf& str); +template <> +i64 ConvertFromYsonString<i64>(const TYsonStringBuf& str); + +template <> +ui8 ConvertFromYsonString<ui8>(const TYsonStringBuf& str); +template <> +ui32 ConvertFromYsonString<ui32>(const TYsonStringBuf& str); +template <> +ui64 ConvertFromYsonString<ui64>(const TYsonStringBuf& str); + +template <> +TString ConvertFromYsonString<TString>(const TYsonStringBuf& str); + +template <> +float ConvertFromYsonString<float>(const TYsonStringBuf& str); +template <> +double ConvertFromYsonString<double>(const TYsonStringBuf& str); + +template <> +bool ConvertFromYsonString<bool>(const TYsonStringBuf& str); + +template <> +TInstant ConvertFromYsonString<TInstant>(const TYsonStringBuf& str); + +template <> +TDuration ConvertFromYsonString<TDuration>(const TYsonStringBuf& str); + +template <> +TGuid ConvertFromYsonString<TGuid>(const TYsonStringBuf& str); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/library/cpp/yt/yson_string/format.h b/library/cpp/yt/yson_string/format.h new file mode 100644 index 0000000000..2efd4fa39a --- /dev/null +++ b/library/cpp/yt/yson_string/format.h @@ -0,0 +1,44 @@ +#pragma once + +namespace NYT::NYson::NDetail { + +//////////////////////////////////////////////////////////////////////////////// + +//! Indicates the beginning of a list. +constexpr char BeginListSymbol = '['; +//! Indicates the end of a list. +constexpr char EndListSymbol = ']'; + +//! Indicates the beginning of a map. +constexpr char BeginMapSymbol = '{'; +//! Indicates the end of a map. +constexpr char EndMapSymbol = '}'; + +//! Indicates the beginning of an attribute map. 
+constexpr char BeginAttributesSymbol = '<'; +//! Indicates the end of an attribute map. +constexpr char EndAttributesSymbol = '>'; + +//! Separates items in lists, maps, attributes. +constexpr char ItemSeparatorSymbol = ';'; +//! Separates keys from values in maps. +constexpr char KeyValueSeparatorSymbol = '='; + +//! Indicates an entity. +constexpr char EntitySymbol = '#'; +//! Marks the beginning of a binary string literal. +constexpr char StringMarker = '\x01'; +//! Marks the beginning of a binary i64 literal. +constexpr char Int64Marker = '\x02'; +//! Marks the beginning of a binary double literal. +constexpr char DoubleMarker = '\x03'; +//! Marks |false| boolean value. +constexpr char FalseMarker = '\x04'; +//! Marks |true| boolean value. +constexpr char TrueMarker = '\x05'; +//! Marks the beginning of a binary ui64 literal. +constexpr char Uint64Marker = '\x06'; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson::NDetail diff --git a/library/cpp/yt/yson_string/public.h b/library/cpp/yt/yson_string/public.h new file mode 100644 index 0000000000..42c1ce80bb --- /dev/null +++ b/library/cpp/yt/yson_string/public.h @@ -0,0 +1,39 @@ +#pragma once + +#include <library/cpp/yt/misc/enum.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +//! The data format. +DEFINE_ENUM(EYsonFormat, + // Binary. + // Most compact but not human-readable. + (Binary) + + // Text. + // Not so compact but human-readable. + // Does not use indentation. + // Uses escaping for non-text characters. + (Text) + + // Text with indentation. + // Extremely verbose but human-readable. + // Uses escaping for non-text characters. + (Pretty) +); + +// NB: -1 is used for serializing null TYsonString. 
+DEFINE_ENUM_WITH_UNDERLYING_TYPE(EYsonType, i8, + ((Node) (0)) + ((ListFragment) (1)) + ((MapFragment) (2)) +); + +class TYsonString; +class TYsonStringBuf; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/library/cpp/yt/yson_string/string-inl.h b/library/cpp/yt/yson_string/string-inl.h new file mode 100644 index 0000000000..5c41629cc0 --- /dev/null +++ b/library/cpp/yt/yson_string/string-inl.h @@ -0,0 +1,93 @@ +#ifndef STRING_INL_H_ +#error "Direct inclusion of this file is not allowed, include string.h" +// For the sake of sane code completion. +#include "string.h" +#endif + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +namespace NDetail { + +template <typename TLeft, typename TRight> +bool Equals(const TLeft& lhs, const TRight& rhs) +{ + auto lhsNull = !lhs.operator bool(); + auto rhsNull = !rhs.operator bool(); + if (lhsNull != rhsNull) { + return false; + } + if (lhsNull && rhsNull) { + return true; + } + return + lhs.AsStringBuf() == rhs.AsStringBuf() && + lhs.GetType() == rhs.GetType(); +} + +} // namespace NDetail + +inline bool operator == (const TYsonString& lhs, const TYsonString& rhs) +{ + return NDetail::Equals(lhs, rhs); +} + +inline bool operator == (const TYsonString& lhs, const TYsonStringBuf& rhs) +{ + return NDetail::Equals(lhs, rhs); +} + +inline bool operator == (const TYsonStringBuf& lhs, const TYsonString& rhs) +{ + return NDetail::Equals(lhs, rhs); +} + +inline bool operator == (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs) +{ + return NDetail::Equals(lhs, rhs); +} + +inline bool operator != (const TYsonString& lhs, const TYsonString& rhs) +{ + return !(lhs == rhs); +} + +inline bool operator != (const TYsonString& lhs, const TYsonStringBuf& rhs) +{ + return !(lhs == rhs); +} + +inline bool operator != (const TYsonStringBuf& lhs, const TYsonString& rhs) +{ + return !(lhs == rhs); +} + +inline 
bool operator != (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs) +{ + return !(lhs == rhs); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson + +//! A hasher for TYsonString +template <> +struct THash<NYT::NYson::TYsonString> +{ + size_t operator () (const NYT::NYson::TYsonString& str) const + { + return str.ComputeHash(); + } +}; + +//! A hasher for TYsonStringBuf +template <> +struct THash<NYT::NYson::TYsonStringBuf> +{ + size_t operator () (const NYT::NYson::TYsonStringBuf& str) const + { + return THash<TStringBuf>()(str.AsStringBuf()); + } +}; diff --git a/library/cpp/yt/yson_string/string.cpp b/library/cpp/yt/yson_string/string.cpp new file mode 100644 index 0000000000..99d45e8616 --- /dev/null +++ b/library/cpp/yt/yson_string/string.cpp @@ -0,0 +1,185 @@ +#include "string.h" + +#include <library/cpp/yt/assert/assert.h> + +#include <library/cpp/yt/misc/variant.h> + +#include <library/cpp/yt/memory/new.h> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +TYsonStringBuf::TYsonStringBuf() +{ + Type_ = EYsonType::Node; // fake + Null_ = true; +} + +TYsonStringBuf::TYsonStringBuf(const TYsonString& ysonString) +{ + if (ysonString) { + Data_ = ysonString.AsStringBuf(); + Type_ = ysonString.GetType(); + Null_ = false; + } else { + Type_ = EYsonType::Node; // fake + Null_ = true; + } +} + +TYsonStringBuf::TYsonStringBuf(const TString& data, EYsonType type) + : TYsonStringBuf(TStringBuf(data), type) +{ } + +TYsonStringBuf::TYsonStringBuf(TStringBuf data, EYsonType type) + : Data_(data) + , Type_(type) + , Null_(false) +{ } + +TYsonStringBuf::TYsonStringBuf(const char* data, EYsonType type) + : TYsonStringBuf(TStringBuf(data), type) +{ } + +TYsonStringBuf::operator bool() const +{ + return !Null_; +} + +TStringBuf TYsonStringBuf::AsStringBuf() const +{ + YT_VERIFY(*this); + return Data_; +} + +EYsonType TYsonStringBuf::GetType() const 
+{ + YT_VERIFY(*this); + return Type_; +} + +//////////////////////////////////////////////////////////////////////////////// + +TYsonString::TYsonString() +{ + Begin_ = nullptr; + Size_ = 0; + Type_ = EYsonType::Node; // fake +} + +TYsonString::TYsonString(const TYsonStringBuf& ysonStringBuf) +{ + if (ysonStringBuf) { + struct TCapturedYsonStringPayload + : public TRefCounted + , public TWithExtraSpace<TCapturedYsonStringPayload> + { + char* GetData() + { + return static_cast<char*>(GetExtraSpacePtr()); + } + }; + + auto data = ysonStringBuf.AsStringBuf(); + auto payload = NewWithExtraSpace<TCapturedYsonStringPayload>(data.length()); + ::memcpy(payload->GetData(), data.data(), data.length()); + Payload_ = payload; + Begin_ = payload->GetData(); + Size_ = data.Size(); + Type_ = ysonStringBuf.GetType(); + } else { + Begin_ = nullptr; + Size_ = 0; + Type_ = EYsonType::Node; // fake + } +} + +TYsonString::TYsonString( + TStringBuf data, + EYsonType type) + : TYsonString(TYsonStringBuf(data, type)) +{ } + +#ifdef TSTRING_IS_STD_STRING +TYsonString::TYsonString( + const TString& data, + EYsonType type) + : TYsonString(TYsonStringBuf(data, type)) +{ } +#else +TYsonString::TYsonString( + const TString& data, + EYsonType type) +{ + // NOTE: CoW TString implementation is assumed + // Moving the payload MUST NOT invalidate its internal pointers + Payload_ = data; + Begin_ = data.data(); + Size_ = data.length(); + Type_ = type; +} +#endif + +TYsonString::TYsonString( + const TSharedRef& data, + EYsonType type) +{ + Payload_ = data.GetHolder(); + Begin_ = data.Begin(); + Size_ = data.Size(); + Type_ = type; +} + +TYsonString::operator bool() const +{ + return !std::holds_alternative<TNullPayload>(Payload_); +} + +EYsonType TYsonString::GetType() const +{ + YT_VERIFY(*this); + return Type_; +} + +TStringBuf TYsonString::AsStringBuf() const +{ + YT_VERIFY(*this); + return TStringBuf(Begin_, Begin_ + Size_); +} + +TString TYsonString::ToString() const +{ + return Visit( + 
Payload_, + [] (const TNullPayload&) -> TString { + YT_ABORT(); + }, + [&] (const TRefCountedPtr&) { + return TString(AsStringBuf()); + }, + [] (const TString& payload) { + return payload; + }); +} + +size_t TYsonString::ComputeHash() const +{ + return THash<TStringBuf>()(TStringBuf(Begin_, Begin_ + Size_)); +} + +//////////////////////////////////////////////////////////////////////////////// + +TString ToString(const TYsonString& yson) +{ + return yson.ToString(); +} + +TString ToString(const TYsonStringBuf& yson) +{ + return TString(yson.AsStringBuf()); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson diff --git a/library/cpp/yt/yson_string/string.h b/library/cpp/yt/yson_string/string.h new file mode 100644 index 0000000000..e13af37a6d --- /dev/null +++ b/library/cpp/yt/yson_string/string.h @@ -0,0 +1,140 @@ +#pragma once + +#include "public.h" + +#include <library/cpp/yt/memory/ref.h> + +#include <variant> + +namespace NYT::NYson { + +//////////////////////////////////////////////////////////////////////////////// + +//! Contains a sequence of bytes in YSON encoding annotated with EYsonType describing +//! the content. Could be null. Non-owning. +class TYsonStringBuf +{ +public: + //! Constructs a null instance. + TYsonStringBuf(); + + //! Constructs an instance from TYsonString. + TYsonStringBuf(const TYsonString& ysonString); + + //! Constructs a non-null instance with given type and content. + explicit TYsonStringBuf( + const TString& data, + EYsonType type = EYsonType::Node); + + //! Constructs a non-null instance with given type and content. + explicit TYsonStringBuf( + TStringBuf data, + EYsonType type = EYsonType::Node); + + //! Constructs a non-null instance with given type and content + //! (without this overload there is no way to construct TYsonStringBuf from + //! string literal). + explicit TYsonStringBuf( + const char* data, + EYsonType type = EYsonType::Node); + + //! 
Returns |true| if the instance is not null. + explicit operator bool() const; + + //! Returns the underlying YSON bytes. The instance must be non-null. + TStringBuf AsStringBuf() const; + + //! Returns type of YSON contained here. The instance must be non-null. + EYsonType GetType() const; + +protected: + TStringBuf Data_; + EYsonType Type_; + bool Null_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +//! An owning version of TYsonStringBuf. +/*! + * Internally captures the data either via TString or a polymorphic ref-counted holder. + */ +class TYsonString +{ +public: + //! Constructs a null instance. + TYsonString(); + + //! Constructs an instance from TYsonStringBuf. + //! Copies the data into a ref-counted payload. + explicit TYsonString(const TYsonStringBuf& ysonStringBuf); + + //! Constructs an instance from TStringBuf. + //! Copies the data into a ref-counted payload. + explicit TYsonString( + TStringBuf data, + EYsonType type = EYsonType::Node); + + //! Constructs an instance from TString. + //! Zero-copy for CoW TString: retains the reference to TString in payload. + explicit TYsonString( + const TString& data, + EYsonType type = EYsonType::Node); + + //! Constructs an instance from TSharedRef. + //! Zero-copy; retains the reference to TSharedRef holder in payload. + explicit TYsonString( + const TSharedRef& ref, + EYsonType type = EYsonType::Node); + + //! Returns |true| if the instance is not null. + explicit operator bool() const; + + //! Returns type of YSON contained here. The instance must be non-null. + EYsonType GetType() const; + + //! Returns the non-owning data. The instance must be non-null. + TStringBuf AsStringBuf() const; + + //! Returns the data represented by TString. The instance must be non-null. + //! Copies the data in case the payload is not TString. + TString ToString() const; + + //! Computes the hash code. 
+ size_t ComputeHash() const; + +private: + struct TNullPayload + { }; + + using THolder = TRefCountedPtr; + + std::variant<TNullPayload, THolder, TString> Payload_; + + const char* Begin_; + ui64 Size_ : 56; + EYsonType Type_ : 8; +}; + +//////////////////////////////////////////////////////////////////////////////// + +bool operator == (const TYsonString& lhs, const TYsonString& rhs); +bool operator == (const TYsonString& lhs, const TYsonStringBuf& rhs); +bool operator == (const TYsonStringBuf& lhs, const TYsonString& rhs); +bool operator == (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs); + +bool operator != (const TYsonString& lhs, const TYsonString& rhs); +bool operator != (const TYsonString& lhs, const TYsonStringBuf& rhs); +bool operator != (const TYsonStringBuf& lhs, const TYsonString& rhs); +bool operator != (const TYsonStringBuf& lhs, const TYsonStringBuf& rhs); + +TString ToString(const TYsonString& yson); +TString ToString(const TYsonStringBuf& yson); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NYson + +#define STRING_INL_H_ +#include "string-inl.h" +#undef STRING_INL_H_ diff --git a/library/cpp/yt/yson_string/unittests/convert_ut.cpp b/library/cpp/yt/yson_string/unittests/convert_ut.cpp new file mode 100644 index 0000000000..3a64f63896 --- /dev/null +++ b/library/cpp/yt/yson_string/unittests/convert_ut.cpp @@ -0,0 +1,79 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/testing/gtest_extensions/assertions.h> + +#include <library/cpp/yt/yson_string/convert.h> + +#include <thread> + +namespace NYT::NYson { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +template <class T, class R = T, class U> +void Check(const U& value) +{ + auto str = ConvertToYsonString(static_cast<T>(value)); + auto anotherValue = ConvertFromYsonString<R>(str); + EXPECT_EQ(static_cast<T>(value), anotherValue); +} + +TEST(TConvertTest, Basic) 
+{ + Check<i8>(13); + Check<i32>(13); + Check<i64>(13); + Check<i8>(-13); + Check<i32>(-13); + Check<i64>(-13); + Check<ui8>(13); + Check<ui32>(13); + Check<ui64>(13); + Check<TString>(""); + Check<TString>("hello"); + Check<TStringBuf, TString>("hello"); + Check<const char*, TString>("hello"); + Check<float>(3.14); + Check<double>(3.14); + Check<bool>(true); + Check<bool>(false); + Check<TInstant>(TInstant::Now()); + Check<TDuration>(TDuration::Seconds(123)); + Check<TGuid>(TGuid::FromString("12345678-12345678-abcdabcd-fefefefe")); +} + +TEST(TConvertTest, InRange) +{ + EXPECT_EQ(ConvertFromYsonString<i16>(ConvertToYsonString(static_cast<i64>(-123))), -123); + EXPECT_EQ(ConvertFromYsonString<ui16>(ConvertToYsonString(static_cast<ui64>(123))), 123U); +} + +TEST(TConvertTest, OutOfRange) +{ + EXPECT_THROW_MESSAGE_HAS_SUBSTR( + ConvertFromYsonString<i8>(ConvertToYsonString(static_cast<i64>(128))), + TYsonLiteralParseException, + "is out of expected range"); + EXPECT_THROW_MESSAGE_HAS_SUBSTR( + ConvertFromYsonString<ui8>(ConvertToYsonString(static_cast<ui64>(256))), + TYsonLiteralParseException, + "is out of expected range"); +} + +TEST(TConvertTest, MalformedValues) +{ + EXPECT_THROW_MESSAGE_HAS_SUBSTR( + ConvertFromYsonString<TInstant>(ConvertToYsonString(TStringBuf("sometime"))), + TYsonLiteralParseException, + "Error parsing \"instant\" value"); + EXPECT_THROW_MESSAGE_HAS_SUBSTR( + ConvertFromYsonString<TGuid>(ConvertToYsonString(TStringBuf("1-2-3-g"))), + TYsonLiteralParseException, + "Error parsing \"guid\" value"); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT::NYson diff --git a/library/cpp/yt/yson_string/unittests/ya.make b/library/cpp/yt/yson_string/unittests/ya.make new file mode 100644 index 0000000000..f327d298f1 --- /dev/null +++ b/library/cpp/yt/yson_string/unittests/ya.make @@ -0,0 +1,15 @@ +GTEST() + +OWNER(g:yt) + +SRCS( + convert_ut.cpp +) + +PEERDIR( + 
library/cpp/yt/yson_string + library/cpp/testing/gtest + library/cpp/testing/gtest_extensions +) + +END() diff --git a/library/cpp/yt/yson_string/ya.make b/library/cpp/yt/yson_string/ya.make new file mode 100644 index 0000000000..b7447d89ff --- /dev/null +++ b/library/cpp/yt/yson_string/ya.make @@ -0,0 +1,21 @@ +LIBRARY() + +SRCS( + convert.cpp + string.cpp +) + +PEERDIR( + library/cpp/yt/assert + library/cpp/yt/coding + library/cpp/yt/exception + library/cpp/yt/string + library/cpp/yt/memory + library/cpp/yt/misc +) + +END() + +RECURSE_FOR_TESTS( + unittests +) |