diff options
author | arkady-e1ppa <arkady-e1ppa@yandex-team.com> | 2024-12-26 10:52:40 +0300 |
---|---|---|
committer | arkady-e1ppa <arkady-e1ppa@yandex-team.com> | 2024-12-26 11:12:17 +0300 |
commit | 0328aa1f62630f14d06076175e2f3750969ac813 (patch) | |
tree | a2652fb95c5a603e15f1692b43b10e4bf036f084 /library/cpp/yt/error/text_yson.cpp | |
parent | b4473180e344fac1deb5f4e85fc681efe2c708de (diff) | |
download | ydb-0328aa1f62630f14d06076175e2f3750969ac813.tar.gz |
YT-21233: Drop dependency on yson in library/cpp/yt/error by switch to std::string everywhere
done
commit_hash:8a83afa39917ba66a5161388a7cd74a4488d9908
Diffstat (limited to 'library/cpp/yt/error/text_yson.cpp')
-rw-r--r-- | library/cpp/yt/error/text_yson.cpp | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/library/cpp/yt/error/text_yson.cpp b/library/cpp/yt/error/text_yson.cpp new file mode 100644 index 0000000000..42ad52bbcc --- /dev/null +++ b/library/cpp/yt/error/text_yson.cpp @@ -0,0 +1,436 @@ +#include "text_yson.h" + +#include "error.h" + +#include <library/cpp/yt/assert/assert.h> + +#include <library/cpp/yt/string/format.h> + +#include <library/cpp/yt/coding/varint.h> + +#include <library/cpp/yt/misc/cast.h> + +#include <array> + +#include <util/string/escape.h> + +#include <util/stream/mem.h> + +namespace NYT::NDetail { + +//////////////////////////////////////////////////////////////////////////////// + +size_t FloatToStringWithNanInf(double value, char* buf, size_t size) +{ + if (std::isfinite(value)) { + return FloatToString(value, buf, size); + } + + static const TStringBuf nanLiteral = "%nan"; + static const TStringBuf infLiteral = "%inf"; + static const TStringBuf negativeInfLiteral = "%-inf"; + + TStringBuf str; + if (std::isnan(value)) { + str = nanLiteral; + } else if (std::isinf(value) && value > 0) { + str = infLiteral; + } else { + str = negativeInfLiteral; + } + YT_VERIFY(str.size() + 1 <= size); + ::memcpy(buf, str.data(), str.size() + 1); + return str.size(); +} + +//////////////////////////////////////////////////////////////////////////////// + +// NB(arkady-e1ppa): Copied from library/cpp/yt/yson_string/format.h +// to avoid direct dependency on it. + +//! Indicates an entity. +constexpr char EntitySymbol = '#'; +//! Marks the beginning of a binary string literal. +constexpr char StringMarker = '\x01'; +//! Marks the beginning of a binary i64 literal. +constexpr char Int64Marker = '\x02'; +//! Marks the beginning of a binary double literal. +constexpr char DoubleMarker = '\x03'; +//! Marks |false| boolean value. +constexpr char FalseMarker = '\x04'; +//! Marks |true| boolean value. +constexpr char TrueMarker = '\x05'; +//! Marks the beginning of a binary ui64 literal. +constexpr char Uint64Marker = '\x06'; + +//////////////////////////////////////////////////////////////////////////////// + +bool IsBinaryYson(TStringBuf str) +{ + return + std::ssize(str) != 0 && + (str.front() == EntitySymbol || + str.front() == StringMarker || + str.front() == Int64Marker || + str.front() == DoubleMarker || + str.front() == FalseMarker || + str.front() == TrueMarker || + str.front() == Uint64Marker); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <> +std::string ConvertToTextYsonString<i8>(const i8& value) +{ + return ConvertToTextYsonString(static_cast<i64>(value)); +} + +template <> +std::string ConvertToTextYsonString<i32>(const i32& value) +{ + return ConvertToTextYsonString(static_cast<i64>(value)); +} + +template <> +std::string ConvertToTextYsonString<i64>(const i64& value) +{ + return std::string{::ToString(value)}; +} + +template <> +std::string ConvertToTextYsonString<ui8>(const ui8& value) +{ + return ConvertToTextYsonString(static_cast<ui64>(value)); +} + +template <> +std::string ConvertToTextYsonString<ui32>(const ui32& value) +{ + return ConvertToTextYsonString(static_cast<ui64>(value)); +} + +template <> +std::string ConvertToTextYsonString<ui64>(const ui64& value) +{ + return std::string{::ToString(value) + 'u'}; +} + +template <> +std::string ConvertToTextYsonString<TStringBuf>(const TStringBuf& value) +{ + return std::string(NYT::Format("\"%v\"", ::EscapeC(value))); +} + +template <> +std::string ConvertToTextYsonString<float>(const float& value) +{ + return ConvertToTextYsonString(static_cast<double>(value)); +} + +template <> +std::string ConvertToTextYsonString<double>(const double& value) +{ + char buf[256]; + auto str = TStringBuf(buf, NDetail::FloatToStringWithNanInf(value, buf, sizeof(buf))); + auto ret = NYT::Format( + "%v%v", + str, + MakeFormatterWrapper([&] (TStringBuilderBase* builder) { + if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) { + builder->AppendChar('.'); + } + })); + return std::string(std::move(ret)); +} + +template <> +std::string ConvertToTextYsonString<bool>(const bool& value) +{ + return value + ? std::string(TStringBuf("%true")) + : std::string(TStringBuf("%false")); +} + +template <> +std::string ConvertToTextYsonString<TInstant>(const TInstant& value) +{ + return ConvertToTextYsonString(TStringBuf(value.ToString())); +} + +template <> +std::string ConvertToTextYsonString<TDuration>(const TDuration& value) +{ + // ConvertTo does unchecked cast to i64 :(. + return ConvertToTextYsonString(static_cast<i64>(value.MilliSeconds())); +} + +template <> +std::string ConvertToTextYsonString<TGuid>(const TGuid& value) +{ + return ConvertToTextYsonString(TStringBuf(NYT::ToString(value))); +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +template <class TSomeInt> +TSomeInt ReadTextUint(TStringBuf strBuf) +{ + // Drop 'u' + return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length() - 1}); +} + +template <class TSomeInt> +TSomeInt ReadTextInt(TStringBuf strBuf) +{ + return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length()}); +} + +bool IsNumeric(TStringBuf strBuf) +{ + bool isNumeric = true; + bool isNegative = false; + for (int i = 0; i < std::ssize(strBuf); ++i) { + char c = strBuf[i]; + + if (!('0' <= c && c <= '9')) { + if (i == 0 && c == '-') { + isNegative = true; + continue; + } + if (i == std::ssize(strBuf) - 1 && c == 'u' && !isNegative) { + continue; + } + isNumeric = false; + break; + } + } + + return isNumeric; +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TSomeInt> +TSomeInt ParseSomeIntFromTextYsonString(TStringBuf strBuf) +{ + if (std::ssize(strBuf) == 0 || !IsNumeric(strBuf)) { + THROW_ERROR_EXCEPTION( + "Unexpected %v\n" + "Value is not numeric", + strBuf); + } + + if (strBuf.back() == 'u') { + // Drop 'u' + return ReadTextUint<TSomeInt>(strBuf); + } else { + return ReadTextInt<TSomeInt>(strBuf); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +TString DoParseStringFromTextYson(TStringBuf strBuf) +{ + // Remove quotation marks. + return ::UnescapeC(TStringBuf{strBuf.data() + 1, strBuf.length() - 2}); +} + +TString ParseStringFromTextYsonString(TStringBuf strBuf) +{ + if (std::ssize(strBuf) < 2 || strBuf.front() != '\"' || strBuf.back() != '\"') { + THROW_ERROR_EXCEPTION( + "Unexpected %v\n" + "Text yson string must begin and end with \\\"", + strBuf); + } + return DoParseStringFromTextYson(strBuf); +} + +//////////////////////////////////////////////////////////////////////////////// + +double ParseDoubleFromTextYsonString(TStringBuf strBuf) +{ + if (std::ssize(strBuf) < 2) { + THROW_ERROR_EXCEPTION( + "Incorrect remaining string length: expected at least 2, got %v", + std::ssize(strBuf)); + } + + // Check special values first. + // %nan + // %inf, %+inf, %-inf + if (strBuf[0] == '%') { + switch (strBuf[1]) { + case '+': + case 'i': + return std::numeric_limits<double>::infinity(); + + case '-': + return -std::numeric_limits<double>::infinity(); + + case 'n': + return std::numeric_limits<double>::quiet_NaN(); + + default: + THROW_ERROR_EXCEPTION( + "Incorrect %%-literal %v", + strBuf); + } + } + + return ::FromString<double>(strBuf); +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +#define PARSE_INT(type, underlyingType) \ + template <> \ + type ConvertFromTextYsonString<type>(TStringBuf str) \ + { \ + try { \ + return CheckedIntegralCast<type>(ParseSomeIntFromTextYsonString<underlyingType>(str)); \ + } catch (const std::exception& ex) { \ + THROW_ERROR_EXCEPTION("Error parsing \"" #type "\" value from YSON") << ex; \ + } \ + } + +PARSE_INT(i8, i64) +PARSE_INT(i16, i64) +PARSE_INT(i32, i64) +PARSE_INT(i64, i64) +PARSE_INT(ui8, ui64) +PARSE_INT(ui16, ui64) +PARSE_INT(ui32, ui64) +PARSE_INT(ui64, ui64) + +#undef PARSE + +//////////////////////////////////////////////////////////////////////////////// + +template <> +TString ConvertFromTextYsonString<TString>(TStringBuf str) +{ + try { + return ParseStringFromTextYsonString(str); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"string\" value from YSON") << ex; + } +} + +template <> +std::string ConvertFromTextYsonString<std::string>(TStringBuf str) +{ + return std::string(ConvertFromTextYsonString<TString>(str)); +} + +template <> +float ConvertFromTextYsonString<float>(TStringBuf str) +{ + try { + return static_cast<float>(ParseDoubleFromTextYsonString(str)); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"float\" value from YSON") << ex; + } +} + +template <> +double ConvertFromTextYsonString<double>(TStringBuf str) +{ + try { + return ParseDoubleFromTextYsonString(str); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"double\" value from YSON") << ex; + } +} + +template <> +bool ConvertFromTextYsonString<bool>(TStringBuf strBuf) +{ + try { + if (std::ssize(strBuf) == 0) { + THROW_ERROR_EXCEPTION("Empty string"); + } + + char ch = strBuf.front(); + + if (ch == '%') { + if (strBuf != "%true" && strBuf != "%false") { + THROW_ERROR_EXCEPTION( + "Expected %%true or %%false but found %v", + strBuf); + } + return strBuf == "%true"; + } + + if (ch == '\"') { + return ParseBool(DoParseStringFromTextYson(strBuf)); + } + + // NB(arkady-e1ppa): This check is linear in size(strBuf) + // And thus is tried as the last resort. + if (IsNumeric(strBuf)) { + auto checkValue = [&] (const auto& functor) { + auto value = functor(strBuf); + if (value != 0 && value != 1) { + THROW_ERROR_EXCEPTION( + "Expected 0 or 1 but found %v", + value); + } + return static_cast<bool>(value); + }; + + if (strBuf.back() == 'u') { + return checkValue(&ReadTextUint<ui64>); + } else { + return checkValue(&ReadTextInt<i64>); + } + } + + THROW_ERROR_EXCEPTION( + "Unexpected %v\n" + "No known conversion to \"boolean\" value", + strBuf); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"boolean\" value from YSON") << ex; + } +} + +template <> +TInstant ConvertFromTextYsonString<TInstant>(TStringBuf str) +{ + try { + return TInstant::ParseIso8601(ParseStringFromTextYsonString(str)); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"instant\" value from YSON") << ex; + } +} + +template <> +TDuration ConvertFromTextYsonString<TDuration>(TStringBuf str) +{ + try { + return TDuration::MilliSeconds(ParseSomeIntFromTextYsonString<i64>(str)); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"duration\" value from YSON") << ex; + } +} + +template <> +TGuid ConvertFromTextYsonString<TGuid>(TStringBuf str) +{ + try { + return TGuid::FromString(ParseStringFromTextYsonString(str)); + } catch (const std::exception& ex) { + THROW_ERROR_EXCEPTION("Error parsing \"guid\" value from YSON") << ex; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT::NDetail |