diff options
author | Alexander Smirnov <alex@ydb.tech> | 2024-12-24 15:46:17 +0000 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-12-24 15:46:17 +0000 |
commit | c7decaf9230ddcb1ec2c42d1f50fb3998166c4ef (patch) | |
tree | 4efde4e4276bb0f24c314909403a1f6ed94c60d7 /library/cpp | |
parent | cf344b64297e6a79d1e538be9f8f59afb06a2a97 (diff) | |
parent | b821606f7bd364dc755d37b5bcb3559130675364 (diff) | |
download | ydb-c7decaf9230ddcb1ec2c42d1f50fb3998166c4ef.tar.gz |
Merge branch 'rightlib' into merge-libs-241224-1545
Diffstat (limited to 'library/cpp')
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 16 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 2 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 1 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 1 | ||||
-rw-r--r-- | library/cpp/tld/tlds-alpha-by-domain.txt | 2 | ||||
-rw-r--r-- | library/cpp/yt/misc/typeid-inl.h | 49 | ||||
-rw-r--r-- | library/cpp/yt/misc/typeid.h | 27 | ||||
-rw-r--r-- | library/cpp/yt/misc/unittests/typeid_sample.cpp (renamed from library/cpp/yt/stockpile/stockpile_other.cpp) | 8 | ||||
-rw-r--r-- | library/cpp/yt/misc/unittests/typeid_sample.h | 17 | ||||
-rw-r--r-- | library/cpp/yt/misc/unittests/typeid_ut.cpp | 25 | ||||
-rw-r--r-- | library/cpp/yt/misc/unittests/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/yt/stockpile/README.md | 12 | ||||
-rw-r--r-- | library/cpp/yt/stockpile/stockpile.h | 52 | ||||
-rw-r--r-- | library/cpp/yt/stockpile/stockpile_linux.cpp | 158 | ||||
-rw-r--r-- | library/cpp/yt/stockpile/ya.make | 18 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/convert.cpp | 416 | ||||
-rw-r--r-- | library/cpp/yt/yson_string/convert.h | 101 |
17 files changed, 644 insertions, 263 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index 5b6069d4a3..0a4bfcb9ec 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -6,8 +6,6 @@ #include <contrib/libs/hyperscan/runtime_corei7/hs_runtime.h> #include <contrib/libs/hyperscan/runtime_avx2/hs_common.h> #include <contrib/libs/hyperscan/runtime_avx2/hs_runtime.h> -#include <contrib/libs/hyperscan/runtime_avx512/hs_common.h> -#include <contrib/libs/hyperscan/runtime_avx512/hs_runtime.h> #include <util/generic/singleton.h> #include <util/system/sanitizers.h> @@ -19,10 +17,9 @@ namespace NHyperscan { namespace NPrivate { ERuntime DetectCurrentRuntime() { - // TODO: Remove MSanIsOn check upon DEVTOOLSSUPPORT-49258 resolution - if (NX86::HaveAVX512F() && NX86::HaveAVX512BW() && !NSan::MSanIsOn()) { - return ERuntime::AVX512; - } else if (NX86::HaveAVX() && NX86::HaveAVX2()) { + // NOTE: We explicitly disable AVX512 runtime, there are bugs with + // trivial string matching. See SPI-122953 & SPI-117618. + if (NX86::HaveAVX() && NX86::HaveAVX2()) { return ERuntime::AVX2; } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) { return ERuntime::Corei7; @@ -41,8 +38,6 @@ namespace NHyperscan { return 0; case ERuntime::AVX2: return CPU_FEATURES_AVX2; - case ERuntime::AVX512: - return CPU_FEATURES_AVX512; } } @@ -78,11 +73,6 @@ namespace NHyperscan { SerializeDatabase = avx2_hs_serialize_database; DeserializeDatabase = avx2_hs_deserialize_database; break; - case ERuntime::AVX512: - AllocScratch = avx512_hs_alloc_scratch; - Scan = avx512_hs_scan; - SerializeDatabase = avx512_hs_serialize_database; - DeserializeDatabase = avx512_hs_deserialize_database; } } diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index 1c8f404389..eae82fa384 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -11,7 +11,6 @@ namespace NHyperscan { using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; - constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; template<typename TNativeDeleter, TNativeDeleter NativeDeleter> class TDeleter { @@ -35,7 +34,6 @@ namespace NHyperscan { Core2 = 0, Corei7 = 1, AVX2 = 2, - AVX512 = 3 }; ERuntime DetectCurrentRuntime(); diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 9caa53f2e7..75cd0bcc89 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -210,7 +210,6 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { ERuntime::Core2, ERuntime::Corei7, ERuntime::AVX2, - ERuntime::AVX512 }; // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make index 72abbef9a2..1a052ae214 100644 --- a/library/cpp/regex/hyperscan/ya.make +++ b/library/cpp/regex/hyperscan/ya.make @@ -5,7 +5,6 @@ PEERDIR( contrib/libs/hyperscan/runtime_core2 contrib/libs/hyperscan/runtime_corei7 contrib/libs/hyperscan/runtime_avx2 - contrib/libs/hyperscan/runtime_avx512 ) SRCS( diff --git a/library/cpp/tld/tlds-alpha-by-domain.txt b/library/cpp/tld/tlds-alpha-by-domain.txt index 133ab40cd8..b37de20a95 100644 --- a/library/cpp/tld/tlds-alpha-by-domain.txt +++ b/library/cpp/tld/tlds-alpha-by-domain.txt @@ -1,4 +1,4 @@ -# Version 2024121600, Last Updated Mon Dec 16 07:07:02 2024 UTC +# Version 2024122200, Last Updated Sun Dec 22 07:07:01 2024 UTC AAA AARP ABB diff --git a/library/cpp/yt/misc/typeid-inl.h b/library/cpp/yt/misc/typeid-inl.h new file mode 100644 index 0000000000..a4518cfa46 --- /dev/null +++ b/library/cpp/yt/misc/typeid-inl.h @@ -0,0 +1,49 @@ +#ifndef TYPEID_INL_H_ +#error "Direct inclusion of this file is not allowed, include typeid.h" +// For the sake of sane code completion. +#include "typeid.h" +#endif + +#include "port.h" + +#include <util/system/compiler.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +namespace NDetail { + +template <class T> +class TTypeidTag +{ }; + +} // namespace NDetail + +template <class T> +const std::type_info& Typeid() +{ + if constexpr (requires { TypeidImpl(NDetail::TTypeidTag<T>()); }) { + return TypeidImpl(NDetail::TTypeidTag<T>()); + } else { + return typeid(T); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +#undef YT_DECLARE_TYPEID +#undef YT_DEFINE_TYPEID + +#define YT_DECLARE_TYPEID(type) \ + [[maybe_unused]] YT_ATTRIBUTE_USED const std::type_info& TypeidImpl(::NYT::NDetail::TTypeidTag<type>); + +#define YT_DEFINE_TYPEID(type) \ + [[maybe_unused]] YT_ATTRIBUTE_USED Y_FORCE_INLINE const std::type_info& TypeidImpl(::NYT::NDetail::TTypeidTag<type>) \ + { \ + return typeid(type); \ + } + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/misc/typeid.h b/library/cpp/yt/misc/typeid.h new file mode 100644 index 0000000000..d4584e64f2 --- /dev/null +++ b/library/cpp/yt/misc/typeid.h @@ -0,0 +1,27 @@ +#pragma once + +#include <typeinfo> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// +// Enables accessing type_info for incomplete types. + +//! Place this macro in header file after forward-declaring a type. +#define YT_DECLARE_TYPEID(type) + +//! Place this macro in header or source file after fully defining a type. +#define YT_DEFINE_TYPEID(type) + +//! Equivalent to |typeid(T)| but also works for incomplete types +//! annotated with YT_DECLARE_TYPEID. +template <class T> +const std::type_info& Typeid(); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define TYPEID_INL_H_ +#include "typeid-inl.h" +#undef TYPEID_INL_H_ diff --git a/library/cpp/yt/stockpile/stockpile_other.cpp b/library/cpp/yt/misc/unittests/typeid_sample.cpp index 481b111b56..9a3184e458 100644 --- a/library/cpp/yt/stockpile/stockpile_other.cpp +++ b/library/cpp/yt/misc/unittests/typeid_sample.cpp @@ -1,11 +1,13 @@ -#include "stockpile.h" +#include "typeid_sample.h" namespace NYT { //////////////////////////////////////////////////////////////////////////////// -void TStockpileManager::Reconfigure(TStockpileOptions /*options*/) -{ } +struct TTypeidIncomplete +{ }; + +YT_DEFINE_TYPEID(TTypeidIncomplete); //////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/yt/misc/unittests/typeid_sample.h b/library/cpp/yt/misc/unittests/typeid_sample.h new file mode 100644 index 0000000000..d6b5a288b5 --- /dev/null +++ b/library/cpp/yt/misc/unittests/typeid_sample.h @@ -0,0 +1,17 @@ +#pragma once + +#include <library/cpp/yt/misc/typeid.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +struct TTypeidIncomplete; +YT_DECLARE_TYPEID(TTypeidIncomplete); + +struct TTypeidComplete +{ }; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/misc/unittests/typeid_ut.cpp b/library/cpp/yt/misc/unittests/typeid_ut.cpp new file mode 100644 index 0000000000..3400c832e2 --- /dev/null +++ b/library/cpp/yt/misc/unittests/typeid_ut.cpp @@ -0,0 +1,25 @@ +#include "typeid_sample.h" + +#include <library/cpp/yt/misc/typeid.h> + +#include <library/cpp/testing/gtest/gtest.h> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TTypeidTest, Complete) +{ + EXPECT_NE(std::string(Typeid<TTypeidComplete>().name()).find("TTypeidComplete"), std::string::npos); +} + +TEST(TTypeidTest, Incomplete) +{ + EXPECT_NE(std::string(Typeid<TTypeidIncomplete>().name()).find("TTypeidIncomplete"), std::string::npos); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/misc/unittests/ya.make b/library/cpp/yt/misc/unittests/ya.make index c914ca3061..ba7525f66a 100644 --- a/library/cpp/yt/misc/unittests/ya.make +++ b/library/cpp/yt/misc/unittests/ya.make @@ -12,6 +12,8 @@ SRCS( strong_typedef_ut.cpp tag_invoke_cpo_ut.cpp tag_invoke_impl_ut.cpp + typeid_sample.cpp + typeid_ut.cpp ) PEERDIR( diff --git a/library/cpp/yt/stockpile/README.md b/library/cpp/yt/stockpile/README.md deleted file mode 100644 index 6ee4cd1b1f..0000000000 --- a/library/cpp/yt/stockpile/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# stockpile - -При приближении к лимиту памяти в memory cgroup, linux запускает механизм direct reclaim, -чтобы освободить свободную память. По опыту YT, direct reclaim очень сильно замедляет работу -всего процесса. - -Проблема возникает не только, когда память занята анонимными страницами. 50% памяти контейнера -может быть занято не dirty страницами page cache, но проблема всёравно будет проявляться. Например, -если процесс активно читает файлы с диска без O_DIRECT, вся память очень быстро будет забита. - -Чтобы бороться с этой проблемой, в яндексовом ядре добавлена ручка `madvise(MADV_STOCKPILE)`. -Больше подробностей в https://st.yandex-team.ru/KERNEL-186
\ No newline at end of file diff --git a/library/cpp/yt/stockpile/stockpile.h b/library/cpp/yt/stockpile/stockpile.h deleted file mode 100644 index fae1b3a569..0000000000 --- a/library/cpp/yt/stockpile/stockpile.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include <library/cpp/yt/cpu_clock/clock.h> - -#include <library/cpp/yt/misc/enum.h> - -#include <util/system/types.h> - -#include <util/generic/size_literals.h> - -#include <util/datetime/base.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -DEFINE_ENUM(EStockpileStrategy, - ((FixedBreaks) (0)) - ((FlooredLoad) (1)) - ((ProgressiveBackoff) (2)) -); - -//////////////////////////////////////////////////////////////////////////////// - -struct TStockpileOptions -{ - static constexpr i64 DefaultBufferSize = 4_GBs; - i64 BufferSize = DefaultBufferSize; - - static constexpr int DefaultThreadCount = 4; - int ThreadCount = DefaultThreadCount; - - static constexpr EStockpileStrategy DefaultStrategy = EStockpileStrategy::FixedBreaks; - EStockpileStrategy Strategy = DefaultStrategy; - - static constexpr TDuration DefaultPeriod = TDuration::MilliSeconds(10); - TDuration Period = DefaultPeriod; -}; - -//////////////////////////////////////////////////////////////////////////////// - -class TStockpileManager -{ -public: - //! Configures the background stockpile threads. - //! Safe to call multiple times. - static void Reconfigure(TStockpileOptions options); -}; - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/library/cpp/yt/stockpile/stockpile_linux.cpp b/library/cpp/yt/stockpile/stockpile_linux.cpp deleted file mode 100644 index 8ae847dec7..0000000000 --- a/library/cpp/yt/stockpile/stockpile_linux.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include "stockpile.h" - -#include <library/cpp/yt/threading/spin_lock.h> - -#include <library/cpp/yt/misc/global.h> - -#include <library/cpp/yt/memory/leaky_singleton.h> - -#include <library/cpp/yt/logging/logger.h> - -#include <thread> - -#include <sys/mman.h> - -#include <util/system/thread.h> - -#include <string.h> - -namespace NYT { - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -YT_DEFINE_GLOBAL(const NLogging::TLogger, Logger, "Stockpile"); -constexpr int MADV_STOCKPILE = 0x59410004; - -} // namespace - -class TStockpileManagerImpl -{ -public: - static TStockpileManagerImpl* Get() - { - return LeakySingleton<TStockpileManagerImpl>(); - } - - void Reconfigure(TStockpileOptions options) - { - auto guard = Guard(SpinLock_); - - Run_.store(false); - for (const auto& thread : Threads_) { - thread->join(); - } - - Threads_.clear(); - Run_.store(true); - - Options_ = options; - - for (int threadIndex = 0; threadIndex < Options_.ThreadCount; ++threadIndex) { - Threads_.push_back(std::make_unique<std::thread>(&TStockpileManagerImpl::ThreadMain, this)); - } - } - -private: - DECLARE_LEAKY_SINGLETON_FRIEND(); - - const i64 PageSize_ = sysconf(_SC_PAGESIZE); - - YT_DECLARE_SPIN_LOCK(NThreading::TSpinLock, SpinLock_); - std::vector<std::unique_ptr<std::thread>> Threads_; - TStockpileOptions Options_; - std::atomic<bool> Run_ = false; - - void ThreadMain() - { - TThread::SetCurrentThreadName("Stockpile"); - - auto bufferSize = Options_.BufferSize; - auto period = Options_.Period; - - while (Run_.load()) { - switch (Options_.Strategy) { - case EStockpileStrategy::FixedBreaks: - RunWithFixedBreaks(Options_.BufferSize, Options_.Period); - break; - - case EStockpileStrategy::FlooredLoad: - RunWithCappedLoad(Options_.BufferSize, Options_.Period); - break; - - case EStockpileStrategy::ProgressiveBackoff: - std::tie(bufferSize, period) = RunWithBackoffs(bufferSize, period); - break; - - default: - YT_ABORT(); - } - } - } - - void RunWithFixedBreaks(i64 bufferSize, TDuration period) - { - auto returnCode = -::madvise(nullptr, bufferSize, MADV_STOCKPILE); - YT_LOG_DEBUG_IF(returnCode != 0, "System call \"madvise\" failed: %v", strerror(returnCode)); - - Sleep(period); - } - - void RunWithCappedLoad(i64 bufferSize, TDuration period) - { - auto started = GetApproximateCpuInstant(); - - auto returnCode = -::madvise(nullptr, bufferSize, MADV_STOCKPILE); - YT_LOG_DEBUG_IF(returnCode != 0, "System call \"madvise\" failed: %v", strerror(returnCode)); - - auto duration = CpuDurationToDuration(GetApproximateCpuInstant() - started); - if (duration < period) { - Sleep(period - duration); - } - } - - std::pair<i64, TDuration> RunWithBackoffs( - i64 adjustedBufferSize, - TDuration adjustedPeriod) - { - int result = ::madvise(nullptr, adjustedBufferSize, MADV_STOCKPILE); - if (result == 0) { - Sleep(Options_.Period); - return {Options_.BufferSize, Options_.Period}; - } - - YT_LOG_DEBUG("System call \"madvise\" failed: %v", strerror(errno)); - switch (errno) { - case ENOMEM: - if (adjustedBufferSize / 2 >= PageSize_) { - // Immediately make an attempt to reclaim half as much. - adjustedBufferSize = adjustedBufferSize / 2; - } else { - // Unless there is not even a single reclaimable page. - Sleep(Options_.Period); - } - return {adjustedBufferSize, Options_.Period}; - - case EAGAIN: - case EINTR: - Sleep(adjustedPeriod); - return {Options_.BufferSize, adjustedPeriod + Options_.Period}; - - default: - Sleep(Options_.Period); - return {Options_.BufferSize, Options_.Period}; - } - } -}; - -//////////////////////////////////////////////////////////////////////////////// - -void TStockpileManager::Reconfigure(TStockpileOptions options) -{ - TStockpileManagerImpl::Get()->Reconfigure(std::move(options)); -} - -//////////////////////////////////////////////////////////////////////////////// - -} // namespace NYT diff --git a/library/cpp/yt/stockpile/ya.make b/library/cpp/yt/stockpile/ya.make deleted file mode 100644 index 36ce673ba6..0000000000 --- a/library/cpp/yt/stockpile/ya.make +++ /dev/null @@ -1,18 +0,0 @@ -LIBRARY() - -INCLUDE(${ARCADIA_ROOT}/library/cpp/yt/ya_cpp.make.inc) - -IF (OS_LINUX AND NOT SANITIZER_TYPE) - SRCS(stockpile_linux.cpp) -ELSE() - SRCS(stockpile_other.cpp) -ENDIF() - -PEERDIR( - library/cpp/yt/misc - library/cpp/yt/threading - library/cpp/yt/logging - library/cpp/yt/memory -) - -END() diff --git a/library/cpp/yt/yson_string/convert.cpp b/library/cpp/yt/yson_string/convert.cpp index 68241adb78..1beb7cc4c7 100644 --- a/library/cpp/yt/yson_string/convert.cpp +++ b/library/cpp/yt/yson_string/convert.cpp @@ -11,12 +11,43 @@ #include <array> +#include <util/string/escape.h> + #include <util/stream/mem.h> namespace NYT::NYson { //////////////////////////////////////////////////////////////////////////////// +namespace NDetail { + +size_t FloatToStringWithNanInf(double value, char* buf, size_t size) +{ + if (std::isfinite(value)) { + return FloatToString(value, buf, size); + } + + static const TStringBuf nanLiteral = "%nan"; + static const TStringBuf infLiteral = "%inf"; + static const TStringBuf negativeInfLiteral = "%-inf"; + + TStringBuf str; + if (std::isnan(value)) { + str = nanLiteral; + } else if (std::isinf(value) && value > 0) { + str = infLiteral; + } else { + str = negativeInfLiteral; + } + YT_VERIFY(str.size() + 1 <= size); + ::memcpy(buf, str.data(), str.size() + 1); + return str.size(); +} + +} // namespace NDetail + +//////////////////////////////////////////////////////////////////////////////// + template <> TYsonString ConvertToYsonString<i8>(const i8& value) { @@ -385,4 +416,389 @@ TGuid ConvertFromYsonString<TGuid>(const TYsonStringBuf& str) //////////////////////////////////////////////////////////////////////////////// +template <> +TYsonString ConvertToTextYsonString<i8>(const i8& value) +{ + return ConvertToTextYsonString(static_cast<i64>(value)); +} + +template <> +TYsonString ConvertToTextYsonString<i32>(const i32& value) +{ + return ConvertToTextYsonString(static_cast<i64>(value)); +} + +template <> +TYsonString ConvertToTextYsonString<i64>(const i64& value) +{ + return TYsonString{::ToString(value)}; +} + +template <> +TYsonString ConvertToTextYsonString<ui8>(const ui8& value) +{ + return ConvertToTextYsonString(static_cast<ui64>(value)); +} + +template <> +TYsonString ConvertToTextYsonString<ui32>(const ui32& value) +{ + return ConvertToTextYsonString(static_cast<ui64>(value)); +} + +template <> +TYsonString ConvertToTextYsonString<ui64>(const ui64& value) +{ + return TYsonString{::ToString(value) + 'u'}; +} + +template <> +TYsonString ConvertToTextYsonString<TString>(const TString& value) +{ + return ConvertToTextYsonString(TStringBuf(value)); +} + +template <> +TYsonString ConvertToTextYsonString<std::string>(const std::string& value) +{ + return ConvertToTextYsonString(TStringBuf(value)); +} + +template <> +TYsonString ConvertToTextYsonString<TStringBuf>(const TStringBuf& value) +{ + return TYsonString(NYT::Format("\"%v\"", ::EscapeC(value))); +} + +template <> +TYsonString ConvertToTextYsonString<std::string_view>(const std::string_view& value) +{ + return ConvertToTextYsonString(TStringBuf(value)); +} + +TYsonString ConvertToTextYsonString(const char* value) +{ + return ConvertToTextYsonString(TStringBuf(value)); +} + +template <> +TYsonString ConvertToTextYsonString<float>(const float& value) +{ + return ConvertToTextYsonString(static_cast<double>(value)); +} + +template <> +TYsonString ConvertToTextYsonString<double>(const double& value) +{ + char buf[256]; + auto str = TStringBuf(buf, NDetail::FloatToStringWithNanInf(value, buf, sizeof(buf))); + auto ret = NYT::Format( + "%v%v", + str, + MakeFormatterWrapper([&] (TStringBuilderBase* builder) { + if (str.find('.') == TString::npos && str.find('e') == TString::npos && std::isfinite(value)) { + builder->AppendChar('.'); + } + })); + return TYsonString(std::move(ret)); +} + +template <> +TYsonString ConvertToTextYsonString<bool>(const bool& value) +{ + return value + ? TYsonString(TStringBuf("%true")) + : TYsonString(TStringBuf("%false")); +} + +template <> +TYsonString ConvertToTextYsonString<TInstant>(const TInstant& value) +{ + return ConvertToTextYsonString(value.ToString()); +} + +template <> +TYsonString ConvertToTextYsonString<TDuration>(const TDuration& value) +{ + // ConvertTo does unchecked cast to i64 :(. + return ConvertToTextYsonString(static_cast<i64>(value.MilliSeconds())); +} + +template <> +TYsonString ConvertToTextYsonString<TGuid>(const TGuid& value) +{ + return ConvertToTextYsonString(NYT::ToString(value)); +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +template <class TSomeInt> +TSomeInt ReadTextUint(TStringBuf strBuf) +{ + // Drop 'u' + return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length() - 1}); +} + +template <class TSomeInt> +TSomeInt ReadTextInt(TStringBuf strBuf) +{ + return ::FromString<TSomeInt>(TStringBuf{strBuf.data(), strBuf.length()}); +} + +bool IsNumeric(TStringBuf strBuf) +{ + bool isNumeric = true; + bool isNegative = false; + for (int i = 0; i < std::ssize(strBuf); ++i) { + char c = strBuf[i]; + + if (!('0' <= c && c <= '9')) { + if (i == 0 && c == '-') { + isNegative = true; + continue; + } + if (i == std::ssize(strBuf) - 1 && c == 'u' && !isNegative) { + continue; + } + isNumeric = false; + break; + } + } + + return isNumeric; +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TSomeInt> +TSomeInt ParseSomeIntFromTextYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + + if (std::ssize(strBuf) == 0 || !IsNumeric(strBuf)) { + throw TYsonLiteralParseException(NYT::Format( + "Unexpected %v\n" + "Value is not numeric", + strBuf)); + } + + if (strBuf.back() == 'u') { + // Drop 'u' + return ReadTextUint<TSomeInt>(strBuf); + } else { + return ReadTextInt<TSomeInt>(strBuf); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +TString DoParseStringFromTextYson(TStringBuf strBuf) +{ + // Remove quotation marks. + return ::UnescapeC(TStringBuf{strBuf.data() + 1, strBuf.length() - 2}); +} + +TString ParseStringFromTextYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + if (std::ssize(strBuf) < 2 || strBuf.front() != '\"' || strBuf.back() != '\"') { + throw TYsonLiteralParseException(Format( + "Unexpected %v\n" + "Text yson string must begin and end with \\\"", + strBuf)); + } + return DoParseStringFromTextYson(strBuf); +} + +//////////////////////////////////////////////////////////////////////////////// + +double ParseDoubleFromTextYsonString(const TYsonStringBuf& str) +{ + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + + if (std::ssize(strBuf) < 2) { + throw TYsonLiteralParseException(Format( + "Incorrect remaining string length: expected at least 2, got %v", + std::ssize(strBuf))); + } + + // Check special values first. + // %nan + // %inf, %+inf, %-inf + if (strBuf[0] == '%') { + switch (strBuf[1]) { + case '+': + case 'i': + return std::numeric_limits<double>::infinity(); + + case '-': + return -std::numeric_limits<double>::infinity(); + + case 'n': + return std::numeric_limits<double>::quiet_NaN(); + + default: + throw TYsonLiteralParseException(Format( + "Incorrect %%-literal %v", + strBuf)); + } + } + + return ::FromString<double>(strBuf); +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +#define PARSE_INT(type, underlyingType) \ + template <> \ + type ConvertFromTextYsonString<type>(const TYsonStringBuf& str) \ + { \ + try { \ + return CheckedIntegralCast<type>(ParseSomeIntFromTextYsonString<underlyingType>(str)); \ + } catch (const std::exception& ex) { \ + throw TYsonLiteralParseException(ex, "Error parsing \"" #type "\" value from YSON"); \ + } \ + } + +PARSE_INT(i8, i64) +PARSE_INT(i16, i64) +PARSE_INT(i32, i64) +PARSE_INT(i64, i64) +PARSE_INT(ui8, ui64) +PARSE_INT(ui16, ui64) +PARSE_INT(ui32, ui64) +PARSE_INT(ui64, ui64) + +#undef PARSE + +template <> +TString ConvertFromTextYsonString<TString>(const TYsonStringBuf& str) +{ + try { + return ParseStringFromTextYsonString(str); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"string\" value from YSON"); + } +} + +template <> +std::string ConvertFromTextYsonString<std::string>(const TYsonStringBuf& str) +{ + return std::string{ConvertFromTextYsonString<TString>(str)}; +} + +template <> +float ConvertFromTextYsonString<float>(const TYsonStringBuf& str) +{ + try { + return static_cast<float>(ParseDoubleFromTextYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"float\" value from YSON"); + } +} + +template <> +double ConvertFromTextYsonString<double>(const TYsonStringBuf& str) +{ + try { + return ParseDoubleFromTextYsonString(str); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"double\" value from YSON"); + } +} + +template <> +bool ConvertFromTextYsonString<bool>(const TYsonStringBuf& str) +{ + try { + YT_ASSERT(str.GetType() == EYsonType::Node); + auto strBuf = str.AsStringBuf(); + + if (std::ssize(strBuf) == 0) { + throw TYsonLiteralParseException("Empty string"); + } + + char ch = strBuf.front(); + + if (ch == '%') { + if (strBuf != "%true" && strBuf != "%false") { + throw TYsonLiteralParseException(Format( + "Expected %%true or %%false but found %v", + strBuf)); + } + return strBuf == "%true"; + } + + if (ch == '\"') { + return ParseBool(DoParseStringFromTextYson(strBuf)); + } + + // NB(arkady-e1ppa): This check is linear in size(strBuf) + // And thus is tried as the last resort. + if (IsNumeric(strBuf)) { + auto checkValue = [&] (const auto& functor) { + auto value = functor(strBuf); + if (value != 0 && value != 1) { + throw TYsonLiteralParseException(Format( + "Expected 0 or 1 but found %v", + value)); + } + return static_cast<bool>(value); + }; + + if (strBuf.back() == 'u') { + return checkValue(&ReadTextUint<ui64>); + } else { + return checkValue(&ReadTextInt<i64>); + } + } + + throw TYsonLiteralParseException(Format( + "Unexpected %v\n" + "No known conversion to \"boolean\" value", + strBuf)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"boolean\" value from YSON"); + } +} + +template <> +TInstant ConvertFromTextYsonString<TInstant>(const TYsonStringBuf& str) +{ + try { + return TInstant::ParseIso8601(ParseStringFromTextYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"instant\" value from YSON"); + } +} + +template <> +TDuration ConvertFromTextYsonString<TDuration>(const TYsonStringBuf& str) +{ + try { + return TDuration::MilliSeconds(ParseSomeIntFromTextYsonString<i64>(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"duration\" value from YSON"); + } +} + +template <> +TGuid ConvertFromTextYsonString<TGuid>(const TYsonStringBuf& str) +{ + try { + return TGuid::FromString(ParseStringFromTextYsonString(str)); + } catch (const std::exception& ex) { + throw TYsonLiteralParseException(ex, "Error parsing \"guid\" value from YSON"); + } +} + +//////////////////////////////////////////////////////////////////////////////// + } // namespace NYT::NYson diff --git a/library/cpp/yt/yson_string/convert.h b/library/cpp/yt/yson_string/convert.h index eedb0939e0..a51821fa30 100644 --- a/library/cpp/yt/yson_string/convert.h +++ b/library/cpp/yt/yson_string/convert.h @@ -13,6 +13,15 @@ namespace NYT::NYson { //////////////////////////////////////////////////////////////////////////////// + +namespace NDetail { + +size_t FloatToStringWithNanInf(double value, char* buf, size_t size); + +} // namespace NDetail + +//////////////////////////////////////////////////////////////////////////////// + // Generic forward declarations. template <class T> @@ -24,6 +33,13 @@ TYsonString ConvertToYsonString(const T& value, EYsonFormat format); template <class T> T ConvertFromYsonString(const TYsonStringBuf& str); +// TODO(arkady-e1ppa): Move those to library/cpp/yt/error +// and swap to std::string(_view) to drop dep on library/cpp/yson_string. +template <class T> +TYsonString ConvertToTextYsonString(const T& value) = delete; +template <class T> +T ConvertFromTextYsonString(const TYsonStringBuf& str) = delete; + //////////////////////////////////////////////////////////////////////////////// // Basic specializations for ConvertToYsonString. @@ -44,8 +60,6 @@ TYsonString ConvertToYsonString<ui64>(const ui64& value); template <> TYsonString ConvertToYsonString<TString>(const TString& value); template <> -TYsonString ConvertToYsonString<std::string>(const std::string& value); -template <> TYsonString ConvertToYsonString<TStringBuf>(const TStringBuf& value); TYsonString ConvertToYsonString(const char* value); @@ -93,6 +107,8 @@ ui64 ConvertFromYsonString<ui64>(const TYsonStringBuf& str); template <> TString ConvertFromYsonString<TString>(const TYsonStringBuf& str); +template <> +std::string ConvertFromYsonString<std::string>(const TYsonStringBuf& str); template <> float ConvertFromYsonString<float>(const TYsonStringBuf& str); @@ -113,4 +129,85 @@ TGuid ConvertFromYsonString<TGuid>(const TYsonStringBuf& str); //////////////////////////////////////////////////////////////////////////////// +template <> +TYsonString ConvertToTextYsonString<i8>(const i8& value); +template <> +TYsonString ConvertToTextYsonString<i32>(const i32& value); +template <> +TYsonString ConvertToTextYsonString<i64>(const i64& value); + +template <> +TYsonString ConvertToTextYsonString<ui8>(const ui8& value); +template <> +TYsonString ConvertToTextYsonString<ui32>(const ui32& value); +template <> +TYsonString ConvertToTextYsonString<ui64>(const ui64& value); + +template <> +TYsonString ConvertToTextYsonString<TString>(const TString& value); +template <> +TYsonString ConvertToTextYsonString<std::string>(const std::string& value); +template <> +TYsonString ConvertToTextYsonString<TStringBuf>(const TStringBuf& value); +template <> +TYsonString ConvertToTextYsonString<std::string_view>(const std::string_view& value); +TYsonString ConvertToTextYsonString(const char* value); + +template <> +TYsonString ConvertToTextYsonString<float>(const float& value); +template <> +TYsonString ConvertToTextYsonString<double>(const double& value); + +template <> +TYsonString ConvertToTextYsonString<bool>(const bool& value); + +template <> +TYsonString ConvertToTextYsonString<TInstant>(const TInstant& value); + +template <> +TYsonString ConvertToTextYsonString<TDuration>(const TDuration& value); + +template <> +TYsonString ConvertToTextYsonString<TGuid>(const TGuid& value); + +//////////////////////////////////////////////////////////////////////////////// + +template <> +i8 ConvertFromTextYsonString<i8>(const TYsonStringBuf& str); +template <> +i32 ConvertFromTextYsonString<i32>(const TYsonStringBuf& str); +template <> +i64 ConvertFromTextYsonString<i64>(const TYsonStringBuf& str); + +template <> +ui8 ConvertFromTextYsonString<ui8>(const TYsonStringBuf& str); +template <> +ui32 ConvertFromTextYsonString<ui32>(const TYsonStringBuf& str); +template <> +ui64 ConvertFromTextYsonString<ui64>(const TYsonStringBuf& str); + +template <> +TString ConvertFromTextYsonString<TString>(const TYsonStringBuf& str); +template <> +std::string ConvertFromTextYsonString<std::string>(const TYsonStringBuf& str); + +template <> +float ConvertFromTextYsonString<float>(const TYsonStringBuf& str); +template <> +double ConvertFromTextYsonString<double>(const TYsonStringBuf& str); + +template <> +bool ConvertFromTextYsonString<bool>(const TYsonStringBuf& str); + +template <> +TInstant ConvertFromTextYsonString<TInstant>(const TYsonStringBuf& str); + +template <> +TDuration ConvertFromTextYsonString<TDuration>(const TYsonStringBuf& str); + +template <> +TGuid ConvertFromTextYsonString<TGuid>(const TYsonStringBuf& str); + +//////////////////////////////////////////////////////////////////////////////// + } // namespace NYT::NYson |