diff options
author | ionagamed <ionagamed@yandex-team.com> | 2024-04-15 10:18:45 +0300 |
---|---|---|
committer | ionagamed <ionagamed@yandex-team.com> | 2024-04-15 10:28:06 +0300 |
commit | c015541a60f8d93070c53511daaff81db730d194 (patch) | |
tree | 248d7d962c718e75801036109fdef147bc1880ed /library/cpp/yson/node | |
parent | 7930380b354abe9969174901a4e8a730ab1d0906 (diff) | |
download | ydb-c015541a60f8d93070c53511daaff81db730d194.tar.gz |
YT: Add NodeFromYsonStreamNonGreedy; use it in TNode::Load
В рамках
3547980204d51d6eba4c3b56989a916379526673
Diffstat (limited to 'library/cpp/yson/node')
-rw-r--r-- | library/cpp/yson/node/benchmark/saveload.cpp | 57 | ||||
-rw-r--r-- | library/cpp/yson/node/benchmark/ya.make | 1 | ||||
-rw-r--r-- | library/cpp/yson/node/node.cpp | 2 | ||||
-rw-r--r-- | library/cpp/yson/node/node_io.cpp | 33 | ||||
-rw-r--r-- | library/cpp/yson/node/node_io.h | 7 | ||||
-rw-r--r-- | library/cpp/yson/node/node_ut.cpp | 31 |
6 files changed, 125 insertions, 6 deletions
diff --git a/library/cpp/yson/node/benchmark/saveload.cpp b/library/cpp/yson/node/benchmark/saveload.cpp new file mode 100644 index 0000000000..838075f2e4 --- /dev/null +++ b/library/cpp/yson/node/benchmark/saveload.cpp @@ -0,0 +1,57 @@ +#include <benchmark/benchmark.h> + +#include <library/cpp/yson/node/node_io.h> + +using namespace NYT; + +namespace { + +static NYT::TNode GenerateList(size_t size) +{ + NYT::TNode result = NYT::TNode::CreateList(); + + for (size_t i = 0; i < size; ++i) { + result.AsList().emplace_back(NYT::TNode("val")); + } + + return result; +} + +} // namespace + +static void BM_SaveLoadGreedy(benchmark::State& state, size_t size) +{ + auto list = GenerateList(size); + + TString bytes; + TStringOutput outputStream{bytes}; + NodeToYsonStream(list, &outputStream, ::NYson::EYsonFormat::Binary); + + for (const auto& _ : state) { + TStringInput inputStream{bytes}; + NodeFromYsonStream(&inputStream); + } +} + +static void BM_SaveLoadNonGreedy(benchmark::State& state, size_t size) +{ + auto list = GenerateList(size); + + TString bytes; + TStringOutput outputStream{bytes}; + NodeToYsonStream(list, &outputStream, ::NYson::EYsonFormat::Binary); + + for (const auto& _ : state) { + TStringInput inputStream{bytes}; + NodeFromYsonStreamNonGreedy(&inputStream); + } +} + +BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_10, 10ul); +BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_10, 10ul); +BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_100, 100ul); +BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_100, 100ul); +BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_1000, 1000ul); +BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_1000, 1000ul); +BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_10000, 10000ul); +BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_10000, 10000ul); diff --git a/library/cpp/yson/node/benchmark/ya.make b/library/cpp/yson/node/benchmark/ya.make index dd2035b1fa..53a6e5f48c 100644 --- a/library/cpp/yson/node/benchmark/ya.make +++ b/library/cpp/yson/node/benchmark/ya.make @@ -2,6 +2,7 @@ G_BENCHMARK() SRCS( reserve.cpp + saveload.cpp ) PEERDIR( diff --git a/library/cpp/yson/node/node.cpp b/library/cpp/yson/node/node.cpp index 5156033cfe..f142ae0f42 100644 --- a/library/cpp/yson/node/node.cpp +++ b/library/cpp/yson/node/node.cpp @@ -861,7 +861,7 @@ void TNode::Save(IOutputStream* out) const void TNode::Load(IInputStream* in) { Clear(); - *this = NodeFromYsonStream(in, ::NYson::EYsonType::Node); + *this = NodeFromYsonStreamNonGreedy(in, ::NYson::EYsonType::Node); } //////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp index 2e191d8d48..d8a05ec995 100644 --- a/library/cpp/yson/node/node_io.cpp +++ b/library/cpp/yson/node/node_io.cpp @@ -11,6 +11,7 @@ #include <library/cpp/json/json_reader.h> #include <library/cpp/json/json_value.h> +#include <util/generic/size_literals.h> #include <util/stream/input.h> #include <util/stream/output.h> #include <util/stream/str.h> @@ -82,6 +83,28 @@ static TNode CreateEmptyNodeByType(::NYson::EYsonType type) return result; } +static TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type, bool consumeUntilEof) +{ + TNode result = CreateEmptyNodeByType(type); + + ui64 bufferSizeLimit = 64_KB; + if (!consumeUntilEof) { + // Other values might be in the stream, so reading one symbol at a time. + bufferSizeLimit = 1; + } + + TNodeBuilder builder(&result); + ::NYson::TYsonParser parser( + &builder, + input, + type, + /*enableLinePositionInfo*/ false, + bufferSizeLimit, + consumeUntilEof); + parser.Parse(); + return result; +} + TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type) { TMemoryInput stream(input); @@ -104,12 +127,12 @@ TString NodeToCanonicalYsonString(const TNode& node, NYson::EYsonFormat format) TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type) { - TNode result = CreateEmptyNodeByType(type); + return NodeFromYsonStream(input, type, /*consumeUntilEof*/ true); +} - TNodeBuilder builder(&result); - ::NYson::TYsonParser parser(&builder, input, type); - parser.Parse(); - return result; +TNode NodeFromYsonStreamNonGreedy(IInputStream* input, ::NYson::EYsonType type) +{ + return NodeFromYsonStream(input, type, /*consumeUntilEof*/ false); } void NodeToYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format) diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h index 1348d88bbb..2db1318db4 100644 --- a/library/cpp/yson/node/node_io.h +++ b/library/cpp/yson/node/node_io.h @@ -23,6 +23,13 @@ TString NodeToCanonicalYsonString(const TNode& node, ::NYson::EYsonFormat format // Parse TNode from stream in YSON format TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node); +// Parse TNode from stream in YSON format. +// NB: This is substantially slower (1.5-2x using the benchmark from `./benchmark/saveload.cpp`) than using +// the original `NodeFromYsonStream`. +// Stops reading from `input` as soon as some valid YSON was read, leaving the remainder of the stream unread. +// Used in TNode::Load to support cases of saveloading multiple values after the TNode from/to the same stream. +TNode NodeFromYsonStreamNonGreedy(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node); + // Serialize TNode to stream in one of YSON formats with random order of maps' keys (don't use in tests) void NodeToYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text); diff --git a/library/cpp/yson/node/node_ut.cpp b/library/cpp/yson/node/node_ut.cpp index 728a926283..80d231cd09 100644 --- a/library/cpp/yson/node/node_ut.cpp +++ b/library/cpp/yson/node/node_ut.cpp @@ -279,6 +279,37 @@ Y_UNIT_TEST_SUITE(YtNodeTest) { UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy); } + Y_UNIT_TEST(TestSaveLoadWithNeighbours) { + TString stringBefore = "before"; + + TNode node = TNode()("foo", "bar")("baz", 42); + node.Attributes()["attr_name"] = "attr_value"; + + TString stringAfter = "after"; + + TString bytes; + { + TStringOutput s(bytes); + ::Save(&s, stringBefore); + ::Save(&s, node); + ::Save(&s, stringAfter); + } + + TString deserializedStringBefore; + TString deserializedStringAfter; + TNode nodeCopy; + { + TStringInput s(bytes); + ::Load(&s, deserializedStringBefore); + ::Load(&s, nodeCopy); + ::Load(&s, deserializedStringAfter); + } + + UNIT_ASSERT_VALUES_EQUAL(stringBefore, deserializedStringBefore); + UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy); + UNIT_ASSERT_VALUES_EQUAL(stringAfter, deserializedStringAfter); + } + Y_UNIT_TEST(TestIntCast) { TNode node = 1ull << 31; UNIT_ASSERT(node.IsUint64()); |