aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yson/node
diff options
context:
space:
mode:
author ionagamed <ionagamed@yandex-team.com> 2024-04-15 10:18:45 +0300
committer ionagamed <ionagamed@yandex-team.com> 2024-04-15 10:28:06 +0300
commit c015541a60f8d93070c53511daaff81db730d194 (patch)
tree 248d7d962c718e75801036109fdef147bc1880ed /library/cpp/yson/node
parent 7930380b354abe9969174901a4e8a730ab1d0906 (diff)
download ydb-c015541a60f8d93070c53511daaff81db730d194.tar.gz
YT: Add NodeFromYsonStreamNonGreedy; use it in TNode::Load
В рамках 3547980204d51d6eba4c3b56989a916379526673
Diffstat (limited to 'library/cpp/yson/node')
-rw-r--r-- library/cpp/yson/node/benchmark/saveload.cpp 57
-rw-r--r-- library/cpp/yson/node/benchmark/ya.make 1
-rw-r--r-- library/cpp/yson/node/node.cpp 2
-rw-r--r-- library/cpp/yson/node/node_io.cpp 33
-rw-r--r-- library/cpp/yson/node/node_io.h 7
-rw-r--r-- library/cpp/yson/node/node_ut.cpp 31
6 files changed, 125 insertions, 6 deletions
diff --git a/library/cpp/yson/node/benchmark/saveload.cpp b/library/cpp/yson/node/benchmark/saveload.cpp
new file mode 100644
index 0000000000..838075f2e4
--- /dev/null
+++ b/library/cpp/yson/node/benchmark/saveload.cpp
@@ -0,0 +1,57 @@
+#include <benchmark/benchmark.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+using namespace NYT;
+
+namespace {
+
+// Builds a list node containing `size` string nodes, each holding "val".
+static NYT::TNode GenerateList(size_t size)
+{
+    NYT::TNode result = NYT::TNode::CreateList();
+    auto& items = result.AsList();
+
+    for (size_t remaining = size; remaining != 0; --remaining) {
+        items.emplace_back(NYT::TNode("val"));
+    }
+
+    return result;
+}
+
+} // namespace
+
+// Benchmarks NodeFromYsonStream (the greedy reader) on a binary-YSON list of
+// `size` elements. The list is serialized once up front; each iteration only
+// re-parses the same bytes from a fresh input stream.
+static void BM_SaveLoadGreedy(benchmark::State& state, size_t size)
+{
+    const auto list = GenerateList(size);
+
+    // Serialize outside the timed loop so that only parsing is measured.
+    TString bytes;
+    TStringOutput outputStream{bytes};
+    NodeToYsonStream(list, &outputStream, ::NYson::EYsonFormat::Binary);
+
+    for (const auto& _ : state) {
+        TStringInput inputStream{bytes};
+        auto node = NodeFromYsonStream(&inputStream);
+        // Keep the parsed node observable so the measured call cannot be optimized away.
+        benchmark::DoNotOptimize(node);
+    }
+}
+
+// Benchmarks NodeFromYsonStreamNonGreedy on a binary-YSON list of `size`
+// elements. The list is serialized once up front; each iteration only
+// re-parses the same bytes from a fresh input stream.
+static void BM_SaveLoadNonGreedy(benchmark::State& state, size_t size)
+{
+    const auto list = GenerateList(size);
+
+    // Serialize outside the timed loop so that only parsing is measured.
+    TString bytes;
+    TStringOutput outputStream{bytes};
+    NodeToYsonStream(list, &outputStream, ::NYson::EYsonFormat::Binary);
+
+    for (const auto& _ : state) {
+        TStringInput inputStream{bytes};
+        auto node = NodeFromYsonStreamNonGreedy(&inputStream);
+        // Keep the parsed node observable so the measured call cannot be optimized away.
+        benchmark::DoNotOptimize(node);
+    }
+}
+
+// Register the greedy and non-greedy benchmarks pairwise for each list size;
+// the trailing argument of every capture is passed to the benchmark as `size`.
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_10, 10ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_10, 10ul);
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_100, 100ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_100, 100ul);
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_1000, 1000ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_1000, 1000ul);
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_10000, 10000ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_10000, 10000ul);
diff --git a/library/cpp/yson/node/benchmark/ya.make b/library/cpp/yson/node/benchmark/ya.make
index dd2035b1fa..53a6e5f48c 100644
--- a/library/cpp/yson/node/benchmark/ya.make
+++ b/library/cpp/yson/node/benchmark/ya.make
@@ -2,6 +2,7 @@ G_BENCHMARK()
SRCS(
reserve.cpp
+ saveload.cpp
)
PEERDIR(
diff --git a/library/cpp/yson/node/node.cpp b/library/cpp/yson/node/node.cpp
index 5156033cfe..f142ae0f42 100644
--- a/library/cpp/yson/node/node.cpp
+++ b/library/cpp/yson/node/node.cpp
@@ -861,7 +861,7 @@ void TNode::Save(IOutputStream* out) const
void TNode::Load(IInputStream* in)
{
Clear();
- *this = NodeFromYsonStream(in, ::NYson::EYsonType::Node);
+ // Parse non-greedily so that whatever follows the node in `in` stays unread.
+ *this = NodeFromYsonStreamNonGreedy(in, ::NYson::EYsonType::Node);
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp
index 2e191d8d48..d8a05ec995 100644
--- a/library/cpp/yson/node/node_io.cpp
+++ b/library/cpp/yson/node/node_io.cpp
@@ -11,6 +11,7 @@
#include <library/cpp/json/json_reader.h>
#include <library/cpp/json/json_value.h>
+#include <util/generic/size_literals.h>
#include <util/stream/input.h>
#include <util/stream/output.h>
#include <util/stream/str.h>
@@ -82,6 +83,28 @@ static TNode CreateEmptyNodeByType(::NYson::EYsonType type)
return result;
}
+// Shared implementation of the stream-parsing entry points.
+// `consumeUntilEof` is forwarded to the parser and also selects the buffer size limit.
+static TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type, bool consumeUntilEof)
+{
+    // When not consuming until EOF, other values might be in the stream,
+    // so read one symbol at a time; otherwise allow a 64 KB limit.
+    const ui64 bufferSizeLimit = consumeUntilEof ? 64_KB : 1;
+
+    TNode parsed = CreateEmptyNodeByType(type);
+    TNodeBuilder nodeBuilder(&parsed);
+
+    ::NYson::TYsonParser ysonParser(
+        &nodeBuilder,
+        input,
+        type,
+        /*enableLinePositionInfo*/ false,
+        bufferSizeLimit,
+        consumeUntilEof);
+    ysonParser.Parse();
+
+    return parsed;
+}
+
TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type)
{
TMemoryInput stream(input);
@@ -104,12 +127,12 @@ TString NodeToCanonicalYsonString(const TNode& node, NYson::EYsonFormat format)
+// Greedy variant: delegates to the shared helper with consumeUntilEof = true.
TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type)
{
- TNode result = CreateEmptyNodeByType(type);
+ return NodeFromYsonStream(input, type, /*consumeUntilEof*/ true);
+}
- TNodeBuilder builder(&result);
- ::NYson::TYsonParser parser(&builder, input, type);
- parser.Parse();
- return result;
+// Non-greedy variant: delegates to the shared helper with consumeUntilEof = false.
+TNode NodeFromYsonStreamNonGreedy(IInputStream* input, ::NYson::EYsonType type)
+{
+ return NodeFromYsonStream(input, type, /*consumeUntilEof*/ false);
}
void NodeToYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format)
diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h
index 1348d88bbb..2db1318db4 100644
--- a/library/cpp/yson/node/node_io.h
+++ b/library/cpp/yson/node/node_io.h
@@ -23,6 +23,13 @@ TString NodeToCanonicalYsonString(const TNode& node, ::NYson::EYsonFormat format
// Parse TNode from stream in YSON format
TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+// Parse TNode from stream in YSON format without consuming the rest of the stream.
+// Stops reading from `input` as soon as valid YSON has been read, leaving the remainder of the stream unread.
+// Used in TNode::Load so that other values can be saved to / loaded from the same stream after the TNode.
+// NB: This is substantially slower (1.5-2x using the benchmark from `./benchmark/saveload.cpp`) than
+// the original `NodeFromYsonStream`.
+TNode NodeFromYsonStreamNonGreedy(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+
// Serialize TNode to stream in one of YSON formats with random order of maps' keys (don't use in tests)
void NodeToYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
diff --git a/library/cpp/yson/node/node_ut.cpp b/library/cpp/yson/node/node_ut.cpp
index 728a926283..80d231cd09 100644
--- a/library/cpp/yson/node/node_ut.cpp
+++ b/library/cpp/yson/node/node_ut.cpp
@@ -279,6 +279,37 @@ Y_UNIT_TEST_SUITE(YtNodeTest) {
UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy);
}
+    // Saves a string, a TNode with attributes and another string into one stream,
+    // then loads them back in the same order and checks all three values survive.
+    Y_UNIT_TEST(TestSaveLoadWithNeighbours) {
+        TString stringBefore = "before";
+        TString stringAfter = "after";
+
+        TNode node = TNode()("foo", "bar")("baz", 42);
+        node.Attributes()["attr_name"] = "attr_value";
+
+        // Write all three values back to back into a single buffer.
+        TString serialized;
+        {
+            TStringOutput out(serialized);
+            ::Save(&out, stringBefore);
+            ::Save(&out, node);
+            ::Save(&out, stringAfter);
+        }
+
+        // Read them back from one input stream, in the order they were written.
+        TString deserializedStringBefore;
+        TNode nodeCopy;
+        TString deserializedStringAfter;
+        {
+            TStringInput in(serialized);
+            ::Load(&in, deserializedStringBefore);
+            ::Load(&in, nodeCopy);
+            ::Load(&in, deserializedStringAfter);
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL(stringBefore, deserializedStringBefore);
+        UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy);
+        UNIT_ASSERT_VALUES_EQUAL(stringAfter, deserializedStringAfter);
+    }
+
Y_UNIT_TEST(TestIntCast) {
TNode node = 1ull << 31;
UNIT_ASSERT(node.IsUint64());