about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp
diff options
context:
space:
mode:
author: ionagamed <ionagamed@yandex-team.com>, 2024-04-15 10:18:45 +0300
committer: ionagamed <ionagamed@yandex-team.com>, 2024-04-15 10:28:06 +0300
commit: c015541a60f8d93070c53511daaff81db730d194 (patch)
tree: 248d7d962c718e75801036109fdef147bc1880ed /library/cpp
parent: 7930380b354abe9969174901a4e8a730ab1d0906 (diff)
download: ydb-c015541a60f8d93070c53511daaff81db730d194.tar.gz
YT: Add NodeFromYsonStreamNonGreedy; use it in TNode::Load
В рамках 3547980204d51d6eba4c3b56989a916379526673 (English: "As part of 3547980204d51d6eba4c3b56989a916379526673")
Diffstat (limited to 'library/cpp')
-rw-r--r-- library/cpp/yson/node/benchmark/saveload.cpp 57
-rw-r--r-- library/cpp/yson/node/benchmark/ya.make 1
-rw-r--r-- library/cpp/yson/node/node.cpp 2
-rw-r--r-- library/cpp/yson/node/node_io.cpp 33
-rw-r--r-- library/cpp/yson/node/node_io.h 7
-rw-r--r-- library/cpp/yson/node/node_ut.cpp 31
-rw-r--r-- library/cpp/yson/parser.cpp 14
-rw-r--r-- library/cpp/yson/parser.h 2
-rw-r--r-- library/cpp/yson/parser_detail.h 33
9 files changed, 160 insertions, 20 deletions
diff --git a/library/cpp/yson/node/benchmark/saveload.cpp b/library/cpp/yson/node/benchmark/saveload.cpp
new file mode 100644
index 0000000000..838075f2e4
--- /dev/null
+++ b/library/cpp/yson/node/benchmark/saveload.cpp
@@ -0,0 +1,57 @@
+#include <benchmark/benchmark.h>
+
+#include <library/cpp/yson/node/node_io.h>
+
+using namespace NYT;
+
+namespace {
+
+static NYT::TNode GenerateList(size_t size)
+{
+ NYT::TNode result = NYT::TNode::CreateList();
+
+ for (size_t i = 0; i < size; ++i) {
+ result.AsList().emplace_back(NYT::TNode("val"));
+ }
+
+ return result;
+}
+
+} // namespace
+
+static void BM_SaveLoadGreedy(benchmark::State& state, size_t size)
+{
+ auto list = GenerateList(size);
+
+ TString bytes;
+ TStringOutput outputStream{bytes};
+ NodeToYsonStream(list, &outputStream, ::NYson::EYsonFormat::Binary);
+
+ for (const auto& _ : state) {
+ TStringInput inputStream{bytes};
+ NodeFromYsonStream(&inputStream);
+ }
+}
+
+static void BM_SaveLoadNonGreedy(benchmark::State& state, size_t size)
+{
+ auto list = GenerateList(size);
+
+ TString bytes;
+ TStringOutput outputStream{bytes};
+ NodeToYsonStream(list, &outputStream, ::NYson::EYsonFormat::Binary);
+
+ for (const auto& _ : state) {
+ TStringInput inputStream{bytes};
+ NodeFromYsonStreamNonGreedy(&inputStream);
+ }
+}
+
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_10, 10ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_10, 10ul);
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_100, 100ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_100, 100ul);
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_1000, 1000ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_1000, 1000ul);
+BENCHMARK_CAPTURE(BM_SaveLoadGreedy, greedy_10000, 10000ul);
+BENCHMARK_CAPTURE(BM_SaveLoadNonGreedy, non_greedy_10000, 10000ul);
diff --git a/library/cpp/yson/node/benchmark/ya.make b/library/cpp/yson/node/benchmark/ya.make
index dd2035b1fa..53a6e5f48c 100644
--- a/library/cpp/yson/node/benchmark/ya.make
+++ b/library/cpp/yson/node/benchmark/ya.make
@@ -2,6 +2,7 @@ G_BENCHMARK()
SRCS(
reserve.cpp
+ saveload.cpp
)
PEERDIR(
diff --git a/library/cpp/yson/node/node.cpp b/library/cpp/yson/node/node.cpp
index 5156033cfe..f142ae0f42 100644
--- a/library/cpp/yson/node/node.cpp
+++ b/library/cpp/yson/node/node.cpp
@@ -861,7 +861,7 @@ void TNode::Save(IOutputStream* out) const
void TNode::Load(IInputStream* in)
{
Clear();
- *this = NodeFromYsonStream(in, ::NYson::EYsonType::Node);
+ *this = NodeFromYsonStreamNonGreedy(in, ::NYson::EYsonType::Node);
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp
index 2e191d8d48..d8a05ec995 100644
--- a/library/cpp/yson/node/node_io.cpp
+++ b/library/cpp/yson/node/node_io.cpp
@@ -11,6 +11,7 @@
#include <library/cpp/json/json_reader.h>
#include <library/cpp/json/json_value.h>
+#include <util/generic/size_literals.h>
#include <util/stream/input.h>
#include <util/stream/output.h>
#include <util/stream/str.h>
@@ -82,6 +83,28 @@ static TNode CreateEmptyNodeByType(::NYson::EYsonType type)
return result;
}
+static TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type, bool consumeUntilEof)
+{
+ TNode result = CreateEmptyNodeByType(type);
+
+ ui64 bufferSizeLimit = 64_KB;
+ if (!consumeUntilEof) {
+ // Other values may follow this one in the stream, so use a 1-byte read buffer to avoid pulling any data past the end of this value.
+ bufferSizeLimit = 1;
+ }
+
+ TNodeBuilder builder(&result);
+ ::NYson::TYsonParser parser(
+ &builder,
+ input,
+ type,
+ /*enableLinePositionInfo*/ false,
+ bufferSizeLimit,
+ consumeUntilEof);
+ parser.Parse();
+ return result;
+}
+
TNode NodeFromYsonString(const TStringBuf input, ::NYson::EYsonType type)
{
TMemoryInput stream(input);
@@ -104,12 +127,12 @@ TString NodeToCanonicalYsonString(const TNode& node, NYson::EYsonFormat format)
TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type)
{
- TNode result = CreateEmptyNodeByType(type);
+ return NodeFromYsonStream(input, type, /*consumeUntilEof*/ true);
+}
- TNodeBuilder builder(&result);
- ::NYson::TYsonParser parser(&builder, input, type);
- parser.Parse();
- return result;
+TNode NodeFromYsonStreamNonGreedy(IInputStream* input, ::NYson::EYsonType type)
+{
+ return NodeFromYsonStream(input, type, /*consumeUntilEof*/ false);
}
void NodeToYsonStream(const TNode& node, IOutputStream* output, NYson::EYsonFormat format)
diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h
index 1348d88bbb..2db1318db4 100644
--- a/library/cpp/yson/node/node_io.h
+++ b/library/cpp/yson/node/node_io.h
@@ -23,6 +23,13 @@ TString NodeToCanonicalYsonString(const TNode& node, ::NYson::EYsonFormat format
// Parse TNode from stream in YSON format
TNode NodeFromYsonStream(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+// Parse a single TNode from a stream in YSON format without consuming the rest of the stream.
+// Stops reading from `input` as soon as a complete valid YSON value has been read, leaving the remainder unread.
+// Used in TNode::Load so that other values can be saved to / loaded from the same stream after the TNode.
+// NB: This is substantially slower (1.5-2x according to the benchmark in `./benchmark/saveload.cpp`) than
+// the original `NodeFromYsonStream`; use it only when the data following the node must stay in the stream.
+TNode NodeFromYsonStreamNonGreedy(IInputStream* input, ::NYson::EYsonType type = ::NYson::EYsonType::Node);
+
// Serialize TNode to stream in one of YSON formats with random order of maps' keys (don't use in tests)
void NodeToYsonStream(const TNode& node, IOutputStream* output, ::NYson::EYsonFormat format = ::NYson::EYsonFormat::Text);
diff --git a/library/cpp/yson/node/node_ut.cpp b/library/cpp/yson/node/node_ut.cpp
index 728a926283..80d231cd09 100644
--- a/library/cpp/yson/node/node_ut.cpp
+++ b/library/cpp/yson/node/node_ut.cpp
@@ -279,6 +279,37 @@ Y_UNIT_TEST_SUITE(YtNodeTest) {
UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy);
}
+ Y_UNIT_TEST(TestSaveLoadWithNeighbours) {
+ TString stringBefore = "before";
+
+ TNode node = TNode()("foo", "bar")("baz", 42);
+ node.Attributes()["attr_name"] = "attr_value";
+
+ TString stringAfter = "after";
+
+ TString bytes;
+ {
+ TStringOutput s(bytes);
+ ::Save(&s, stringBefore);
+ ::Save(&s, node);
+ ::Save(&s, stringAfter);
+ }
+
+ TString deserializedStringBefore;
+ TString deserializedStringAfter;
+ TNode nodeCopy;
+ {
+ TStringInput s(bytes);
+ ::Load(&s, deserializedStringBefore);
+ ::Load(&s, nodeCopy);
+ ::Load(&s, deserializedStringAfter);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(stringBefore, deserializedStringBefore);
+ UNIT_ASSERT_VALUES_EQUAL(node, nodeCopy);
+ UNIT_ASSERT_VALUES_EQUAL(stringAfter, deserializedStringAfter);
+ }
+
Y_UNIT_TEST(TestIntCast) {
TNode node = 1ull << 31;
UNIT_ASSERT(node.IsUint64());
diff --git a/library/cpp/yson/parser.cpp b/library/cpp/yson/parser.cpp
index 783f9b9047..934a56ee21 100644
--- a/library/cpp/yson/parser.cpp
+++ b/library/cpp/yson/parser.cpp
@@ -16,22 +16,27 @@ namespace NYson {
IInputStream* stream,
EYsonType type,
bool enableLinePositionInfo,
+ ui64 bufferSizeLimit,
+ bool consumeUntilEof,
TMaybe<ui64> memoryLimit = Nothing())
: Consumer_(consumer)
, Stream_(stream)
, Type_(type)
, EnableLinePositionInfo_(enableLinePositionInfo)
+ , BufferSizeLimit_(bufferSizeLimit)
+ , ConsumeUntilEof_(consumeUntilEof)
, MemoryLimit_(memoryLimit)
{
}
void Parse() {
- TBuffer buffer(64 << 10);
+ TBuffer buffer(BufferSizeLimit_);
ParseYsonStreamImpl<NYT::NYson::IYsonConsumer, TStreamReader>(
TStreamReader(Stream_, buffer.Data(), buffer.Capacity()),
Consumer_,
Type_,
EnableLinePositionInfo_,
+ ConsumeUntilEof_,
MemoryLimit_);
}
@@ -40,6 +45,8 @@ namespace NYson {
IInputStream* Stream_;
EYsonType Type_;
bool EnableLinePositionInfo_;
+ ui64 BufferSizeLimit_;
+ bool ConsumeUntilEof_;
TMaybe<ui64> MemoryLimit_;
};
@@ -50,8 +57,10 @@ namespace NYson {
IInputStream* stream,
EYsonType type,
bool enableLinePositionInfo,
+ ui64 bufferSizeLimit,
+ bool consumeUntilEof,
TMaybe<ui64> memoryLimit)
- : Impl(new TImpl(consumer, stream, type, enableLinePositionInfo, memoryLimit))
+ : Impl(new TImpl(consumer, stream, type, enableLinePositionInfo, bufferSizeLimit, consumeUntilEof, memoryLimit))
{
}
@@ -115,6 +124,7 @@ namespace NYson {
consumer,
type,
enableLinePositionInfo,
+ true,
memoryLimit);
}
diff --git a/library/cpp/yson/parser.h b/library/cpp/yson/parser.h
index dce35a8cd4..b6d8b110a1 100644
--- a/library/cpp/yson/parser.h
+++ b/library/cpp/yson/parser.h
@@ -21,6 +21,8 @@ namespace NYson {
IInputStream* stream,
EYsonType type = ::NYson::EYsonType::Node,
bool enableLinePositionInfo = false,
+ ui64 bufferSizeLimit = 64 << 10,
+ bool consumeUntilEof = true,
TMaybe<ui64> memoryLimit = Nothing());
~TYsonParser();
diff --git a/library/cpp/yson/parser_detail.h b/library/cpp/yson/parser_detail.h
index 9d8315dd60..fb72affed5 100644
--- a/library/cpp/yson/parser_detail.h
+++ b/library/cpp/yson/parser_detail.h
@@ -12,11 +12,17 @@ namespace NYson {
private:
using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>;
TConsumer* Consumer;
+ bool ConsumeUntilEof_;
public:
- TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
- : TBase(blockStream, memoryLimit)
- , Consumer(consumer)
+ TParser(
+ const TBlockStream& blockStream,
+ TConsumer* consumer,
+ bool consumeUntilEof,
+ TMaybe<ui64> memoryLimit)
+ : TBase(blockStream, memoryLimit)
+ , Consumer(consumer)
+ , ConsumeUntilEof_(consumeUntilEof)
{
}
@@ -38,11 +44,13 @@ namespace NYson {
Y_ABORT("unreachable");
}
- while (!(TBase::IsFinished() && TBase::IsEmpty())) {
- if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) {
- ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found";
- } else if (!TBase::IsEmpty()) {
- TBase::Advance(1);
+ if (ConsumeUntilEof_) {
+ while (!(TBase::IsFinished() && TBase::IsEmpty())) {
+ if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) {
+ ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found";
+ } else if (!TBase::IsEmpty()) {
+ TBase::Advance(1);
+ }
}
}
}
@@ -308,14 +316,15 @@ namespace NYson {
NYT::NYson::IYsonConsumer* consumer,
EYsonType parsingMode,
bool enableLinePositionInfo,
+ bool consumeUntilEof,
TMaybe<ui64> memoryLimit) {
if (enableLinePositionInfo) {
using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>;
- TImpl impl(blockStream, consumer, memoryLimit);
+ TImpl impl(blockStream, consumer, consumeUntilEof, memoryLimit);
impl.DoParse(parsingMode);
} else {
using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>;
- TImpl impl(blockStream, consumer, memoryLimit);
+ TImpl impl(blockStream, consumer, consumeUntilEof, memoryLimit);
impl.DoParse(parsingMode);
}
}
@@ -337,7 +346,7 @@ namespace NYson {
public:
TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit)
- : Parser(TStringReader(), consumer, memoryLimit)
+ : Parser(TStringReader(), consumer, true, memoryLimit)
{
}
@@ -365,7 +374,7 @@ namespace NYson {
public:
TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit)
- : Parser(blockStream, consumer, memoryLimit)
+ : Parser(blockStream, consumer, true, memoryLimit)
{
}