From bbdd76a9666c19dd2eb27a842909358e8b541a67 Mon Sep 17 00:00:00 2001 From: ionagamed Date: Wed, 6 Dec 2023 11:15:42 +0300 Subject: library/yson: Add NodeFromJsonStringIterative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AI для --- library/cpp/yson/node/node_io.cpp | 16 +++++++++++ library/cpp/yson/node/node_io.h | 6 ++++ library/cpp/yson/node/node_io_ut.cpp | 53 ++++++++++++++++++++++++++++++++++++ library/cpp/yson/node/ut/ya.make | 1 + 4 files changed, 76 insertions(+) create mode 100644 library/cpp/yson/node/node_io_ut.cpp (limited to 'library/cpp/yson/node') diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp index 26b95ef996d..2e191d8d48e 100644 --- a/library/cpp/yson/node/node_io.cpp +++ b/library/cpp/yson/node/node_io.cpp @@ -151,6 +151,22 @@ TNode NodeFromJsonString(const TStringBuf input) return result; } +TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth) +{ + TMemoryInput stream(input); + + TNode result; + + TNodeBuilder builder(&result); + TYson2JsonCallbacksAdapter callbacks(&builder, /*throwException*/ true, maxDepth); + NJson::TJsonReaderConfig config; + config.DontValidateUtf8 = true; + config.UseIterativeParser = true; + config.MaxDepth = maxDepth; + NJson::ReadJson(&stream, &config, &callbacks); + return result; +} + TNode NodeFromJsonValue(const NJson::TJsonValue& input) { TNode result; diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h index 02067045b83..1348d88bbb1 100644 --- a/library/cpp/yson/node/node_io.h +++ b/library/cpp/yson/node/node_io.h @@ -33,6 +33,12 @@ void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, ::NYson TNode NodeFromJsonString(const TStringBuf input); bool TryNodeFromJsonString(const TStringBuf input, TNode& dst); +// Parse TNode from string in JSON format using an iterative JSON parser. 
+// Iterative JSON parsers still use the stack, but allocate it on the heap (instead of using the system call stack). +// Needed to mitigate stack overflow with short stacks on deeply nested JSON strings +// (e.g. 256kb of stack when parsing "[[[[[[...]]]]]]" crashes the whole binary). +TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth = 1024); + // Convert TJsonValue to TNode TNode NodeFromJsonValue(const ::NJson::TJsonValue& input); diff --git a/library/cpp/yson/node/node_io_ut.cpp b/library/cpp/yson/node/node_io_ut.cpp new file mode 100644 index 00000000000..90720cd3a1b --- /dev/null +++ b/library/cpp/yson/node/node_io_ut.cpp @@ -0,0 +1,53 @@ +#include "node_io.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/str.h> + +using namespace NYson; + +namespace { + void GenerateDeepJson(TStringStream& stream, ui64 depth) { + stream << "{\"key\":"; + for (ui32 i = 0; i < depth - 1; ++i) { + stream << "["; + } + for (ui32 i = 0; i < depth - 1; ++i) { + stream << "]"; + } + stream << "}"; + } +} + +Y_UNIT_TEST_SUITE(TestNodeFromJsonStringIterativeTest) { + Y_UNIT_TEST(NoCrashOn1e5Brackets) { + constexpr ui32 brackets = static_cast<ui32>(1e5); + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + UNIT_ASSERT_EXCEPTION( + NYT::NodeFromJsonStringIterative(jsonStream.Str()), + std::exception); + } + + Y_UNIT_TEST(NoCrashOn1025Brackets) { + constexpr ui32 brackets = 1025; + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + UNIT_ASSERT_EXCEPTION( + NYT::NodeFromJsonStringIterative(jsonStream.Str()), + std::exception); + } + + Y_UNIT_TEST(NoErrorOn1024Brackets) { + constexpr ui32 brackets = 1024; + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + UNIT_ASSERT_NO_EXCEPTION(NYT::NodeFromJsonStringIterative(jsonStream.Str())); + } +} diff --git a/library/cpp/yson/node/ut/ya.make b/library/cpp/yson/node/ut/ya.make index a3f79f74035..269c167c147 100644 --- a/library/cpp/yson/node/ut/ya.make +++ 
b/library/cpp/yson/node/ut/ya.make @@ -2,6 +2,7 @@ UNITTEST_FOR(library/cpp/yson/node) SRCS( node_ut.cpp + node_io_ut.cpp ) END() -- cgit v1.3