diff options
author | ionagamed <ionagamed@yandex-team.com> | 2023-12-06 11:15:42 +0300 |
---|---|---|
committer | ionagamed <ionagamed@yandex-team.com> | 2023-12-06 11:59:15 +0300 |
commit | bbdd76a9666c19dd2eb27a842909358e8b541a67 (patch) | |
tree | 4dd28e53f070d9f33c58df8631357e0b7125ce54 /library | |
parent | 19dfab524551ce06a74e89eda1a7dd2c95663e9c (diff) | |
download | ydb-bbdd76a9666c19dd2eb27a842909358e8b541a67.tar.gz |
library/yson: Add NodeFromJsonStringIterative
AI для
Diffstat (limited to 'library')
-rw-r--r-- | library/cpp/yson/json/yson2json_adapter.cpp | 12 | ||||
-rw-r--r-- | library/cpp/yson/json/yson2json_adapter.h | 6 | ||||
-rw-r--r-- | library/cpp/yson/node/node_io.cpp | 16 | ||||
-rw-r--r-- | library/cpp/yson/node/node_io.h | 6 | ||||
-rw-r--r-- | library/cpp/yson/node/node_io_ut.cpp | 53 | ||||
-rw-r--r-- | library/cpp/yson/node/ut/ya.make | 1 |
6 files changed, 92 insertions, 2 deletions
diff --git a/library/cpp/yson/json/yson2json_adapter.cpp b/library/cpp/yson/json/yson2json_adapter.cpp index b5e7c49d4d..1dbe770e5b 100644 --- a/library/cpp/yson/json/yson2json_adapter.cpp +++ b/library/cpp/yson/json/yson2json_adapter.cpp @@ -1,9 +1,13 @@ #include "yson2json_adapter.h" namespace NYT { - TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException) + TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter( + ::NYson::TYsonConsumerBase* impl, + bool throwException, + ui64 maxDepth) : NJson::TJsonCallbacks(throwException) , Impl_(impl) + , MaxDepth_(maxDepth) { } @@ -46,6 +50,9 @@ namespace NYT { bool TYson2JsonCallbacksAdapter::OnOpenArray() { WrapIfListItem(); State_.ContextStack.push(true); + if (State_.ContextStack.size() > MaxDepth_) { + return false; + } Impl_->OnBeginList(); return true; } @@ -59,6 +66,9 @@ namespace NYT { bool TYson2JsonCallbacksAdapter::OnOpenMap() { WrapIfListItem(); State_.ContextStack.push(false); + if (State_.ContextStack.size() > MaxDepth_) { + return false; + } Impl_->OnBeginMap(); return true; } diff --git a/library/cpp/yson/json/yson2json_adapter.h b/library/cpp/yson/json/yson2json_adapter.h index da1bf5ba70..b8270a6e62 100644 --- a/library/cpp/yson/json/yson2json_adapter.h +++ b/library/cpp/yson/json/yson2json_adapter.h @@ -21,7 +21,10 @@ namespace NYT { }; public: - TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException = false); + TYson2JsonCallbacksAdapter( + ::NYson::TYsonConsumerBase* impl, + bool throwException = false, + ui64 maxDepth = std::numeric_limits<ui64>::max()); bool OnNull() override; bool OnBoolean(bool val) override; @@ -49,5 +52,6 @@ namespace NYT { private: ::NYson::TYsonConsumerBase* Impl_; TState State_; + ui64 MaxDepth_; }; } diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp index 26b95ef996..2e191d8d48 100644 --- a/library/cpp/yson/node/node_io.cpp +++ b/library/cpp/yson/node/node_io.cpp @@ -151,6 +151,22 @@ TNode NodeFromJsonString(const TStringBuf input) return result; } +TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth) +{ + TMemoryInput stream(input); + + TNode result; + + TNodeBuilder builder(&result); + TYson2JsonCallbacksAdapter callbacks(&builder, /*throwException*/ true, maxDepth); + NJson::TJsonReaderConfig config; + config.DontValidateUtf8 = true; + config.UseIterativeParser = true; + config.MaxDepth = maxDepth; + NJson::ReadJson(&stream, &config, &callbacks); + return result; +} + TNode NodeFromJsonValue(const NJson::TJsonValue& input) { TNode result; diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h index 02067045b8..1348d88bbb 100644 --- a/library/cpp/yson/node/node_io.h +++ b/library/cpp/yson/node/node_io.h @@ -33,6 +33,12 @@ void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, ::NYson TNode NodeFromJsonString(const TStringBuf input); bool TryNodeFromJsonString(const TStringBuf input, TNode& dst); +// Parse TNode from string in JSON format using an iterative JSON parser. +// Iterative JSON parsers still use the stack, but allocate it on the heap (instead of using the system call stack). +// Needed to mitigate stack overflow with short stacks on deeply nested JSON strings +// (e.g. 256kb of stack when parsing "[[[[[[...]]]]]]" crashes the whole binary). +TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth = 1024); + // Convert TJsonValue to TNode TNode NodeFromJsonValue(const ::NJson::TJsonValue& input); diff --git a/library/cpp/yson/node/node_io_ut.cpp b/library/cpp/yson/node/node_io_ut.cpp new file mode 100644 index 0000000000..90720cd3a1 --- /dev/null +++ b/library/cpp/yson/node/node_io_ut.cpp @@ -0,0 +1,53 @@ +#include "node_io.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/mem.h> + +using namespace NYson; + +namespace { + void GenerateDeepJson(TStringStream& stream, ui64 depth) { + stream << "{\"key\":"; + for (ui32 i = 0; i < depth - 1; ++i) { + stream << "["; + } + for (ui32 i = 0; i < depth - 1; ++i) { + stream << "]"; + } + stream << "}"; + } +} + +Y_UNIT_TEST_SUITE(TestNodeFromJsonStringIterativeTest) { + Y_UNIT_TEST(NoCrashOn1e5Brackets) { + constexpr ui32 brackets = static_cast<ui32>(1e5); + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + UNIT_ASSERT_EXCEPTION( + NYT::NodeFromJsonStringIterative(jsonStream.Str()), + std::exception); + } + + Y_UNIT_TEST(NoCrashOn1025Brackets) { + constexpr ui32 brackets = 1025; + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + UNIT_ASSERT_EXCEPTION( + NYT::NodeFromJsonStringIterative(jsonStream.Str()), + std::exception); + } + + Y_UNIT_TEST(NoErrorOn1024Brackets) { + constexpr ui32 brackets = 1024; + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + UNIT_ASSERT_NO_EXCEPTION(NYT::NodeFromJsonStringIterative(jsonStream.Str())); + } +} diff --git a/library/cpp/yson/node/ut/ya.make b/library/cpp/yson/node/ut/ya.make index a3f79f7403..269c167c14 100644 --- a/library/cpp/yson/node/ut/ya.make +++ b/library/cpp/yson/node/ut/ya.make @@ -2,6 +2,7 @@ UNITTEST_FOR(library/cpp/yson/node) SRCS( node_ut.cpp + node_io_ut.cpp ) END() |