aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp
diff options
context:
space:
mode:
authorionagamed <ionagamed@yandex-team.com>2023-12-06 11:15:42 +0300
committerionagamed <ionagamed@yandex-team.com>2023-12-06 11:59:15 +0300
commitbbdd76a9666c19dd2eb27a842909358e8b541a67 (patch)
tree4dd28e53f070d9f33c58df8631357e0b7125ce54 /library/cpp
parent19dfab524551ce06a74e89eda1a7dd2c95663e9c (diff)
downloadydb-bbdd76a9666c19dd2eb27a842909358e8b541a67.tar.gz
library/yson: Add NodeFromJsonStringIterative
AI для
Diffstat (limited to 'library/cpp')
-rw-r--r--library/cpp/yson/json/yson2json_adapter.cpp12
-rw-r--r--library/cpp/yson/json/yson2json_adapter.h6
-rw-r--r--library/cpp/yson/node/node_io.cpp16
-rw-r--r--library/cpp/yson/node/node_io.h6
-rw-r--r--library/cpp/yson/node/node_io_ut.cpp53
-rw-r--r--library/cpp/yson/node/ut/ya.make1
6 files changed, 92 insertions, 2 deletions
diff --git a/library/cpp/yson/json/yson2json_adapter.cpp b/library/cpp/yson/json/yson2json_adapter.cpp
index b5e7c49d4d..1dbe770e5b 100644
--- a/library/cpp/yson/json/yson2json_adapter.cpp
+++ b/library/cpp/yson/json/yson2json_adapter.cpp
@@ -1,9 +1,13 @@
#include "yson2json_adapter.h"
namespace NYT {
- TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException)
+ TYson2JsonCallbacksAdapter::TYson2JsonCallbacksAdapter(
+ ::NYson::TYsonConsumerBase* impl,
+ bool throwException,
+ ui64 maxDepth)
: NJson::TJsonCallbacks(throwException)
, Impl_(impl)
+ , MaxDepth_(maxDepth)
{
}
@@ -46,6 +50,9 @@ namespace NYT {
bool TYson2JsonCallbacksAdapter::OnOpenArray() {
+ // Enforce the depth limit before touching any state, so that a rejected list
+ // neither grows the context stack nor forwards anything to the consumer.
+ // `size() >= MaxDepth_` here is the same condition as `size() > MaxDepth_` after the push.
+ if (State_.ContextStack.size() >= MaxDepth_) {
+ return false;
+ }
WrapIfListItem();
State_.ContextStack.push(true);
Impl_->OnBeginList();
return true;
}
@@ -59,6 +66,9 @@ namespace NYT {
bool TYson2JsonCallbacksAdapter::OnOpenMap() {
+ // Enforce the depth limit before touching any state, so that a rejected map
+ // neither grows the context stack nor forwards anything to the consumer.
+ // `size() >= MaxDepth_` here is the same condition as `size() > MaxDepth_` after the push.
+ if (State_.ContextStack.size() >= MaxDepth_) {
+ return false;
+ }
WrapIfListItem();
State_.ContextStack.push(false);
Impl_->OnBeginMap();
return true;
}
diff --git a/library/cpp/yson/json/yson2json_adapter.h b/library/cpp/yson/json/yson2json_adapter.h
index da1bf5ba70..b8270a6e62 100644
--- a/library/cpp/yson/json/yson2json_adapter.h
+++ b/library/cpp/yson/json/yson2json_adapter.h
@@ -21,7 +21,10 @@ namespace NYT {
};
public:
- TYson2JsonCallbacksAdapter(::NYson::TYsonConsumerBase* impl, bool throwException = false);
+ TYson2JsonCallbacksAdapter(
+ ::NYson::TYsonConsumerBase* impl,
+ bool throwException = false,
+ ui64 maxDepth = std::numeric_limits<ui64>::max());
bool OnNull() override;
bool OnBoolean(bool val) override;
@@ -49,5 +52,6 @@ namespace NYT {
private:
::NYson::TYsonConsumerBase* Impl_;
TState State_;
+ ui64 MaxDepth_;
};
}
diff --git a/library/cpp/yson/node/node_io.cpp b/library/cpp/yson/node/node_io.cpp
index 26b95ef996..2e191d8d48 100644
--- a/library/cpp/yson/node/node_io.cpp
+++ b/library/cpp/yson/node/node_io.cpp
@@ -151,6 +151,22 @@ TNode NodeFromJsonString(const TStringBuf input)
return result;
}
+TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth)
+{
+    // Reader setup: iterative parser, UTF-8 validation switched off, nesting capped at maxDepth.
+    NJson::TJsonReaderConfig readerConfig;
+    readerConfig.UseIterativeParser = true;
+    readerConfig.DontValidateUtf8 = true;
+    readerConfig.MaxDepth = maxDepth;
+
+    TNode node;
+    TNodeBuilder nodeBuilder(&node);
+    // The adapter is created with throwException = true and receives the same depth limit as the reader.
+    TYson2JsonCallbacksAdapter adapter(&nodeBuilder, /*throwException*/ true, maxDepth);
+
+    TMemoryInput jsonInput(input);
+    NJson::ReadJson(&jsonInput, &readerConfig, &adapter);
+    return node;
+}
+
+
TNode NodeFromJsonValue(const NJson::TJsonValue& input)
{
TNode result;
diff --git a/library/cpp/yson/node/node_io.h b/library/cpp/yson/node/node_io.h
index 02067045b8..1348d88bbb 100644
--- a/library/cpp/yson/node/node_io.h
+++ b/library/cpp/yson/node/node_io.h
@@ -33,6 +33,12 @@ void NodeToCanonicalYsonStream(const TNode& node, IOutputStream* output, ::NYson
TNode NodeFromJsonString(const TStringBuf input);
bool TryNodeFromJsonString(const TStringBuf input, TNode& dst);
+// Parse a TNode from a string in JSON format using an iterative JSON parser.
+// An iterative parser still needs a stack, but allocates it on the heap instead of using the system call stack.
+// This mitigates stack overflow on deeply nested JSON strings when the thread stack is small
+// (e.g. parsing "[[[[[[...]]]]]]" with 256 KB of stack crashes the whole binary).
+TNode NodeFromJsonStringIterative(const TStringBuf input, ui64 maxDepth = 1024);
+
// Convert TJsonValue to TNode
TNode NodeFromJsonValue(const ::NJson::TJsonValue& input);
diff --git a/library/cpp/yson/node/node_io_ut.cpp b/library/cpp/yson/node/node_io_ut.cpp
new file mode 100644
index 0000000000..90720cd3a1
--- /dev/null
+++ b/library/cpp/yson/node/node_io_ut.cpp
@@ -0,0 +1,53 @@
+#include "node_io.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/stream/mem.h>
+
+using namespace NYson;
+
+namespace {
+    // Writes a JSON object {"key": ...} whose value is (depth - 1) nested empty arrays,
+    // so the whole document has `depth` levels of nesting (the enclosing object counts as one).
+    // depth == 0 is handled like depth == 1: no arrays are written.
+    void GenerateDeepJson(TStringStream& stream, ui64 depth) {
+        // Guard the subtraction: for an unsigned depth of 0, `depth - 1` would wrap around to 2^64 - 1.
+        const ui64 arrayCount = depth > 0 ? depth - 1 : 0;
+
+        stream << "{\"key\":";
+        // The counters are ui64 like the bound; a ui32 counter would wrap before reaching
+        // any bound above 2^32 - 1 and the loop would never terminate.
+        for (ui64 i = 0; i < arrayCount; ++i) {
+            stream << "[";
+        }
+        for (ui64 i = 0; i < arrayCount; ++i) {
+            stream << "]";
+        }
+        stream << "}";
+    }
+}
+
+Y_UNIT_TEST_SUITE(TestNodeFromJsonStringIterativeTest) {
+    // Nesting far beyond the default maxDepth: the call must fail with an exception.
+    Y_UNIT_TEST(NoCrashOn1e5Brackets) {
+        constexpr ui32 nesting = static_cast<ui32>(1e5);
+
+        TStringStream deepJson;
+        GenerateDeepJson(deepJson, nesting);
+
+        UNIT_ASSERT_EXCEPTION(NYT::NodeFromJsonStringIterative(deepJson.Str()), std::exception);
+    }
+
+    // One level more than the default maxDepth of 1024: still expected to throw.
+    Y_UNIT_TEST(NoCrashOn1025Brackets) {
+        constexpr ui32 nesting = 1025;
+
+        TStringStream deepJson;
+        GenerateDeepJson(deepJson, nesting);
+
+        UNIT_ASSERT_EXCEPTION(NYT::NodeFromJsonStringIterative(deepJson.Str()), std::exception);
+    }
+
+    // Exactly the default maxDepth of 1024: expected to parse without an exception.
+    Y_UNIT_TEST(NoErrorOn1024Brackets) {
+        constexpr ui32 nesting = 1024;
+
+        TStringStream deepJson;
+        GenerateDeepJson(deepJson, nesting);
+
+        UNIT_ASSERT_NO_EXCEPTION(NYT::NodeFromJsonStringIterative(deepJson.Str()));
+    }
+}
diff --git a/library/cpp/yson/node/ut/ya.make b/library/cpp/yson/node/ut/ya.make
index a3f79f7403..269c167c14 100644
--- a/library/cpp/yson/node/ut/ya.make
+++ b/library/cpp/yson/node/ut/ya.make
@@ -2,6 +2,7 @@ UNITTEST_FOR(library/cpp/yson/node)
SRCS(
node_ut.cpp
+ node_io_ut.cpp
)
END()