about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yurial <yurial@yandex-team.com> 2023-11-01 11:44:41 +0300
committer yurial <yurial@yandex-team.com> 2023-11-01 12:15:19 +0300
commit 7f97144a42658fab5a97c83c0c66b0ad957b5b0f (patch)
tree 23c3e41ea20c6797441e26bce13ffbb1eb0df11e
parent 948552a0ebc797aecf90797407b0333fbccc43e9 (diff)
download ydb-7f97144a42658fab5a97c83c0c66b0ad957b5b0f.tar.gz
Add DepthLimit to TJsonFormatConfig
-rw-r--r-- yt/yt/core/json/config.h 1
-rw-r--r-- yt/yt/core/json/json_callbacks.cpp 31
-rw-r--r-- yt/yt/core/json/json_callbacks.h 10
-rw-r--r-- yt/yt/core/json/json_parser.cpp 1
-rw-r--r-- yt/yt/core/json/public.h 4
-rw-r--r-- yt/yt/core/json/unittests/parser_ut.cpp 68
-rw-r--r-- yt/yt/library/formats/format.cpp 3
7 files changed, 101 insertions, 17 deletions
diff --git a/yt/yt/core/json/config.h b/yt/yt/core/json/config.h
index 8d9a951631..71fa8c46e7 100644
--- a/yt/yt/core/json/config.h
+++ b/yt/yt/core/json/config.h
@@ -26,6 +26,7 @@ public:
bool Plain;
bool EncodeUtf8;
i64 MemoryLimit = 256_MB;
+ int NestingLevelLimit = 0;
std::optional<int> StringLengthLimit;
diff --git a/yt/yt/core/json/json_callbacks.cpp b/yt/yt/core/json/json_callbacks.cpp
index 1edea77312..39703c007f 100644
--- a/yt/yt/core/json/json_callbacks.cpp
+++ b/yt/yt/core/json/json_callbacks.cpp
@@ -19,11 +19,13 @@ TJsonCallbacksBuildingNodesImpl::TJsonCallbacksBuildingNodesImpl(
NYson::EYsonType ysonType,
const TUtf8Transcoder& utf8Transcoder,
i64 memoryLimit,
+ int nestingLevelLimit,
NJson::EJsonAttributesMode attributesMode)
: Consumer_(consumer)
, YsonType_(ysonType)
, Utf8Transcoder_(utf8Transcoder)
, MemoryLimit_(memoryLimit)
+ , NestingLevelLimit_(nestingLevelLimit)
, AttributesMode_(attributesMode)
, TreeBuilder_(CreateBuilderFromFactory(GetEphemeralNodeFactory()))
{
@@ -140,14 +142,17 @@ void TJsonCallbacksBuildingNodesImpl::OnItemFinished()
if (YsonType_ == EYsonType::ListFragment) {
Consumer_->OnListItem();
}
- ConsumeNode(TreeBuilder_->EndTree());
+ ConsumeNode(TreeBuilder_->EndTree(), Stack_.size());
TreeBuilder_->BeginTree();
ConsumedMemory_ = 0;
}
}
-void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node)
+void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node, int nestingLevel)
{
+ if (NestingLevelLimit_ > 0 && nestingLevel > NestingLevelLimit_) {
+ THROW_ERROR_EXCEPTION("JSON nesting level limit exceeded") << NYT::TErrorAttribute("NestingLevelLimit", NestingLevelLimit_);
+ }
switch (node->GetType()) {
case ENodeType::Int64:
Consumer_->OnInt64Scalar(node->AsInt64()->GetValue());
@@ -168,10 +173,10 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node)
Consumer_->OnStringScalar(node->AsString()->GetValue());
break;
case ENodeType::Map:
- ConsumeNode(node->AsMap());
+ ConsumeNode(node->AsMap(), nestingLevel + 1);
break;
case ENodeType::List:
- ConsumeNode(node->AsList());
+ ConsumeNode(node->AsList(), nestingLevel + 1);
break;
default:
YT_ABORT();
@@ -179,7 +184,7 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node)
};
}
-void TJsonCallbacksBuildingNodesImpl::ConsumeMapFragment(IMapNodePtr map)
+void TJsonCallbacksBuildingNodesImpl::ConsumeMapFragment(IMapNodePtr map, int nestingLevel)
{
for (const auto& [key, value] : map->GetChildren()) {
auto adjustedKey = TStringBuf(key);
@@ -194,11 +199,11 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeMapFragment(IMapNodePtr map)
adjustedKey = adjustedKey.substr(1);
}
Consumer_->OnKeyedItem(adjustedKey);
- ConsumeNode(value);
+ ConsumeNode(value, nestingLevel);
}
}
-void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map)
+void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map, int nestingLevel)
{
auto node = map->FindChild("$value");
if (node) {
@@ -208,7 +213,7 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map)
THROW_ERROR_EXCEPTION("Value of \"$attributes\" must be a map");
}
Consumer_->OnBeginAttributes();
- ConsumeMapFragment(attributes->AsMap());
+ ConsumeMapFragment(attributes->AsMap(), nestingLevel);
Consumer_->OnEndAttributes();
}
@@ -235,7 +240,7 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map)
}
if (node->GetType() == expectedType) {
- ConsumeNode(node);
+ ConsumeNode(node, nestingLevel);
} else if (node->GetType() == ENodeType::String) {
auto nodeAsString = node->AsString()->GetValue();
switch (expectedType) {
@@ -290,24 +295,24 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map)
<< TErrorAttribute("actual_type", node->GetType());
}
} else {
- ConsumeNode(node);
+ ConsumeNode(node, nestingLevel);
}
} else {
if (map->FindChild("$attributes")) {
THROW_ERROR_EXCEPTION("Found key \"$attributes\" without key \"$value\"");
}
Consumer_->OnBeginMap();
- ConsumeMapFragment(map);
+ ConsumeMapFragment(map, nestingLevel);
Consumer_->OnEndMap();
}
}
-void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IListNodePtr list)
+void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IListNodePtr list, int nestingLevel)
{
Consumer_->OnBeginList();
for (int i = 0; i < list->GetChildCount(); ++i) {
Consumer_->OnListItem();
- ConsumeNode(list->GetChildOrThrow(i));
+ ConsumeNode(list->GetChildOrThrow(i), nestingLevel);
}
Consumer_->OnEndList();
}
diff --git a/yt/yt/core/json/json_callbacks.h b/yt/yt/core/json/json_callbacks.h
index ffae11185c..47556a8232 100644
--- a/yt/yt/core/json/json_callbacks.h
+++ b/yt/yt/core/json/json_callbacks.h
@@ -51,6 +51,7 @@ public:
NYson::EYsonType ysonType,
const TUtf8Transcoder& utf8Transcoder,
i64 memoryLimit,
+ int nestingLevelLimit,
NJson::EJsonAttributesMode attributesMode);
void OnStringScalar(TStringBuf value) override;
@@ -71,16 +72,17 @@ private:
void OnItemStarted();
void OnItemFinished();
- void ConsumeNode(NYTree::INodePtr node);
- void ConsumeNode(NYTree::IMapNodePtr map);
- void ConsumeNode(NYTree::IListNodePtr list);
- void ConsumeMapFragment(NYTree::IMapNodePtr map);
+ void ConsumeNode(NYTree::INodePtr node, int nestingLevel);
+ void ConsumeNode(NYTree::IMapNodePtr map, int nestingLevel);
+ void ConsumeNode(NYTree::IListNodePtr list, int nestingLevel);
+ void ConsumeMapFragment(NYTree::IMapNodePtr map, int nestingLevel);
NYson::IYsonConsumer* Consumer_;
NYson::EYsonType YsonType_;
TUtf8Transcoder Utf8Transcoder_;
i64 ConsumedMemory_ = 0;
const i64 MemoryLimit_;
+ const int NestingLevelLimit_;
const NJson::EJsonAttributesMode AttributesMode_;
TCompactVector<EJsonCallbacksNodeType, 4> Stack_;
diff --git a/yt/yt/core/json/json_parser.cpp b/yt/yt/core/json/json_parser.cpp
index 55aedb9917..989c2099fa 100644
--- a/yt/yt/core/json/json_parser.cpp
+++ b/yt/yt/core/json/json_parser.cpp
@@ -131,6 +131,7 @@ public:
Type_,
TUtf8Transcoder(Config_->EncodeUtf8),
Config_->MemoryLimit,
+ Config_->NestingLevelLimit,
Config_->AttributesMode);
}
YajlHandle_.reset(yajl_alloc(&YajlCallbacks, nullptr, Callbacks_.get()));
diff --git a/yt/yt/core/json/public.h b/yt/yt/core/json/public.h
index 860acdc88c..6e7b4486d1 100644
--- a/yt/yt/core/json/public.h
+++ b/yt/yt/core/json/public.h
@@ -1,6 +1,7 @@
#pragma once
#include <yt/yt/core/misc/public.h>
+#include <yt/yt/core/yson/public.h>
namespace NYT::NJson {
@@ -11,6 +12,9 @@ DECLARE_REFCOUNTED_CLASS(TJsonFormatConfig)
struct IJsonConsumer;
struct IJsonWriter;
+// YSON with attributes is represented in JSON with additional nested objects. It leads to doubled nesting levels.
+constexpr int NestingLevelLimit = NYT::NYson::NewNestingLevelLimit * 2;
+
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NJson
diff --git a/yt/yt/core/json/unittests/parser_ut.cpp b/yt/yt/core/json/unittests/parser_ut.cpp
index b9e24ec128..c1d6ed0787 100644
--- a/yt/yt/core/json/unittests/parser_ut.cpp
+++ b/yt/yt/core/json/unittests/parser_ut.cpp
@@ -802,5 +802,73 @@ TEST(TJsonParserTest, MemoryLimit4)
////////////////////////////////////////////////////////////////////////////////
+TString MakeDeepMapJson(int depth)
+{
+ TString result;
+ for (int i = 0; i < depth; ++i) {
+ result += "{\"k\":";
+ }
+ result += "0";
+ for (int i = 0; i < depth; ++i) {
+ result += "}";
+ }
+ return result;
+}
+
+TString MakeDeepListJson(int depth)
+{
+ TString result;
+ for (int i = 0; i < depth; ++i) {
+ result += "[";
+ }
+ result += "0";
+ for (int i = 0; i < depth; ++i) {
+ result += "]";
+ }
+ return result;
+}
+
+TEST(TJsonParser, ParseDeepMapNoExcept)
+{
+ TStringStream yson;
+ NYT::NYson::TYsonWriter writer(&yson);
+ auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>();
+ configPtr->NestingLevelLimit = 20;
+ NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node);
+ EXPECT_NO_THROW(parser.Read(MakeDeepMapJson(configPtr->NestingLevelLimit)));
+}
+
+TEST(TJsonParser, ParseDeepMapExcept)
+{
+ TStringStream yson;
+ NYT::NYson::TYsonWriter writer(&yson);
+ auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>();
+ configPtr->NestingLevelLimit = 20;
+ NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node);
+ EXPECT_THROW(parser.Read(MakeDeepMapJson(configPtr->NestingLevelLimit + 1)), NYT::TErrorException);
+}
+
+TEST(TJsonParser, ParseDeepListNoExcept)
+{
+ TStringStream yson;
+ NYT::NYson::TYsonWriter writer(&yson);
+ auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>();
+ configPtr->NestingLevelLimit = 20;
+ NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node);
+ EXPECT_NO_THROW(parser.Read(MakeDeepListJson(configPtr->NestingLevelLimit)));
+}
+
+TEST(TJsonParser, ParseDeepListExcept)
+{
+ TStringStream yson;
+ NYT::NYson::TYsonWriter writer(&yson);
+ auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>();
+ configPtr->NestingLevelLimit = 20;
+ NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node);
+ EXPECT_THROW(parser.Read(MakeDeepListJson(configPtr->NestingLevelLimit + 1)), NYT::TErrorException);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
} // namespace
} // namespace NYT::NJson
diff --git a/yt/yt/library/formats/format.cpp b/yt/yt/library/formats/format.cpp
index 1fdd5e3836..a8ac005ae5 100644
--- a/yt/yt/library/formats/format.cpp
+++ b/yt/yt/library/formats/format.cpp
@@ -465,6 +465,9 @@ std::unique_ptr<IParser> CreateParserForFormat(const TFormat& format, EDataType
return CreateParserForYson(consumer, DataTypeToYsonType(dataType));
case EFormatType::Json: {
auto config = ConvertTo<TJsonFormatConfigPtr>(&format.Attributes());
+ if (config->NestingLevelLimit == 0) {
+ config->NestingLevelLimit = NYT::NJson::NestingLevelLimit;
+ }
return std::unique_ptr<IParser>(new TParserAdapter<TJsonParser>(consumer, config, DataTypeToYsonType(dataType)));
}
case EFormatType::Dsv: {