diff options
author | yurial <yurial@yandex-team.com> | 2023-11-01 11:44:41 +0300 |
---|---|---|
committer | yurial <yurial@yandex-team.com> | 2023-11-01 12:15:19 +0300 |
commit | 7f97144a42658fab5a97c83c0c66b0ad957b5b0f (patch) | |
tree | 23c3e41ea20c6797441e26bce13ffbb1eb0df11e | |
parent | 948552a0ebc797aecf90797407b0333fbccc43e9 (diff) | |
download | ydb-7f97144a42658fab5a97c83c0c66b0ad957b5b0f.tar.gz |
Add DepthLimit to TJsonFormatConfig
-rw-r--r-- | yt/yt/core/json/config.h | 1 | ||||
-rw-r--r-- | yt/yt/core/json/json_callbacks.cpp | 31 | ||||
-rw-r--r-- | yt/yt/core/json/json_callbacks.h | 10 | ||||
-rw-r--r-- | yt/yt/core/json/json_parser.cpp | 1 | ||||
-rw-r--r-- | yt/yt/core/json/public.h | 4 | ||||
-rw-r--r-- | yt/yt/core/json/unittests/parser_ut.cpp | 68 | ||||
-rw-r--r-- | yt/yt/library/formats/format.cpp | 3 |
7 files changed, 101 insertions, 17 deletions
diff --git a/yt/yt/core/json/config.h b/yt/yt/core/json/config.h index 8d9a951631..71fa8c46e7 100644 --- a/yt/yt/core/json/config.h +++ b/yt/yt/core/json/config.h @@ -26,6 +26,7 @@ public: bool Plain; bool EncodeUtf8; i64 MemoryLimit = 256_MB; + int NestingLevelLimit = 0; std::optional<int> StringLengthLimit; diff --git a/yt/yt/core/json/json_callbacks.cpp b/yt/yt/core/json/json_callbacks.cpp index 1edea77312..39703c007f 100644 --- a/yt/yt/core/json/json_callbacks.cpp +++ b/yt/yt/core/json/json_callbacks.cpp @@ -19,11 +19,13 @@ TJsonCallbacksBuildingNodesImpl::TJsonCallbacksBuildingNodesImpl( NYson::EYsonType ysonType, const TUtf8Transcoder& utf8Transcoder, i64 memoryLimit, + int nestingLevelLimit, NJson::EJsonAttributesMode attributesMode) : Consumer_(consumer) , YsonType_(ysonType) , Utf8Transcoder_(utf8Transcoder) , MemoryLimit_(memoryLimit) + , NestingLevelLimit_(nestingLevelLimit) , AttributesMode_(attributesMode) , TreeBuilder_(CreateBuilderFromFactory(GetEphemeralNodeFactory())) { @@ -140,14 +142,17 @@ void TJsonCallbacksBuildingNodesImpl::OnItemFinished() if (YsonType_ == EYsonType::ListFragment) { Consumer_->OnListItem(); } - ConsumeNode(TreeBuilder_->EndTree()); + ConsumeNode(TreeBuilder_->EndTree(), Stack_.size()); TreeBuilder_->BeginTree(); ConsumedMemory_ = 0; } } -void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node) +void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node, int nestingLevel) { + if (NestingLevelLimit_ > 0 && nestingLevel > NestingLevelLimit_) { + THROW_ERROR_EXCEPTION("JSON nesting level limit exceeded") << NYT::TErrorAttribute("NestingLevelLimit", NestingLevelLimit_); + } switch (node->GetType()) { case ENodeType::Int64: Consumer_->OnInt64Scalar(node->AsInt64()->GetValue()); @@ -168,10 +173,10 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node) Consumer_->OnStringScalar(node->AsString()->GetValue()); break; case ENodeType::Map: - ConsumeNode(node->AsMap()); + ConsumeNode(node->AsMap(), nestingLevel + 1); break; case ENodeType::List: - ConsumeNode(node->AsList()); + ConsumeNode(node->AsList(), nestingLevel + 1); break; default: YT_ABORT(); @@ -179,7 +184,7 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(INodePtr node) }; } -void TJsonCallbacksBuildingNodesImpl::ConsumeMapFragment(IMapNodePtr map) +void TJsonCallbacksBuildingNodesImpl::ConsumeMapFragment(IMapNodePtr map, int nestingLevel) { for (const auto& [key, value] : map->GetChildren()) { auto adjustedKey = TStringBuf(key); @@ -194,11 +199,11 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeMapFragment(IMapNodePtr map) adjustedKey = adjustedKey.substr(1); } Consumer_->OnKeyedItem(adjustedKey); - ConsumeNode(value); + ConsumeNode(value, nestingLevel); } } -void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map) +void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map, int nestingLevel) { auto node = map->FindChild("$value"); if (node) { @@ -208,7 +213,7 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map) THROW_ERROR_EXCEPTION("Value of \"$attributes\" must be a map"); } Consumer_->OnBeginAttributes(); - ConsumeMapFragment(attributes->AsMap()); + ConsumeMapFragment(attributes->AsMap(), nestingLevel); Consumer_->OnEndAttributes(); } @@ -235,7 +240,7 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map) } if (node->GetType() == expectedType) { - ConsumeNode(node); + ConsumeNode(node, nestingLevel); } else if (node->GetType() == ENodeType::String) { auto nodeAsString = node->AsString()->GetValue(); switch (expectedType) { @@ -290,24 +295,24 @@ void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IMapNodePtr map) << TErrorAttribute("actual_type", node->GetType()); } } else { - ConsumeNode(node); + ConsumeNode(node, nestingLevel); } } else { if (map->FindChild("$attributes")) { THROW_ERROR_EXCEPTION("Found key \"$attributes\" without key \"$value\""); } Consumer_->OnBeginMap(); - ConsumeMapFragment(map); + ConsumeMapFragment(map, nestingLevel); Consumer_->OnEndMap(); } } -void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IListNodePtr list) +void TJsonCallbacksBuildingNodesImpl::ConsumeNode(IListNodePtr list, int nestingLevel) { Consumer_->OnBeginList(); for (int i = 0; i < list->GetChildCount(); ++i) { Consumer_->OnListItem(); - ConsumeNode(list->GetChildOrThrow(i)); + ConsumeNode(list->GetChildOrThrow(i), nestingLevel); } Consumer_->OnEndList(); } diff --git a/yt/yt/core/json/json_callbacks.h b/yt/yt/core/json/json_callbacks.h index ffae11185c..47556a8232 100644 --- a/yt/yt/core/json/json_callbacks.h +++ b/yt/yt/core/json/json_callbacks.h @@ -51,6 +51,7 @@ public: NYson::EYsonType ysonType, const TUtf8Transcoder& utf8Transcoder, i64 memoryLimit, + int nestingLevelLimit, NJson::EJsonAttributesMode attributesMode); void OnStringScalar(TStringBuf value) override; @@ -71,16 +72,17 @@ private: void OnItemStarted(); void OnItemFinished(); - void ConsumeNode(NYTree::INodePtr node); - void ConsumeNode(NYTree::IMapNodePtr map); - void ConsumeNode(NYTree::IListNodePtr list); - void ConsumeMapFragment(NYTree::IMapNodePtr map); + void ConsumeNode(NYTree::INodePtr node, int nestingLevel); + void ConsumeNode(NYTree::IMapNodePtr map, int nestingLevel); + void ConsumeNode(NYTree::IListNodePtr list, int nestingLevel); + void ConsumeMapFragment(NYTree::IMapNodePtr map, int nestingLevel); NYson::IYsonConsumer* Consumer_; NYson::EYsonType YsonType_; TUtf8Transcoder Utf8Transcoder_; i64 ConsumedMemory_ = 0; const i64 MemoryLimit_; + const int NestingLevelLimit_; const NJson::EJsonAttributesMode AttributesMode_; TCompactVector<EJsonCallbacksNodeType, 4> Stack_; diff --git a/yt/yt/core/json/json_parser.cpp b/yt/yt/core/json/json_parser.cpp index 55aedb9917..989c2099fa 100644 --- a/yt/yt/core/json/json_parser.cpp +++ b/yt/yt/core/json/json_parser.cpp @@ -131,6 +131,7 @@ public: Type_, TUtf8Transcoder(Config_->EncodeUtf8), Config_->MemoryLimit, + Config_->NestingLevelLimit, Config_->AttributesMode); } YajlHandle_.reset(yajl_alloc(&YajlCallbacks, nullptr, Callbacks_.get())); diff --git a/yt/yt/core/json/public.h b/yt/yt/core/json/public.h index 860acdc88c..6e7b4486d1 100644 --- a/yt/yt/core/json/public.h +++ b/yt/yt/core/json/public.h @@ -1,6 +1,7 @@ #pragma once #include <yt/yt/core/misc/public.h> +#include <yt/yt/core/yson/public.h> namespace NYT::NJson { @@ -11,6 +12,9 @@ DECLARE_REFCOUNTED_CLASS(TJsonFormatConfig) struct IJsonConsumer; struct IJsonWriter; +// YSON with attributes is represented in JSON with additional nested objects. It leads to doubled nesting levels. +constexpr int NestingLevelLimit = NYT::NYson::NewNestingLevelLimit * 2; + //////////////////////////////////////////////////////////////////////////////// } // namespace NYT::NJson diff --git a/yt/yt/core/json/unittests/parser_ut.cpp b/yt/yt/core/json/unittests/parser_ut.cpp index b9e24ec128..c1d6ed0787 100644 --- a/yt/yt/core/json/unittests/parser_ut.cpp +++ b/yt/yt/core/json/unittests/parser_ut.cpp @@ -802,5 +802,73 @@ TEST(TJsonParserTest, MemoryLimit4) //////////////////////////////////////////////////////////////////////////////// +TString MakeDeepMapJson(int depth) +{ + TString result; + for (int i = 0; i < depth; ++i) { + result += "{\"k\":"; + } + result += "0"; + for (int i = 0; i < depth; ++i) { + result += "}"; + } + return result; +} + +TString MakeDeepListJson(int depth) +{ + TString result; + for (int i = 0; i < depth; ++i) { + result += "["; + } + result += "0"; + for (int i = 0; i < depth; ++i) { + result += "]"; + } + return result; +} + +TEST(TJsonParser, ParseDeepMapNoExcept) +{ + TStringStream yson; + NYT::NYson::TYsonWriter writer(&yson); + auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>(); + configPtr->NestingLevelLimit = 20; + NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node); + EXPECT_NO_THROW(parser.Read(MakeDeepMapJson(configPtr->NestingLevelLimit))); +} + +TEST(TJsonParser, ParseDeepMapExcept) +{ + TStringStream yson; + NYT::NYson::TYsonWriter writer(&yson); + auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>(); + configPtr->NestingLevelLimit = 20; + NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node); + EXPECT_THROW(parser.Read(MakeDeepMapJson(configPtr->NestingLevelLimit + 1)), NYT::TErrorException); +} + +TEST(TJsonParser, ParseDeepListNoExcept) +{ + TStringStream yson; + NYT::NYson::TYsonWriter writer(&yson); + auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>(); + configPtr->NestingLevelLimit = 20; + NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node); + EXPECT_NO_THROW(parser.Read(MakeDeepListJson(configPtr->NestingLevelLimit))); +} + +TEST(TJsonParser, ParseDeepListExcept) +{ + TStringStream yson; + NYT::NYson::TYsonWriter writer(&yson); + auto configPtr = NYT::New<NYT::NJson::TJsonFormatConfig>(); + configPtr->NestingLevelLimit = 20; + NYT::NJson::TJsonParser parser(&writer, configPtr, NYT::NJson::EYsonType::Node); + EXPECT_THROW(parser.Read(MakeDeepListJson(configPtr->NestingLevelLimit + 1)), NYT::TErrorException); +} + +//////////////////////////////////////////////////////////////////////////////// + } // namespace } // namespace NYT::NJson diff --git a/yt/yt/library/formats/format.cpp b/yt/yt/library/formats/format.cpp index 1fdd5e3836..a8ac005ae5 100644 --- a/yt/yt/library/formats/format.cpp +++ b/yt/yt/library/formats/format.cpp @@ -465,6 +465,9 @@ std::unique_ptr<IParser> CreateParserForFormat(const TFormat& format, EDataType return CreateParserForYson(consumer, DataTypeToYsonType(dataType)); case EFormatType::Json: { auto config = ConvertTo<TJsonFormatConfigPtr>(&format.Attributes()); + if (config->NestingLevelLimit == 0) { + config->NestingLevelLimit = NYT::NJson::NestingLevelLimit; + } return std::unique_ptr<IParser>(new TParserAdapter<TJsonParser>(consumer, config, DataTypeToYsonType(dataType))); } case EFormatType::Dsv: { |