diff options
author | ionagamed <ionagamed@yandex-team.com> | 2023-11-29 08:16:34 +0300 |
---|---|---|
committer | ionagamed <ionagamed@yandex-team.com> | 2023-11-29 08:51:03 +0300 |
commit | 55fc158d4d2d8ae4d9f026e1afac03c7daae294d (patch) | |
tree | bff2aa2edeae94bbf21a7a9875c6d8a1cbe8974c | |
parent | 4c48882379d93a43bd74060add09e309ce5e5002 (diff) | |
download | ydb-55fc158d4d2d8ae4d9f026e1afac03c7daae294d.tar.gz |
library/json: add MaxDepth and iterative parsing into config
rationale: у нас была проблема, когда приезжает небольшой пользовательский json (на пару-тройку кб) и рвет нам стек из-за глубины вложенности.
Со стороны rapidjson это проще всего поправить, включив у них итеративный парсинг, который не будет использовать системный стек.
Но это не совсем все — с нашей стороны оно все развалится, когда надо будет рекурсивно вызвать деструкторы TJsonValue, — и тут уже можно будет покрутить MaxDepth.
-rw-r--r-- | library/cpp/json/json_reader.cpp | 99 | ||||
-rw-r--r-- | library/cpp/json/json_reader.h | 20 | ||||
-rw-r--r-- | library/cpp/json/ut/json_reader_ut.cpp | 49 |
3 files changed, 135 insertions, 33 deletions
diff --git a/library/cpp/json/json_reader.cpp b/library/cpp/json/json_reader.cpp index 072c8deafe..9080e6dbca 100644 --- a/library/cpp/json/json_reader.cpp +++ b/library/cpp/json/json_reader.cpp @@ -154,6 +154,10 @@ namespace NJson { } namespace { + struct TJsonValueBuilderConfig { + ui64 MaxDepth = 0; + }; + struct TJsonValueBuilder { #ifdef NDEBUG using TItem = TJsonValue*; @@ -182,12 +186,21 @@ namespace NJson { TStack<TItem> S; + TJsonValueBuilderConfig Config; + TJsonValueBuilder(NJson::TJsonValue& v) : V(v) { S.emplace(&V); } + TJsonValueBuilder(NJson::TJsonValue& v, const TJsonValueBuilderConfig& config) + : V(v) + , Config(config) + { + S.emplace(&V); + } + template <class T> void Set(const T& t) { if (Access(S.top()).IsArray()) { @@ -258,6 +271,9 @@ namespace NJson { bool StartObject() { if (Access(S.top()).IsArray()) { S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP)); + if (!IsWithinStackBounds()) { + return false; + } } else { Access(S.top()).SetType(NJson::JSON_MAP); } @@ -294,6 +310,9 @@ namespace NJson { bool StartArray() { if (Access(S.top()).IsArray()) { S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY)); + if (!IsWithinStackBounds()) { + return false; + } } else { Access(S.top()).SetType(NJson::JSON_ARRAY); } @@ -305,15 +324,72 @@ namespace NJson { S.pop(); return true; } + + bool IsWithinStackBounds() { + return Config.MaxDepth == 0 || (S.size() <= Config.MaxDepth); + } }; + constexpr ui32 ConvertToRapidJsonFlags(ui8 flags) { + ui32 rapidjsonFlags = rapidjson::kParseNoFlags; + + if (flags & ReaderConfigFlags::ITERATIVE) { + rapidjsonFlags |= rapidjson::kParseIterativeFlag; + } + + if (flags & ReaderConfigFlags::COMMENTS) { + rapidjsonFlags |= rapidjson::kParseCommentsFlag; + } + + if (flags & ReaderConfigFlags::VALIDATE) { + rapidjsonFlags |= rapidjson::kParseValidateEncodingFlag; + } + + if (flags & ReaderConfigFlags::ESCAPE) { + rapidjsonFlags |= rapidjson::kParseEscapedApostropheFlag; + } + + return rapidjsonFlags; + } + + 
template <class TRapidJsonCompliantInputStream, class THandler, ui8 currentFlags = 0> + auto ReadWithRuntimeFlags(ui8 runtimeFlags, + rapidjson::Reader& reader, + TRapidJsonCompliantInputStream& is, + THandler& handler) { + if (runtimeFlags == 0) { + return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler); + } + +#define TRY_EXTRACT_FLAG(flag) \ + if (runtimeFlags & flag) { \ + return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | flag>( \ + runtimeFlags ^ flag, reader, is, handler \ + ); \ + } + + TRY_EXTRACT_FLAG(ReaderConfigFlags::ITERATIVE); + TRY_EXTRACT_FLAG(ReaderConfigFlags::COMMENTS); + TRY_EXTRACT_FLAG(ReaderConfigFlags::VALIDATE); + TRY_EXTRACT_FLAG(ReaderConfigFlags::ESCAPE); + +#undef TRY_EXTRACT_FLAG + + return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler); + } + template <class TRapidJsonCompliantInputStream, class THandler> auto Read(const TJsonReaderConfig& config, rapidjson::Reader& reader, TRapidJsonCompliantInputStream& is, THandler& handler) { - ui8 flags = ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE; + // validate by default + ui8 flags = ReaderConfigFlags::VALIDATE; + + if (config.UseIterativeParser) { + flags |= ReaderConfigFlags::ITERATIVE; + } if (config.AllowComments) { flags |= ReaderConfigFlags::COMMENTS; @@ -327,24 +403,7 @@ namespace NJson { flags |= ReaderConfigFlags::ESCAPE; } - switch (flags) { - case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_NOESCAPE: - return reader.Parse<rapidjson::kParseCommentsFlag>(is, handler); - case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_NOESCAPE: - return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag>(is, handler); - case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_ESCAPE: - return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler); - case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_ESCAPE: - 
return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler); - case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE: - return reader.Parse<rapidjson::kParseValidateEncodingFlag>(is, handler); - case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_ESCAPE: - return reader.Parse<rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler); - case ReaderConfigToRapidJsonFlags::NOCOMMENTS_NOVALID_ESCAPE: - return reader.Parse<rapidjson::kParseEscapedApostropheFlag>(is, handler); - default: - return reader.Parse<rapidjson::kParseNoFlags>(is, handler); - } + return ReadWithRuntimeFlags(flags, reader, is, handler); } template <class TRapidJsonCompliantInputStream, class THandler> @@ -368,7 +427,7 @@ namespace NJson { bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) { out->SetType(NJson::JSON_NULL); - TJsonValueBuilder handler(*out); + TJsonValueBuilder handler(*out, { .MaxDepth = config->MaxDepth }); return ReadJson(is, config, handler, throwOnError); } diff --git a/library/cpp/json/json_reader.h b/library/cpp/json/json_reader.h index b673788330..6c8e8c32e2 100644 --- a/library/cpp/json/json_reader.h +++ b/library/cpp/json/json_reader.h @@ -15,11 +15,14 @@ namespace NJson { struct TJsonReaderConfig { TJsonReaderConfig(); + bool UseIterativeParser = false; // js-style comments (both // and /**/) bool AllowComments = false; bool DontValidateUtf8 = false; bool AllowEscapedApostrophe = false; + ui64 MaxDepth = 0; + void SetBufferSize(size_t bufferSize); size_t GetBufferSize() const; @@ -45,19 +48,10 @@ namespace NJson { bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* callbacks); enum ReaderConfigFlags { - COMMENTS = 0b100, - VALIDATE = 0b010, - ESCAPE = 0b001, - }; - - enum ReaderConfigToRapidJsonFlags { - COMMENTS_NOVALID_NOESCAPE = 0b100, - COMMENTS_VALID_NOESCAPE = 0b110, - 
COMMENTS_VALID_ESCAPE = 0b111, - COMMENTS_NOVALID_ESCAPE = 0b101, - NOCOMMENTS_VALID_NOESCAPE = 0b010, - NOCOMMENTS_VALID_ESCAPE = 0b011, - NOCOMMENTS_NOVALID_ESCAPE = 0b001, + ITERATIVE = 0b1000, + COMMENTS = 0b0100, + VALIDATE = 0b0010, + ESCAPE = 0b0001, }; inline bool ValidateJson(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError = false) { diff --git a/library/cpp/json/ut/json_reader_ut.cpp b/library/cpp/json/ut/json_reader_ut.cpp index cd31afa0b8..115c85e1c7 100644 --- a/library/cpp/json/ut/json_reader_ut.cpp +++ b/library/cpp/json/ut/json_reader_ut.cpp @@ -66,6 +66,17 @@ public: } }; +void GenerateDeepJson(TStringStream& stream, ui64 depth) { + stream << "{\"key\":"; + for (ui32 i = 0; i < depth - 1; ++i) { + stream << "["; + } + for (ui32 i = 0; i < depth - 1; ++i) { + stream << "]"; + } + stream << "}"; +} + Y_UNIT_TEST_SUITE(TJsonReaderTest) { Y_UNIT_TEST(JsonReformatTest) { TString data = "{\"null value\": null, \"intkey\": 10, \"double key\": 11.11, \"string key\": \"string\", \"array\": [1,2,3,\"TString\"], \"bool key\": true}"; @@ -396,6 +407,44 @@ Y_UNIT_TEST_SUITE(TJsonReaderTest) { UNIT_ASSERT(v.GetMap().begin()->second.IsString()); UNIT_ASSERT_VALUES_EQUAL("", v.GetMap().begin()->second.GetString()); } + + // Parsing an extremely deep json tree would result in stack overflow. + // Not crashing on one is a good indicator of iterative mode. 
+ Y_UNIT_TEST(TJsonIterativeTest) { + constexpr ui32 brackets = static_cast<ui32>(1e5); + + TStringStream jsonStream; + GenerateDeepJson(jsonStream, brackets); + + TJsonReaderConfig config; + config.UseIterativeParser = true; + config.MaxDepth = static_cast<ui32>(1e3); + + TJsonValue v; + UNIT_ASSERT(!ReadJsonTree(&jsonStream, &config, &v)); + } + + Y_UNIT_TEST(TJsonMaxDepthTest) { + constexpr ui32 depth = static_cast<ui32>(1e3); + + { + TStringStream jsonStream; + GenerateDeepJson(jsonStream, depth); + TJsonReaderConfig config; + config.MaxDepth = depth; + TJsonValue v; + UNIT_ASSERT(ReadJsonTree(&jsonStream, &config, &v)); + } + + { + TStringStream jsonStream; + GenerateDeepJson(jsonStream, depth); + TJsonReaderConfig config; + config.MaxDepth = depth - 1; + TJsonValue v; + UNIT_ASSERT(!ReadJsonTree(&jsonStream, &config, &v)); + } + } } |