aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorionagamed <ionagamed@yandex-team.com>2023-11-29 08:16:34 +0300
committerionagamed <ionagamed@yandex-team.com>2023-11-29 08:51:03 +0300
commit55fc158d4d2d8ae4d9f026e1afac03c7daae294d (patch)
treebff2aa2edeae94bbf21a7a9875c6d8a1cbe8974c
parent4c48882379d93a43bd74060add09e309ce5e5002 (diff)
downloadydb-55fc158d4d2d8ae4d9f026e1afac03c7daae294d.tar.gz
library/json: add MaxDepth and iterative parsing into config
rationale: у нас была проблема, где приезжает небольшой пользовательский json (на пару-тройку кб) и рвет нам стек из-за глубины. Со стороны rapidjson это проще всего поправить, включив у них итеративный парсинг, который не будет использовать системный стек. Но это не совсем все — с нашей стороны оно все развалится, когда надо будет рекурсивно вызвать деструкторы TJsonValue — и тут уже можно будет покрутить MaxDepth.
-rw-r--r--library/cpp/json/json_reader.cpp99
-rw-r--r--library/cpp/json/json_reader.h20
-rw-r--r--library/cpp/json/ut/json_reader_ut.cpp49
3 files changed, 135 insertions, 33 deletions
diff --git a/library/cpp/json/json_reader.cpp b/library/cpp/json/json_reader.cpp
index 072c8deafe..9080e6dbca 100644
--- a/library/cpp/json/json_reader.cpp
+++ b/library/cpp/json/json_reader.cpp
@@ -154,6 +154,10 @@ namespace NJson {
}
namespace {
+ // Settings consumed by TJsonValueBuilder while it assembles the value tree.
+ struct TJsonValueBuilderConfig {
+     // Upper bound for the builder's value stack size; 0 turns the check off.
+     ui64 MaxDepth{0};
+ };
+
struct TJsonValueBuilder {
#ifdef NDEBUG
using TItem = TJsonValue*;
@@ -182,12 +186,21 @@ namespace NJson {
TStack<TItem> S;
+ TJsonValueBuilderConfig Config;
+
TJsonValueBuilder(NJson::TJsonValue& v)
: V(v)
{
S.emplace(&V);
}
+ // Same setup as the single-argument constructor (binds V and seeds the
+ // stack with the root value), then stores a copy of `config`.
+ TJsonValueBuilder(NJson::TJsonValue& v, const TJsonValueBuilderConfig& config)
+     : TJsonValueBuilder(v)
+ {
+     Config = config;
+ }
+
template <class T>
void Set(const T& t) {
if (Access(S.top()).IsArray()) {
@@ -258,6 +271,9 @@ namespace NJson {
bool StartObject() {
if (Access(S.top()).IsArray()) {
S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP));
+ if (!IsWithinStackBounds()) {
+ return false;
+ }
} else {
Access(S.top()).SetType(NJson::JSON_MAP);
}
@@ -294,6 +310,9 @@ namespace NJson {
bool StartArray() {
if (Access(S.top()).IsArray()) {
S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY));
+ if (!IsWithinStackBounds()) {
+ return false;
+ }
} else {
Access(S.top()).SetType(NJson::JSON_ARRAY);
}
@@ -305,15 +324,72 @@ namespace NJson {
S.pop();
return true;
}
+
+ // Reports whether the current value stack size respects Config.MaxDepth.
+ // A MaxDepth of 0 means the stack size is not limited.
+ // Const-qualified: the check only reads builder state.
+ bool IsWithinStackBounds() const {
+     return Config.MaxDepth == 0 || S.size() <= Config.MaxDepth;
+ }
};
+ // Maps a ReaderConfigFlags bitmask onto the corresponding rapidjson parse flags.
+ // Every bit is translated independently; a cleared bit contributes kParseNoFlags.
+ constexpr ui32 ConvertToRapidJsonFlags(ui8 flags) {
+     const ui32 iterative = (flags & ReaderConfigFlags::ITERATIVE)
+         ? rapidjson::kParseIterativeFlag
+         : rapidjson::kParseNoFlags;
+     const ui32 comments = (flags & ReaderConfigFlags::COMMENTS)
+         ? rapidjson::kParseCommentsFlag
+         : rapidjson::kParseNoFlags;
+     const ui32 validate = (flags & ReaderConfigFlags::VALIDATE)
+         ? rapidjson::kParseValidateEncodingFlag
+         : rapidjson::kParseNoFlags;
+     const ui32 escape = (flags & ReaderConfigFlags::ESCAPE)
+         ? rapidjson::kParseEscapedApostropheFlag
+         : rapidjson::kParseNoFlags;
+
+     return iterative | comments | validate | escape;
+ }
+
+ // Bridges a runtime flag mask to reader.Parse<>, which takes its parse flags
+ // as a template argument. Each call moves one set bit out of `runtimeFlags`
+ // into the compile-time `currentFlags` and recurses; once `runtimeFlags` is
+ // empty, Parse is invoked with ConvertToRapidJsonFlags(currentFlags).
+ //
+ // runtimeFlags - ReaderConfigFlags bits that still have to be dispatched
+ // reader       - rapidjson reader that performs the parse
+ // is           - input stream handed to reader.Parse
+ // handler      - SAX handler handed to reader.Parse
+ // Returns whatever reader.Parse returns.
+ template <class TRapidJsonCompliantInputStream, class THandler, ui8 currentFlags = 0>
+ auto ReadWithRuntimeFlags(ui8 runtimeFlags,
+                           rapidjson::Reader& reader,
+                           TRapidJsonCompliantInputStream& is,
+                           THandler& handler) {
+     if (runtimeFlags == 0) {
+         return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
+     }
+
+ // The macro parameter is parenthesized so that the expansion stays correct
+ // even if a compound expression is ever passed as `flag`.
+#define TRY_EXTRACT_FLAG(flag) \
+     if (runtimeFlags & (flag)) { \
+         return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | (flag)>( \
+             runtimeFlags ^ (flag), reader, is, handler \
+         ); \
+     }
+
+     TRY_EXTRACT_FLAG(ReaderConfigFlags::ITERATIVE);
+     TRY_EXTRACT_FLAG(ReaderConfigFlags::COMMENTS);
+     TRY_EXTRACT_FLAG(ReaderConfigFlags::VALIDATE);
+     TRY_EXTRACT_FLAG(ReaderConfigFlags::ESCAPE);
+
+#undef TRY_EXTRACT_FLAG
+
+     // Reached only when runtimeFlags carries bits other than the four handled
+     // above; such bits are ignored.
+     return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
+ }
+
template <class TRapidJsonCompliantInputStream, class THandler>
auto Read(const TJsonReaderConfig& config,
rapidjson::Reader& reader,
TRapidJsonCompliantInputStream& is,
THandler& handler) {
- ui8 flags = ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE;
+ // validate by default
+ ui8 flags = ReaderConfigFlags::VALIDATE;
+
+ if (config.UseIterativeParser) {
+ flags |= ReaderConfigFlags::ITERATIVE;
+ }
if (config.AllowComments) {
flags |= ReaderConfigFlags::COMMENTS;
@@ -327,24 +403,7 @@ namespace NJson {
flags |= ReaderConfigFlags::ESCAPE;
}
- switch (flags) {
- case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_NOESCAPE:
- return reader.Parse<rapidjson::kParseCommentsFlag>(is, handler);
- case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_NOESCAPE:
- return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag>(is, handler);
- case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_ESCAPE:
- return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
- case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_ESCAPE:
- return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
- case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE:
- return reader.Parse<rapidjson::kParseValidateEncodingFlag>(is, handler);
- case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_ESCAPE:
- return reader.Parse<rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
- case ReaderConfigToRapidJsonFlags::NOCOMMENTS_NOVALID_ESCAPE:
- return reader.Parse<rapidjson::kParseEscapedApostropheFlag>(is, handler);
- default:
- return reader.Parse<rapidjson::kParseNoFlags>(is, handler);
- }
+ return ReadWithRuntimeFlags(flags, reader, is, handler);
}
template <class TRapidJsonCompliantInputStream, class THandler>
@@ -368,7 +427,7 @@ namespace NJson {
bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
out->SetType(NJson::JSON_NULL);
- TJsonValueBuilder handler(*out);
+ TJsonValueBuilder handler(*out, { .MaxDepth = config->MaxDepth });
return ReadJson(is, config, handler, throwOnError);
}
diff --git a/library/cpp/json/json_reader.h b/library/cpp/json/json_reader.h
index b673788330..6c8e8c32e2 100644
--- a/library/cpp/json/json_reader.h
+++ b/library/cpp/json/json_reader.h
@@ -15,11 +15,14 @@ namespace NJson {
struct TJsonReaderConfig {
TJsonReaderConfig();
+ bool UseIterativeParser = false;
// js-style comments (both // and /**/)
bool AllowComments = false;
bool DontValidateUtf8 = false;
bool AllowEscapedApostrophe = false;
+ ui64 MaxDepth = 0;
+
void SetBufferSize(size_t bufferSize);
size_t GetBufferSize() const;
@@ -45,19 +48,10 @@ namespace NJson {
bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* callbacks);
enum ReaderConfigFlags {
- COMMENTS = 0b100,
- VALIDATE = 0b010,
- ESCAPE = 0b001,
- };
-
- enum ReaderConfigToRapidJsonFlags {
- COMMENTS_NOVALID_NOESCAPE = 0b100,
- COMMENTS_VALID_NOESCAPE = 0b110,
- COMMENTS_VALID_ESCAPE = 0b111,
- COMMENTS_NOVALID_ESCAPE = 0b101,
- NOCOMMENTS_VALID_NOESCAPE = 0b010,
- NOCOMMENTS_VALID_ESCAPE = 0b011,
- NOCOMMENTS_NOVALID_ESCAPE = 0b001,
+ // One bit per reader option; the values can be OR-ed into a single mask.
+ ITERATIVE = 1 << 3, // selects rapidjson::kParseIterativeFlag
+ COMMENTS = 1 << 2, // selects rapidjson::kParseCommentsFlag
+ VALIDATE = 1 << 1, // selects rapidjson::kParseValidateEncodingFlag
+ ESCAPE = 1 << 0, // selects rapidjson::kParseEscapedApostropheFlag
};
inline bool ValidateJson(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError = false) {
diff --git a/library/cpp/json/ut/json_reader_ut.cpp b/library/cpp/json/ut/json_reader_ut.cpp
index cd31afa0b8..115c85e1c7 100644
--- a/library/cpp/json/ut/json_reader_ut.cpp
+++ b/library/cpp/json/ut/json_reader_ut.cpp
@@ -66,6 +66,17 @@ public:
}
};
+// Writes a json object with a single key "key" whose value is (depth - 1)
+// nested arrays, the innermost one empty, into `stream`.
+//
+// The bracket count is computed once as ui64, and depth == 0 is clamped to zero
+// brackets: a narrower loop index could never reach a bound above 2^32, and
+// `depth - 1` would wrap around for depth == 0.
+// With depth <= 1 no brackets are emitted and the output is `{"key":}`
+// (no value after the key).
+void GenerateDeepJson(TStringStream& stream, ui64 depth) {
+    const ui64 brackets = depth > 0 ? depth - 1 : 0;
+
+    stream << "{\"key\":";
+    for (ui64 i = 0; i < brackets; ++i) {
+        stream << "[";
+    }
+    for (ui64 i = 0; i < brackets; ++i) {
+        stream << "]";
+    }
+    stream << "}";
+}
+
Y_UNIT_TEST_SUITE(TJsonReaderTest) {
Y_UNIT_TEST(JsonReformatTest) {
TString data = "{\"null value\": null, \"intkey\": 10, \"double key\": 11.11, \"string key\": \"string\", \"array\": [1,2,3,\"TString\"], \"bool key\": true}";
@@ -396,6 +407,44 @@ Y_UNIT_TEST_SUITE(TJsonReaderTest) {
UNIT_ASSERT(v.GetMap().begin()->second.IsString());
UNIT_ASSERT_VALUES_EQUAL("", v.GetMap().begin()->second.GetString());
}
+
+ // A very deep json tree overflows the stack when it is parsed recursively.
+ // Getting through one without a crash is a good sign that iterative mode is in effect.
+ Y_UNIT_TEST(TJsonIterativeTest) {
+     constexpr ui32 nestingLevels = 100'000;
+     constexpr ui32 depthLimit = 1'000;
+
+     TStringStream deepJson;
+     GenerateDeepJson(deepJson, nestingLevels);
+
+     TJsonReaderConfig readerConfig;
+     readerConfig.UseIterativeParser = true;
+     readerConfig.MaxDepth = depthLimit;
+
+     // The generated document is far deeper than MaxDepth, so the read must be rejected.
+     TJsonValue parsed;
+     const bool parsedOk = ReadJsonTree(&deepJson, &readerConfig, &parsed);
+     UNIT_ASSERT(!parsedOk);
+ }
+
+ // Checks the MaxDepth boundary: a limit equal to the generated depth is
+ // accepted, a limit one below it is rejected.
+ Y_UNIT_TEST(TJsonMaxDepthTest) {
+     constexpr ui32 depth = 1'000;
+
+     // Generates a fresh document via GenerateDeepJson(jsonDepth) and reads it
+     // with the given MaxDepth; returns the result of ReadJsonTree.
+     const auto readsWithLimit = [](ui64 jsonDepth, ui64 maxDepth) {
+         TStringStream jsonStream;
+         GenerateDeepJson(jsonStream, jsonDepth);
+
+         TJsonReaderConfig config;
+         config.MaxDepth = maxDepth;
+
+         TJsonValue v;
+         return ReadJsonTree(&jsonStream, &config, &v);
+     };
+
+     UNIT_ASSERT(readsWithLimit(depth, depth));
+     UNIT_ASSERT(!readsWithLimit(depth, depth - 1));
+ }
}