summaryrefslogtreecommitdiffstats
path: root/library/cpp/json/ordered_maps/json_reader_ordered.cpp
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2025-08-08 10:48:03 +0300
committerrobot-piglet <[email protected]>2025-08-08 11:07:59 +0300
commit6399e7ae57f1556e7b777a0fe1d6b0bf3b36b90e (patch)
tree7991057328ee56fb23dfb43c2f48b72e1ba7413d /library/cpp/json/ordered_maps/json_reader_ordered.cpp
parent9fc8652742d52b557fedfd87254a6065c37b27aa (diff)
Intermediate changes
commit_hash:369029716d3b4afaec45df28e06b27c781a5b564
Diffstat (limited to 'library/cpp/json/ordered_maps/json_reader_ordered.cpp')
-rw-r--r--library/cpp/json/ordered_maps/json_reader_ordered.cpp636
1 files changed, 636 insertions, 0 deletions
diff --git a/library/cpp/json/ordered_maps/json_reader_ordered.cpp b/library/cpp/json/ordered_maps/json_reader_ordered.cpp
new file mode 100644
index 00000000000..babb4201978
--- /dev/null
+++ b/library/cpp/json/ordered_maps/json_reader_ordered.cpp
@@ -0,0 +1,636 @@
+#include "json_reader_ordered.h"
+
+#include <library/cpp/json/rapidjson_helpers.h>
+
+#include <contrib/libs/rapidjson/include/rapidjson/error/en.h>
+#include <contrib/libs/rapidjson/include/rapidjson/error/error.h>
+#include <contrib/libs/rapidjson/include/rapidjson/reader.h>
+
+#include <util/generic/stack.h>
+#include <util/string/cast.h>
+#include <util/system/yassert.h>
+#include <util/string/builder.h>
+
+namespace NJson::NOrderedJson {
+ namespace {
+ TString PrintError(const rapidjson::ParseResult& result) {
+ return TStringBuilder() << TStringBuf("Offset: ") << result.Offset()
+ << TStringBuf(", Code: ") << (int)result.Code()
+ << TStringBuf(", Error: ") << GetParseError_En(result.Code());
+ }
+ }
+
+ static const size_t DEFAULT_BUFFER_LEN = 65536; // initial TJsonReaderConfig::BufferSize and the largest value SetBufferSize() will store
+
+ // Begins a nested value of the given type and pushes it onto ValuesStack.
+ // Returns false when the current state does not permit a value to start.
+ bool TParserCallbacks::OpenComplexValue(EJsonValueType type) {
+     if (CurrentState == START) {
+         // The root value itself becomes the container.
+         Value.SetType(type);
+         ValuesStack.push_back(&Value);
+         return true;
+     }
+
+     if (CurrentState == IN_ARRAY) {
+         TJsonValue& element = ValuesStack.back()->AppendValue(type);
+         ValuesStack.push_back(&element);
+         return true;
+     }
+
+     if (CurrentState == AFTER_MAP_KEY) {
+         // The pending key is consumed here, so the state returns to IN_MAP.
+         TJsonValue& member = ValuesStack.back()->InsertValue(Key, type);
+         ValuesStack.push_back(&member);
+         CurrentState = IN_MAP;
+         return true;
+     }
+
+     return false;
+ }
+
+ // Pops the innermost open value and restores the state that matches its parent.
+ // Returns false if nothing is open or the parent is neither an array nor a map.
+ bool TParserCallbacks::CloseComplexValue() {
+     if (ValuesStack.empty()) {
+         return false;
+     }
+     ValuesStack.pop_back();
+
+     if (ValuesStack.empty()) {
+         // The root value has just been closed.
+         CurrentState = FINISH;
+         return true;
+     }
+
+     const auto parentType = ValuesStack.back()->GetType();
+     if (parentType == JSON_ARRAY) {
+         CurrentState = IN_ARRAY;
+         return true;
+     }
+     if (parentType == JSON_MAP) {
+         CurrentState = IN_MAP;
+         return true;
+     }
+     return false;
+ }
+
+ // Binds the callbacks to the value they fill in; the state machine begins in START.
+ TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError)
+     : TJsonCallbacks(throwOnError), Value(value), NotClosedBracketIsError(notClosedBracketIsError), CurrentState(START)
+ {
+ }
+
+ // Null literal: forwarded to SetValue as JSON_NULL.
+ bool TParserCallbacks::OnNull() {
+     const bool stored = SetValue(JSON_NULL);
+     return stored;
+ }
+
+ // Boolean literal: forwarded to SetValue unchanged.
+ bool TParserCallbacks::OnBoolean(bool val) {
+     const bool stored = SetValue(val);
+     return stored;
+ }
+
+ // Signed integer: forwarded to SetValue unchanged.
+ bool TParserCallbacks::OnInteger(long long val) {
+     const bool stored = SetValue(val);
+     return stored;
+ }
+
+ // Unsigned integer: forwarded to SetValue unchanged.
+ bool TParserCallbacks::OnUInteger(unsigned long long val) {
+     const bool stored = SetValue(val);
+     return stored;
+ }
+
+ // String value: forwarded to SetValue unchanged.
+ bool TParserCallbacks::OnString(const TStringBuf& val) {
+     const bool stored = SetValue(val);
+     return stored;
+ }
+
+ // Floating-point value: forwarded to SetValue unchanged.
+ bool TParserCallbacks::OnDouble(double val) {
+     const bool stored = SetValue(val);
+     return stored;
+ }
+
+ // Opens an array; the state switches to IN_ARRAY only if the open succeeded.
+ bool TParserCallbacks::OnOpenArray() {
+     if (!OpenComplexValue(JSON_ARRAY)) {
+         return false;
+     }
+     CurrentState = IN_ARRAY;
+     return true;
+ }
+
+ // Closes the innermost open value (shared logic with OnCloseMap).
+ bool TParserCallbacks::OnCloseArray() {
+     const bool closed = CloseComplexValue();
+     return closed;
+ }
+
+ // Opens a map; the state switches to IN_MAP only if the open succeeded.
+ bool TParserCallbacks::OnOpenMap() {
+     if (!OpenComplexValue(JSON_MAP)) {
+         return false;
+     }
+     CurrentState = IN_MAP;
+     return true;
+ }
+
+ // Closes the innermost open value (shared logic with OnCloseArray).
+ bool TParserCallbacks::OnCloseMap() {
+     const bool closed = CloseComplexValue();
+     return closed;
+ }
+
+ // Remembers the key for the next value. A key is only accepted in the IN_MAP state.
+ bool TParserCallbacks::OnMapKey(const TStringBuf& val) {
+     if (CurrentState != IN_MAP) {
+         return false;
+     }
+     Key = val;
+     CurrentState = AFTER_MAP_KEY;
+     return true;
+ }
+
+ // End of input: when unclosed brackets count as errors, succeed only if no value
+ // is still open; otherwise always succeed.
+ bool TParserCallbacks::OnEnd() {
+     return !NotClosedBracketIsError || ValuesStack.empty();
+ }
+
+ // Starts with the default buffer size; other fields are not set in this constructor.
+ TJsonReaderConfig::TJsonReaderConfig()
+     : BufferSize(DEFAULT_BUFFER_LEN) {
+ }
+
+ // Stores the requested size clamped to the range [1, DEFAULT_BUFFER_LEN].
+ void TJsonReaderConfig::SetBufferSize(size_t bufferSize) {
+     const size_t capped = Min(bufferSize, DEFAULT_BUFFER_LEN);
+     BufferSize = Max(static_cast<size_t>(1), capped);
+ }
+
+ // Returns the currently configured buffer size.
+ size_t TJsonReaderConfig::GetBufferSize() const {
+     const size_t current = BufferSize;
+     return current;
+ }
+
+ namespace {
+ struct TJsonValueBuilderConfig { // settings consumed by TJsonValueBuilder
+ ui64 MaxDepth = 0; // upper bound on the builder's stack size; 0 disables the check (see IsWithinStackBounds)
+ };
+
+ // SAX-style handler handed to rapidjson::Reader that fills a TJsonValue tree in place.
+ //
+ // S is a stack of values that are still being filled. Its top is the innermost open
+ // array or map, or a slot waiting for its value (the root, or the value of the key
+ // read last). A scalar written into a slot pops it; EndObject() and EndArray() pop
+ // the container they close.
+ struct TJsonValueBuilder {
+#ifdef NDEBUG
+     using TItem = TJsonValue*;
+
+     inline TJsonValue& Access(TItem& item) const {
+         return *item;
+     }
+#else
+     // Debug-only stack entry: besides the value it counts keys that were seen again
+     // in the same map, so EndObject() can verify the member count reported by the reader.
+     struct TItem {
+         TJsonValue* V;
+         size_t DuplicateKeyCount;
+
+         TItem(TJsonValue* v)
+             : V(v)
+             , DuplicateKeyCount(0)
+         {
+         }
+     };
+
+     inline TJsonValue& Access(TItem& item) const {
+         return *item.V;
+     }
+#endif
+
+     // Root value being built.
+     NJson::NOrderedJson::TJsonValue& V;
+
+     TStack<TItem> S;
+
+     TJsonValueBuilderConfig Config;
+
+     TJsonValueBuilder(NJson::NOrderedJson::TJsonValue& v)
+         : V(v)
+     {
+         S.emplace(&V);
+     }
+
+     TJsonValueBuilder(NJson::NOrderedJson::TJsonValue& v, const TJsonValueBuilderConfig& config)
+         : V(v)
+         , Config(config)
+     {
+         S.emplace(&V);
+     }
+
+     // Stores a scalar: appended when the top is an array, otherwise assigned to the
+     // waiting slot, which is then complete and leaves the stack.
+     template <class T>
+     void Set(const T& t) {
+         if (Access(S.top()).IsArray()) {
+             Access(S.top()).AppendValue(t);
+         } else {
+             Access(S.top()) = t;
+             S.pop();
+         }
+     }
+
+     bool Null() {
+         Set(NJson::NOrderedJson::JSON_NULL);
+         return true;
+     }
+
+     bool Bool(bool b) {
+         Set(b);
+         return true;
+     }
+
+     bool Int(int i) {
+         Set(i);
+         return true;
+     }
+
+     // Unsigned values that fit into i64 are stored as signed; only larger ones stay unsigned.
+     template <class U>
+     bool ProcessUint(U u) {
+         if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) {
+             Set(i64(u));
+         } else {
+             Set(u);
+         }
+         return true;
+     }
+
+     bool Uint(unsigned u) {
+         return ProcessUint(u);
+     }
+
+     bool Int64(i64 i) {
+         Set(i);
+         return true;
+     }
+
+     bool Uint64(ui64 u) {
+         return ProcessUint(u);
+     }
+
+     bool Double(double d) {
+         Set(d);
+         return true;
+     }
+
+     // Part of the handler interface; asserts in debug builds because it is not expected to run.
+     bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
+         Y_ASSERT(false && "this method should never be called");
+         Y_UNUSED(str);
+         Y_UNUSED(length);
+         Y_UNUSED(copy);
+         return true;
+     }
+
+     bool String(const char* str, rapidjson::SizeType length, bool copy) {
+         Y_ASSERT(copy);
+         Set(TStringBuf(str, length));
+         return true;
+     }
+
+     // Inside an array a new map element is appended and pushed (subject to the depth
+     // limit); otherwise the waiting slot on top of the stack becomes the map.
+     bool StartObject() {
+         if (Access(S.top()).IsArray()) {
+             S.emplace(&Access(S.top()).AppendValue(NJson::NOrderedJson::JSON_MAP));
+             if (!IsWithinStackBounds()) {
+                 return false;
+             }
+         } else {
+             Access(S.top()).SetType(NJson::NOrderedJson::JSON_MAP);
+         }
+         return true;
+     }
+
+     // Pushes the slot for this key's value. A key that already holds a value is reset
+     // to JSON_UNDEFINED first, so the value that follows replaces the earlier one.
+     bool Key(const char* str, rapidjson::SizeType length, bool copy) {
+         Y_ASSERT(copy);
+         auto& value = Access(S.top())[TStringBuf(str, length)];
+         if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) {
+#ifndef NDEBUG
+             ++S.top().DuplicateKeyCount;
+#endif
+             value.SetType(JSON_UNDEFINED);
+         }
+         S.emplace(&value);
+         if (!IsWithinStackBounds()) {
+             return false;
+         }
+         return true;
+     }
+
+     // Number of repeated keys recorded for the map on top of the stack (always 0 when
+     // NDEBUG is defined). Returned as size_t so the counter is not narrowed and the
+     // EndObject() assertion compares unsigned values only.
+     inline size_t GetDuplicateKeyCount() const {
+#ifdef NDEBUG
+         return 0;
+#else
+         return S.top().DuplicateKeyCount;
+#endif
+     }
+
+     bool EndObject(rapidjson::SizeType memberCount) {
+         Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount());
+         S.pop();
+         return true;
+     }
+
+     // Mirrors StartObject() for arrays.
+     bool StartArray() {
+         if (Access(S.top()).IsArray()) {
+             S.emplace(&Access(S.top()).AppendValue(NJson::NOrderedJson::JSON_ARRAY));
+             if (!IsWithinStackBounds()) {
+                 return false;
+             }
+         } else {
+             Access(S.top()).SetType(NJson::NOrderedJson::JSON_ARRAY);
+         }
+         return true;
+     }
+
+     bool EndArray(rapidjson::SizeType elementCount) {
+         Y_ASSERT(elementCount == Access(S.top()).GetArray().size());
+         S.pop();
+         return true;
+     }
+
+     // MaxDepth == 0 means "no limit"; otherwise the stack may hold at most MaxDepth entries.
+     bool IsWithinStackBounds() {
+         return Config.MaxDepth == 0 || (S.size() <= Config.MaxDepth);
+     }
+ };
+
+ // Translates a ReaderConfigFlags bit mask into the matching rapidjson parse flags.
+ constexpr ui32 ConvertToRapidJsonFlags(ui8 flags) {
+     ui32 converted = rapidjson::kParseNoFlags;
+
+     converted |= (flags & ReaderConfigFlags::NANINF) ? rapidjson::kParseNanAndInfFlag : rapidjson::kParseNoFlags;
+     converted |= (flags & ReaderConfigFlags::ITERATIVE) ? rapidjson::kParseIterativeFlag : rapidjson::kParseNoFlags;
+     converted |= (flags & ReaderConfigFlags::COMMENTS) ? rapidjson::kParseCommentsFlag : rapidjson::kParseNoFlags;
+     converted |= (flags & ReaderConfigFlags::VALIDATE) ? rapidjson::kParseValidateEncodingFlag : rapidjson::kParseNoFlags;
+     converted |= (flags & ReaderConfigFlags::ESCAPE) ? rapidjson::kParseEscapedApostropheFlag : rapidjson::kParseNoFlags;
+
+     return converted;
+ }
+
+ // Turns a runtime flag mask into the compile-time flag set that Reader::Parse needs.
+ // Each call moves one set bit from runtimeFlags into the currentFlags template
+ // argument and recurses; once runtimeFlags is empty the parse runs with the
+ // accumulated flags.
+ template <class TRapidJsonCompliantInputStream, class THandler, ui8 currentFlags = 0>
+ auto ReadWithRuntimeFlags(ui8 runtimeFlags,
+                           rapidjson::Reader& reader,
+                           TRapidJsonCompliantInputStream& is,
+                           THandler& handler) {
+     // Keep this return first: it fixes the deduced return type before the recursive calls below.
+     if (runtimeFlags == 0) {
+         return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
+     }
+
+     if (runtimeFlags & ReaderConfigFlags::NANINF) {
+         return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | ReaderConfigFlags::NANINF>(
+             runtimeFlags ^ ReaderConfigFlags::NANINF, reader, is, handler);
+     }
+     if (runtimeFlags & ReaderConfigFlags::ITERATIVE) {
+         return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | ReaderConfigFlags::ITERATIVE>(
+             runtimeFlags ^ ReaderConfigFlags::ITERATIVE, reader, is, handler);
+     }
+     if (runtimeFlags & ReaderConfigFlags::COMMENTS) {
+         return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | ReaderConfigFlags::COMMENTS>(
+             runtimeFlags ^ ReaderConfigFlags::COMMENTS, reader, is, handler);
+     }
+     if (runtimeFlags & ReaderConfigFlags::VALIDATE) {
+         return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | ReaderConfigFlags::VALIDATE>(
+             runtimeFlags ^ ReaderConfigFlags::VALIDATE, reader, is, handler);
+     }
+     if (runtimeFlags & ReaderConfigFlags::ESCAPE) {
+         return ReadWithRuntimeFlags<TRapidJsonCompliantInputStream, THandler, currentFlags | ReaderConfigFlags::ESCAPE>(
+             runtimeFlags ^ ReaderConfigFlags::ESCAPE, reader, is, handler);
+     }
+
+     // None of the handled bits was set: parse with the flags accumulated so far.
+     return reader.Parse<ConvertToRapidJsonFlags(currentFlags)>(is, handler);
+ }
+
+ // Builds the flag mask from the reader config and runs the parse with it.
+ template <class TRapidJsonCompliantInputStream, class THandler>
+ auto Read(const TJsonReaderConfig& config,
+           rapidjson::Reader& reader,
+           TRapidJsonCompliantInputStream& is,
+           THandler& handler) {
+     // Encoding validation is on unless the config opts out below.
+     ui8 parseFlags = ReaderConfigFlags::VALIDATE;
+
+     parseFlags |= config.UseIterativeParser ? ReaderConfigFlags::ITERATIVE : 0;
+     parseFlags |= config.AllowComments ? ReaderConfigFlags::COMMENTS : 0;
+
+     if (config.DontValidateUtf8) {
+         parseFlags &= ~(ReaderConfigFlags::VALIDATE);
+     }
+
+     parseFlags |= config.AllowEscapedApostrophe ? ReaderConfigFlags::ESCAPE : 0;
+     parseFlags |= config.AllowReadNanInf ? ReaderConfigFlags::NANINF : 0;
+
+     return ReadWithRuntimeFlags(parseFlags, reader, is, handler);
+ }
+
+ template <class TRapidJsonCompliantInputStream, class THandler>
+ bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) {
+ rapidjson::Reader reader;
+
+ auto result = Read(*config, reader, is, handler);
+
+ if (result.IsError()) {
+ if (throwOnError) {
+ ythrow TJsonException() << PrintError(result);
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ // Resets *out to JSON_NULL and then builds the tree into it with TJsonValueBuilder,
+ // passing the configured MaxDepth on to the builder.
+ template <class TRapidJsonCompliantInputStream>
+ bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+     out->SetType(NJson::NOrderedJson::JSON_NULL);
+
+     const TJsonValueBuilderConfig builderConfig{ .MaxDepth = config->MaxDepth };
+     TJsonValueBuilder builder(*out, builderConfig);
+
+     return ReadJson(is, config, builder, throwOnError);
+ }
+
+ // Wraps the input in the stream adapter that matches its type (TStringBuf or any
+ // other input) and parses it into *out.
+ template <class TData>
+ bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+     using TStream = std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper>;
+     TStream is(*in);
+     return ReadJsonTree(is, config, out, throwOnError);
+ }
+
+ // Convenience overload: a default config with only AllowComments overridden.
+ template <class TData>
+ bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) {
+     TJsonReaderConfig readerConfig;
+     readerConfig.AllowComments = allowComments;
+     return ReadJsonTreeImpl(in, &readerConfig, out, throwOnError);
+ }
+
+ // Convenience overload: comments are not allowed.
+ template <class TData>
+ bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) {
+     return ReadJsonTreeImpl(in, /* allowComments = */ false, out, throwOnError);
+ }
+ } //namespace
+
+ // Parses a string buffer into *out with the default settings (no comments).
+ bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) {
+     const bool parsed = ReadJsonTreeImpl(&in, out, throwOnError);
+     return parsed;
+ }
+
+ // Parses a string buffer into *out, optionally allowing comments.
+ bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) {
+     const bool parsed = ReadJsonTreeImpl(&in, allowComments, out, throwOnError);
+     return parsed;
+ }
+
+ // Parses a string buffer into *out using the supplied reader config.
+ bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+     const bool parsed = ReadJsonTreeImpl(&in, config, out, throwOnError);
+     return parsed;
+ }
+
+ // Parses an input stream into *out with the default settings (no comments).
+ bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) {
+     const bool parsed = ReadJsonTreeImpl(in, out, throwOnError);
+     return parsed;
+ }
+
+ // Parses an input stream into *out, optionally allowing comments.
+ bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) {
+     const bool parsed = ReadJsonTreeImpl(in, allowComments, out, throwOnError);
+     return parsed;
+ }
+
+ // Parses an input stream into *out using the supplied reader config.
+ bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+     const bool parsed = ReadJsonTreeImpl(in, config, out, throwOnError);
+     return parsed;
+ }
+
+ // Runs ReadJsonFast over `in` with TParserCallbacks that build the tree in *out.
+ bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) {
+     TParserCallbacks callbacks(*out, throwOnError, notClosedBracketIsError);
+     return ReadJsonFast(in, &callbacks);
+ }
+
+ // By-value variant of ReadJsonFastTree.
+ TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) {
+     TJsonValue result;
+     // Returning the tree by value leaves no channel for an error code, so errors are
+     // always requested as exceptions.
+     ReadJsonFastTree(in, &result, /* throwOnError = */ true, notClosedBracketIsError);
+     return result;
+ }
+
+ namespace {
+ // Adapts a TJsonCallbacks object to the handler interface called by rapidjson::Reader.
+ // Every event is forwarded to the matching On* callback and its result is returned.
+ struct TJsonCallbacksWrapper {
+     TJsonCallbacks& Impl;
+
+     TJsonCallbacksWrapper(TJsonCallbacks& impl)
+         : Impl(impl)
+     {
+     }
+
+     // --- scalars ---
+
+     bool Null() {
+         return Impl.OnNull();
+     }
+
+     bool Bool(bool b) {
+         return Impl.OnBoolean(b);
+     }
+
+     bool Int(int i) {
+         return Impl.OnInteger(i);
+     }
+
+     bool Int64(i64 i) {
+         return Impl.OnInteger(i);
+     }
+
+     // Unsigned values that fit into i64 are reported as signed integers; only larger
+     // ones go through OnUInteger.
+     template <class U>
+     bool ProcessUint(U u) {
+         const bool fitsSigned = u <= ui64(Max<i64>());
+         if (Y_LIKELY(fitsSigned)) {
+             return Impl.OnInteger(i64(u));
+         }
+         return Impl.OnUInteger(u);
+     }
+
+     bool Uint(unsigned u) {
+         return ProcessUint(u);
+     }
+
+     bool Uint64(ui64 u) {
+         return ProcessUint(u);
+     }
+
+     bool Double(double d) {
+         return Impl.OnDouble(d);
+     }
+
+     // Part of the handler interface; asserts in debug builds because it is not expected to run.
+     bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
+         Y_ASSERT(false && "this method should never be called");
+         Y_UNUSED(str);
+         Y_UNUSED(length);
+         Y_UNUSED(copy);
+         return true;
+     }
+
+     bool String(const char* str, rapidjson::SizeType length, bool copy) {
+         Y_ASSERT(copy);
+         const TStringBuf text(str, length);
+         return Impl.OnString(text);
+     }
+
+     // --- maps ---
+
+     bool StartObject() {
+         return Impl.OnOpenMap();
+     }
+
+     bool Key(const char* str, rapidjson::SizeType length, bool copy) {
+         Y_ASSERT(copy);
+         const TStringBuf name(str, length);
+         return Impl.OnMapKey(name);
+     }
+
+     bool EndObject(rapidjson::SizeType memberCount) {
+         Y_UNUSED(memberCount);
+         return Impl.OnCloseMap();
+     }
+
+     // --- arrays ---
+
+     bool StartArray() {
+         return Impl.OnOpenArray();
+     }
+
+     bool EndArray(rapidjson::SizeType elementCount) {
+         Y_UNUSED(elementCount);
+         return Impl.OnCloseArray();
+     }
+ };
+ }
+
+ // Streams JSON events from `in` to `cbs`; comments are not allowed.
+ bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) {
+     return ReadJson(in, /* allowComments = */ false, cbs);
+ }
+
+ // Streams JSON events from `in` to `cbs` using a default config with only
+ // AllowComments overridden.
+ bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) {
+     TJsonReaderConfig readerConfig;
+     readerConfig.AllowComments = allowComments;
+     return ReadJson(in, &readerConfig, cbs);
+ }
+
+ // Streams JSON events from `in` to `cbs` using a default config with AllowComments
+ // and AllowEscapedApostrophe overridden.
+ bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) {
+     TJsonReaderConfig readerConfig;
+     readerConfig.AllowComments = allowComments;
+     readerConfig.AllowEscapedApostrophe = allowEscapedApostrophe;
+     return ReadJson(in, &readerConfig, cbs);
+ }
+
+ // Parses `in` with the given config and forwards every event to `cbs`.
+ // A parse error is reported through cbs->OnError() and yields false; otherwise the
+ // result of cbs->OnEnd() is returned.
+ bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) {
+     TJsonCallbacksWrapper handler(*cbs);
+     TInputStreamWrapper stream(*in);
+
+     rapidjson::Reader reader;
+     auto outcome = Read(*config, reader, stream, handler);
+
+     if (!outcome.IsError()) {
+         return cbs->OnEnd();
+     }
+
+     cbs->OnError(outcome.Offset(), PrintError(outcome));
+     return false;
+ }
+
+ // By-value variant: returns the parsed tree. The boolean result of the underlying
+ // call is discarded.
+ TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) {
+     TJsonValue tree;
+     ReadJsonTree(in, &tree, throwOnError);
+     return tree;
+ }
+
+ // By-value variant with optional comment support. The boolean result of the
+ // underlying call is discarded.
+ TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) {
+     TJsonValue tree;
+     ReadJsonTree(in, allowComments, &tree, throwOnError);
+     return tree;
+ }
+
+ // By-value variant with an explicit reader config. The boolean result of the
+ // underlying call is discarded.
+ TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) {
+     TJsonValue tree;
+     ReadJsonTree(in, config, &tree, throwOnError);
+     return tree;
+ }
+
+}