aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/json/json_reader.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json/json_reader.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json/json_reader.cpp')
-rw-r--r--library/cpp/json/json_reader.cpp567
1 files changed, 567 insertions, 0 deletions
diff --git a/library/cpp/json/json_reader.cpp b/library/cpp/json/json_reader.cpp
new file mode 100644
index 0000000000..072c8deafe
--- /dev/null
+++ b/library/cpp/json/json_reader.cpp
@@ -0,0 +1,567 @@
+#include "json_reader.h"
+
+#include "rapidjson_helpers.h"
+
+#include <contrib/libs/rapidjson/include/rapidjson/error/en.h>
+#include <contrib/libs/rapidjson/include/rapidjson/error/error.h>
+#include <contrib/libs/rapidjson/include/rapidjson/reader.h>
+
+#include <util/generic/stack.h>
+#include <util/string/cast.h>
+#include <util/system/yassert.h>
+#include <util/string/builder.h>
+
+namespace NJson {
+ namespace {
+ TString PrintError(const rapidjson::ParseResult& result) {
+ return TStringBuilder() << TStringBuf("Offset: ") << result.Offset()
+ << TStringBuf(", Code: ") << (int)result.Code()
+ << TStringBuf(", Error: ") << GetParseError_En(result.Code());
+ }
+ }
+
+ static const size_t DEFAULT_BUFFER_LEN = 65536;
+
+ bool TParserCallbacks::OpenComplexValue(EJsonValueType type) {
+ TJsonValue* pvalue;
+ switch (CurrentState) {
+ case START:
+ Value.SetType(type);
+ ValuesStack.push_back(&Value);
+ break;
+ case IN_ARRAY:
+ pvalue = &ValuesStack.back()->AppendValue(type);
+ ValuesStack.push_back(pvalue);
+ break;
+ case AFTER_MAP_KEY:
+ pvalue = &ValuesStack.back()->InsertValue(Key, type);
+ ValuesStack.push_back(pvalue);
+ CurrentState = IN_MAP;
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ bool TParserCallbacks::CloseComplexValue() {
+ if (ValuesStack.empty()) {
+ return false;
+ }
+
+ ValuesStack.pop_back();
+ if (!ValuesStack.empty()) {
+ switch (ValuesStack.back()->GetType()) {
+ case JSON_ARRAY:
+ CurrentState = IN_ARRAY;
+ break;
+ case JSON_MAP:
+ CurrentState = IN_MAP;
+ break;
+ default:
+ return false;
+ }
+ } else {
+ CurrentState = FINISH;
+ }
+ return true;
+ }
+
+ TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError)
+ : TJsonCallbacks(throwOnError)
+ , Value(value)
+ , NotClosedBracketIsError(notClosedBracketIsError)
+ , CurrentState(START)
+ {
+ }
+
+ bool TParserCallbacks::OnNull() {
+ return SetValue(JSON_NULL);
+ }
+
+ bool TParserCallbacks::OnBoolean(bool val) {
+ return SetValue(val);
+ }
+
+ bool TParserCallbacks::OnInteger(long long val) {
+ return SetValue(val);
+ }
+
+ bool TParserCallbacks::OnUInteger(unsigned long long val) {
+ return SetValue(val);
+ }
+
+ bool TParserCallbacks::OnString(const TStringBuf& val) {
+ return SetValue(val);
+ }
+
+ bool TParserCallbacks::OnDouble(double val) {
+ return SetValue(val);
+ }
+
+ bool TParserCallbacks::OnOpenArray() {
+ bool res = OpenComplexValue(JSON_ARRAY);
+ if (res)
+ CurrentState = IN_ARRAY;
+ return res;
+ }
+
+ bool TParserCallbacks::OnCloseArray() {
+ return CloseComplexValue();
+ }
+
+ bool TParserCallbacks::OnOpenMap() {
+ bool res = OpenComplexValue(JSON_MAP);
+ if (res)
+ CurrentState = IN_MAP;
+ return res;
+ }
+
+ bool TParserCallbacks::OnCloseMap() {
+ return CloseComplexValue();
+ }
+
+ bool TParserCallbacks::OnMapKey(const TStringBuf& val) {
+ switch (CurrentState) {
+ case IN_MAP:
+ Key = val;
+ CurrentState = AFTER_MAP_KEY;
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ bool TParserCallbacks::OnEnd() {
+ if (NotClosedBracketIsError){
+ return ValuesStack.empty();
+ }
+ return true;
+ }
+
+ TJsonReaderConfig::TJsonReaderConfig()
+ : BufferSize(DEFAULT_BUFFER_LEN)
+ {
+ }
+
+ void TJsonReaderConfig::SetBufferSize(size_t bufferSize) {
+ BufferSize = Max((size_t)1, Min(bufferSize, DEFAULT_BUFFER_LEN));
+ }
+
+ size_t TJsonReaderConfig::GetBufferSize() const {
+ return BufferSize;
+ }
+
+ namespace {
+ struct TJsonValueBuilder {
+#ifdef NDEBUG
+ using TItem = TJsonValue*;
+
+ inline TJsonValue& Access(TItem& item) const {
+ return *item;
+ }
+#else
+ struct TItem {
+ TJsonValue* V;
+ size_t DuplicateKeyCount;
+
+ TItem(TJsonValue* v)
+ : V(v)
+ , DuplicateKeyCount(0)
+ {
+ }
+ };
+
+ inline TJsonValue& Access(TItem& item) const {
+ return *item.V;
+ }
+#endif
+
+ NJson::TJsonValue& V;
+
+ TStack<TItem> S;
+
+ TJsonValueBuilder(NJson::TJsonValue& v)
+ : V(v)
+ {
+ S.emplace(&V);
+ }
+
+ template <class T>
+ void Set(const T& t) {
+ if (Access(S.top()).IsArray()) {
+ Access(S.top()).AppendValue(t);
+ } else {
+ Access(S.top()) = t;
+ S.pop();
+ }
+ }
+
+ bool Null() {
+ Set(NJson::JSON_NULL);
+ return true;
+ }
+
+ bool Bool(bool b) {
+ Set(b);
+ return true;
+ }
+
+ bool Int(int i) {
+ Set(i);
+ return true;
+ }
+
+ template <class U>
+ bool ProcessUint(U u) {
+ if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) {
+ Set(i64(u));
+ } else {
+ Set(u);
+ }
+ return true;
+ }
+
+ bool Uint(unsigned u) {
+ return ProcessUint(u);
+ }
+
+ bool Int64(i64 i) {
+ Set(i);
+ return true;
+ }
+
+ bool Uint64(ui64 u) {
+ return ProcessUint(u);
+ }
+
+ bool Double(double d) {
+ Set(d);
+ return true;
+ }
+
+ bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
+ Y_ASSERT(false && "this method should never be called");
+ Y_UNUSED(str);
+ Y_UNUSED(length);
+ Y_UNUSED(copy);
+ return true;
+ }
+
+ bool String(const char* str, rapidjson::SizeType length, bool copy) {
+ Y_ASSERT(copy);
+ Set(TStringBuf(str, length));
+ return true;
+ }
+
+ bool StartObject() {
+ if (Access(S.top()).IsArray()) {
+ S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP));
+ } else {
+ Access(S.top()).SetType(NJson::JSON_MAP);
+ }
+ return true;
+ }
+
+ bool Key(const char* str, rapidjson::SizeType length, bool copy) {
+ Y_ASSERT(copy);
+ auto& value = Access(S.top())[TStringBuf(str, length)];
+ if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) {
+#ifndef NDEBUG
+ ++S.top().DuplicateKeyCount;
+#endif
+ value.SetType(JSON_UNDEFINED);
+ }
+ S.emplace(&value);
+ return true;
+ }
+
+ inline int GetDuplicateKeyCount() const {
+#ifdef NDEBUG
+ return 0;
+#else
+ return S.top().DuplicateKeyCount;
+#endif
+ }
+
+ bool EndObject(rapidjson::SizeType memberCount) {
+ Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount());
+ S.pop();
+ return true;
+ }
+
+ bool StartArray() {
+ if (Access(S.top()).IsArray()) {
+ S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY));
+ } else {
+ Access(S.top()).SetType(NJson::JSON_ARRAY);
+ }
+ return true;
+ }
+
+ bool EndArray(rapidjson::SizeType elementCount) {
+ Y_ASSERT(elementCount == Access(S.top()).GetArray().size());
+ S.pop();
+ return true;
+ }
+ };
+
+ template <class TRapidJsonCompliantInputStream, class THandler>
+ auto Read(const TJsonReaderConfig& config,
+ rapidjson::Reader& reader,
+ TRapidJsonCompliantInputStream& is,
+ THandler& handler) {
+
+ ui8 flags = ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE;
+
+ if (config.AllowComments) {
+ flags |= ReaderConfigFlags::COMMENTS;
+ }
+
+ if (config.DontValidateUtf8) {
+ flags &= ~(ReaderConfigFlags::VALIDATE);
+ }
+
+ if (config.AllowEscapedApostrophe) {
+ flags |= ReaderConfigFlags::ESCAPE;
+ }
+
+ switch (flags) {
+ case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_NOESCAPE:
+ return reader.Parse<rapidjson::kParseCommentsFlag>(is, handler);
+ case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_NOESCAPE:
+ return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag>(is, handler);
+ case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_ESCAPE:
+ return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
+ case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_ESCAPE:
+ return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
+ case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE:
+ return reader.Parse<rapidjson::kParseValidateEncodingFlag>(is, handler);
+ case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_ESCAPE:
+ return reader.Parse<rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler);
+ case ReaderConfigToRapidJsonFlags::NOCOMMENTS_NOVALID_ESCAPE:
+ return reader.Parse<rapidjson::kParseEscapedApostropheFlag>(is, handler);
+ default:
+ return reader.Parse<rapidjson::kParseNoFlags>(is, handler);
+ }
+ }
+
+ template <class TRapidJsonCompliantInputStream, class THandler>
+ bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) {
+ rapidjson::Reader reader;
+
+ auto result = Read(*config, reader, is, handler);
+
+ if (result.IsError()) {
+ if (throwOnError) {
+ ythrow TJsonException() << PrintError(result);
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ template <class TRapidJsonCompliantInputStream>
+ bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+ out->SetType(NJson::JSON_NULL);
+
+ TJsonValueBuilder handler(*out);
+
+ return ReadJson(is, config, handler, throwOnError);
+ }
+
+ template <class TData>
+ bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+ std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper> is(*in);
+ return ReadJsonTree(is, config, out, throwOnError);
+ }
+
+ template <class TData>
+ bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) {
+ TJsonReaderConfig config;
+ config.AllowComments = allowComments;
+ return ReadJsonTreeImpl(in, &config, out, throwOnError);
+ }
+
+ template <class TData>
+ bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(in, false, out, throwOnError);
+ }
+ } //namespace
+
+ bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(&in, out, throwOnError);
+ }
+
+ bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(&in, allowComments, out, throwOnError);
+ }
+
+ bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(&in, config, out, throwOnError);
+ }
+
+ bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(in, out, throwOnError);
+ }
+
+ bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(in, allowComments, out, throwOnError);
+ }
+
+ bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) {
+ return ReadJsonTreeImpl(in, config, out, throwOnError);
+ }
+
+ bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) {
+ TParserCallbacks cb(*out, throwOnError, notClosedBracketIsError);
+
+ return ReadJsonFast(in, &cb);
+ }
+
+ TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) {
+ TJsonValue value;
+ // There is no way to report an error apart from throwing an exception when we return result by value.
+ ReadJsonFastTree(in, &value, /* throwOnError = */ true, notClosedBracketIsError);
+ return value;
+ }
+
+ namespace {
+ struct TJsonCallbacksWrapper {
+ TJsonCallbacks& Impl;
+
+ TJsonCallbacksWrapper(TJsonCallbacks& impl)
+ : Impl(impl)
+ {
+ }
+
+ bool Null() {
+ return Impl.OnNull();
+ }
+
+ bool Bool(bool b) {
+ return Impl.OnBoolean(b);
+ }
+
+ template <class U>
+ bool ProcessUint(U u) {
+ if (Y_LIKELY(u <= ui64(Max<i64>()))) {
+ return Impl.OnInteger(i64(u));
+ } else {
+ return Impl.OnUInteger(u);
+ }
+ }
+
+ bool Int(int i) {
+ return Impl.OnInteger(i);
+ }
+
+ bool Uint(unsigned u) {
+ return ProcessUint(u);
+ }
+
+ bool Int64(i64 i) {
+ return Impl.OnInteger(i);
+ }
+
+ bool Uint64(ui64 u) {
+ return ProcessUint(u);
+ }
+
+ bool Double(double d) {
+ return Impl.OnDouble(d);
+ }
+
+ bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) {
+ Y_ASSERT(false && "this method should never be called");
+ Y_UNUSED(str);
+ Y_UNUSED(length);
+ Y_UNUSED(copy);
+ return true;
+ }
+
+ bool String(const char* str, rapidjson::SizeType length, bool copy) {
+ Y_ASSERT(copy);
+ return Impl.OnString(TStringBuf(str, length));
+ }
+
+ bool StartObject() {
+ return Impl.OnOpenMap();
+ }
+
+ bool Key(const char* str, rapidjson::SizeType length, bool copy) {
+ Y_ASSERT(copy);
+ return Impl.OnMapKey(TStringBuf(str, length));
+ }
+
+ bool EndObject(rapidjson::SizeType memberCount) {
+ Y_UNUSED(memberCount);
+ return Impl.OnCloseMap();
+ }
+
+ bool StartArray() {
+ return Impl.OnOpenArray();
+ }
+
+ bool EndArray(rapidjson::SizeType elementCount) {
+ Y_UNUSED(elementCount);
+ return Impl.OnCloseArray();
+ }
+ };
+ }
+
+ bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) {
+ return ReadJson(in, false, cbs);
+ }
+
+ bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) {
+ TJsonReaderConfig config;
+ config.AllowComments = allowComments;
+ return ReadJson(in, &config, cbs);
+ }
+
+ bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) {
+ TJsonReaderConfig config;
+ config.AllowComments = allowComments;
+ config.AllowEscapedApostrophe = allowEscapedApostrophe;
+ return ReadJson(in, &config, cbs);
+ }
+
+ bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) {
+ TJsonCallbacksWrapper wrapper(*cbs);
+ TInputStreamWrapper is(*in);
+
+ rapidjson::Reader reader;
+ auto result = Read(*config, reader, is, wrapper);
+
+ if (result.IsError()) {
+ cbs->OnError(result.Offset(), PrintError(result));
+
+ return false;
+ }
+
+ return cbs->OnEnd();
+ }
+
+ TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) {
+ TJsonValue out;
+ ReadJsonTree(in, &out, throwOnError);
+ return out;
+ }
+
+ TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) {
+ TJsonValue out;
+ ReadJsonTree(in, allowComments, &out, throwOnError);
+ return out;
+ }
+
+ TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) {
+ TJsonValue out;
+ ReadJsonTree(in, config, &out, throwOnError);
+ return out;
+ }
+
+}