diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json')
50 files changed, 7668 insertions, 0 deletions
diff --git a/library/cpp/json/common/defs.cpp b/library/cpp/json/common/defs.cpp new file mode 100644 index 0000000000..da86da82e4 --- /dev/null +++ b/library/cpp/json/common/defs.cpp @@ -0,0 +1,68 @@ +#include "defs.h" + +using namespace NJson; + +TJsonCallbacks::~TJsonCallbacks() { +} + +bool TJsonCallbacks::OnNull() { + return true; +} + +bool TJsonCallbacks::OnBoolean(bool) { + return true; +} + +bool TJsonCallbacks::OnInteger(long long) { + return true; +} + +bool TJsonCallbacks::OnUInteger(unsigned long long) { + return true; +} + +bool TJsonCallbacks::OnDouble(double) { + return true; +} + +bool TJsonCallbacks::OnString(const TStringBuf&) { + return true; +} + +bool TJsonCallbacks::OnOpenMap() { + return true; +} + +bool TJsonCallbacks::OnMapKey(const TStringBuf&) { + return true; +} + +bool TJsonCallbacks::OnCloseMap() { + return true; +} + +bool TJsonCallbacks::OnOpenArray() { + return true; +} + +bool TJsonCallbacks::OnCloseArray() { + return true; +} + +bool TJsonCallbacks::OnStringNoCopy(const TStringBuf& s) { + return OnString(s); +} + +bool TJsonCallbacks::OnMapKeyNoCopy(const TStringBuf& s) { + return OnMapKey(s); +} + +bool TJsonCallbacks::OnEnd() { + return true; +} + +void TJsonCallbacks::OnError(size_t off, TStringBuf reason) { + if (ThrowException) { + ythrow TJsonException() << "JSON error at offset " << off << " (" << reason << ")"; + } +} diff --git a/library/cpp/json/common/defs.h b/library/cpp/json/common/defs.h new file mode 100644 index 0000000000..d3c8761bcc --- /dev/null +++ b/library/cpp/json/common/defs.h @@ -0,0 +1,38 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> + +namespace NJson { + class TJsonException: public yexception { + }; + + class TJsonCallbacks { + public: + explicit TJsonCallbacks(bool throwException = false) + : ThrowException(throwException) + { + } + + virtual ~TJsonCallbacks(); + + virtual bool OnNull(); + virtual bool OnBoolean(bool); + virtual bool OnInteger(long long); 
+ virtual bool OnUInteger(unsigned long long); + virtual bool OnDouble(double); + virtual bool OnString(const TStringBuf&); + virtual bool OnOpenMap(); + virtual bool OnMapKey(const TStringBuf&); + virtual bool OnCloseMap(); + virtual bool OnOpenArray(); + virtual bool OnCloseArray(); + virtual bool OnStringNoCopy(const TStringBuf& s); + virtual bool OnMapKeyNoCopy(const TStringBuf& s); + virtual bool OnEnd(); + virtual void OnError(size_t off, TStringBuf reason); + + protected: + bool ThrowException; + }; +} diff --git a/library/cpp/json/common/ya.make b/library/cpp/json/common/ya.make new file mode 100644 index 0000000000..5bbd3b0792 --- /dev/null +++ b/library/cpp/json/common/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(pg) + +SRCS( + defs.cpp +) + +END() diff --git a/library/cpp/json/domscheme_traits.h b/library/cpp/json/domscheme_traits.h new file mode 100644 index 0000000000..a5a99cd8cf --- /dev/null +++ b/library/cpp/json/domscheme_traits.h @@ -0,0 +1,216 @@ +#pragma once + +#include "json_value.h" +#include "json_reader.h" +#include "json_writer.h" +#include <util/generic/algorithm.h> + +struct TJsonTraits { + using TValue = NJson::TJsonValue; + using TValueRef = TValue*; + using TConstValueRef = const TValue*; + using TStringType = TStringBuf; + + // anyvalue defaults + template <class T> + static inline TValue Value(T&& t) { + return TValue(std::forward<T>(t)); + } + + template <class T> + static inline TValue Value(std::initializer_list<T> t) { + TValue result(NJson::JSON_ARRAY); + result.GetArraySafe() = NJson::TJsonValue::TArray(t.begin(), t.end()); + return result; + } + + static inline TValueRef Ref(TValue& v) { + return &v; + } + + static inline TConstValueRef Ref(const TValue& v) { + return &v; + } + + // common ops + static inline bool IsNull(TConstValueRef v) { + return v->GetType() == NJson::JSON_UNDEFINED || v->IsNull(); + } + + static inline TString ToJson(TConstValueRef v) { + return NJson::WriteJson(v, false); + } + + // struct ops + static 
inline TValueRef GetField(TValueRef v, const TStringBuf& name) { + return &(*v)[name]; + } + + static inline TConstValueRef GetField(TConstValueRef v, const TStringBuf& name) { + return &(*v)[name]; + } + + // array ops + static bool IsArray(TConstValueRef v) { + return v->IsArray(); + } + + static inline void ArrayClear(TValueRef v) { + v->SetType(NJson::JSON_NULL); + v->SetType(NJson::JSON_ARRAY); + } + + using TArrayIterator = size_t; + + static inline TValueRef ArrayElement(TValueRef v, TArrayIterator n) { + return &(*v)[n]; + } + + static inline TConstValueRef ArrayElement(TConstValueRef v, TArrayIterator n) { + return &(*v)[n]; + } + + static inline size_t ArraySize(TConstValueRef v) { + return v->GetArray().size(); + } + + static inline TArrayIterator ArrayBegin(TConstValueRef) { + return 0; + } + + static inline TArrayIterator ArrayEnd(TConstValueRef v) { + return ArraySize(v); + } + + // dict ops + static bool IsDict(TConstValueRef v) { + return v->IsMap(); + } + + static inline void DictClear(TValueRef v) { + v->SetType(NJson::JSON_NULL); + v->SetType(NJson::JSON_MAP); + } + + static inline TValueRef DictElement(TValueRef v, TStringBuf key) { + return &(*v)[key]; + } + + static inline TConstValueRef DictElement(TConstValueRef v, TStringBuf key) { + return &(*v)[key]; + } + + static inline size_t DictSize(TConstValueRef v) { + return v->GetMap().size(); + } + + using TDictIterator = NJson::TJsonValue::TMapType::const_iterator; + + static inline TDictIterator DictBegin(TConstValueRef v) { + return v->GetMap().begin(); + } + + static inline TDictIterator DictEnd(TConstValueRef v) { + return v->GetMap().end(); + } + + static inline TStringBuf DictIteratorKey(TConstValueRef /*dict*/, const TDictIterator& it) { + return it->first; + } + + static inline TConstValueRef DictIteratorValue(TConstValueRef /*dict*/, const TDictIterator& it) { + return &it->second; + } + + // boolean ops + static inline void Get(TConstValueRef v, bool def, bool& b) { + b = + 
v->GetType() == NJson::JSON_UNDEFINED ? def : v->IsNull() ? def : v->GetBooleanRobust(); + } + + static inline void Get(TConstValueRef v, bool& b) { + Get(v, false, b); + } + + static inline bool IsValidPrimitive(const bool&, TConstValueRef v) { + return v->IsBoolean(); + } + +#define INTEGER_OPS(type, checkOp, getOp) \ + static inline void Get(TConstValueRef v, type def, type& i) { \ + i = v->checkOp() ? v->getOp() : def; \ + } \ + static inline void Get(TConstValueRef v, type& i) { \ + i = v->getOp(); \ + } \ + static inline bool IsValidPrimitive(const type&, TConstValueRef v) { \ + return v->checkOp() && v->getOp() >= Min<type>() && v->getOp() <= Max<type>(); \ + } + + INTEGER_OPS(i8, IsInteger, GetInteger) + INTEGER_OPS(i16, IsInteger, GetInteger) + INTEGER_OPS(i32, IsInteger, GetInteger) + INTEGER_OPS(i64, IsInteger, GetInteger) + INTEGER_OPS(ui8, IsUInteger, GetUInteger) + INTEGER_OPS(ui16, IsUInteger, GetUInteger) + INTEGER_OPS(ui32, IsUInteger, GetUInteger) + INTEGER_OPS(ui64, IsUInteger, GetUInteger) + +#undef INTEGER_OPS + + // double ops + static inline bool Get(TConstValueRef v, double def, double& d) { + if (v->IsDouble()) { + d = v->GetDouble(); + return true; + } + d = def; + return false; + } + + static inline void Get(TConstValueRef v, double& d) { + d = v->GetDouble(); + } + + static inline bool IsValidPrimitive(const double&, TConstValueRef v) { + return v->IsDouble(); + } + + // string ops + static inline void Get(TConstValueRef v, TStringBuf def, TStringBuf& s) { + s = v->IsString() ? 
v->GetString() : def; + } + + static inline void Get(TConstValueRef v, TStringBuf& s) { + s = v->GetString(); + } + + static inline bool IsValidPrimitive(const TStringBuf&, TConstValueRef v) { + return v->IsString(); + } + + // generic set + template <class T> + static inline void Set(TValueRef v, T&& t) { + v->SetValue(t); + } + + static inline void Clear(TValueRef v) { + v->SetType(NJson::JSON_NULL); + } + + // validation ops + static inline TVector<TString> GetKeys(TConstValueRef v) { + TVector<TString> res; + for (const auto& it : v->GetMap()) { + res.push_back(it.first); + } + Sort(res.begin(), res.end()); + return res; + } + + template <typename T> + static inline bool IsValidPrimitive(const T&, TConstValueRef) { + return false; + } +}; diff --git a/library/cpp/json/easy_parse/json_easy_parser.cpp b/library/cpp/json/easy_parse/json_easy_parser.cpp new file mode 100644 index 0000000000..3c781f544b --- /dev/null +++ b/library/cpp/json/easy_parse/json_easy_parser.cpp @@ -0,0 +1,236 @@ +#include "json_easy_parser.h" +#include <library/cpp/json/json_reader.h> +#include <util/string/cast.h> +#include <util/string/split.h> +#include <util/string/strip.h> + +namespace NJson { + static TString MAP_IDENTIFIER = "{}"; + static TString ARRAY_IDENTIFIER = "[]"; + static TString ANY_IDENTIFIER = "*"; + + static void ParsePath(TString path, TVector<TPathElem>* res) { + TVector<const char*> parts; + Split(path.begin(), '/', &parts); + for (size_t n = 0; n < parts.size(); ++n) { + TString part = Strip(parts[n]); + if (!part.empty()) { + if (part[0] != '[') { + res->push_back(TPathElem(NImpl::MAP)); + res->push_back(TPathElem(part)); + } else { + int arrayCounter; + try { + arrayCounter = FromString<int>(part.substr(1, part.length() - 2)); + } catch (yexception&) { + arrayCounter = -1; + } + res->push_back(TPathElem(arrayCounter)); + } + } + } + } + + void TJsonParser::AddField(const TString& path, bool nonEmpty) { + Fields.emplace_back(); + Fields.back().NonEmpty = nonEmpty; 
+ ParsePath(path, &Fields.back().Path); + } + + TString TJsonParser::ConvertToTabDelimited(const TString& json) const { + TStringInput in(json); + TStringStream out; + ConvertToTabDelimited(in, out); + return out.Str(); + } + + class TRewriteJsonImpl: public NJson::TJsonCallbacks { + const TJsonParser& Parent; + TVector<TString> FieldValues; + TVector<TPathElem> Stack; + bool ShouldUpdateOnArrayChange; + int CurrentFieldIdx; + bool HasFormatError; + + private: + static bool PathElementMatch(const TPathElem& templ, const TPathElem& real) { + if (templ.Type != real.Type) + return false; + if (templ.Type == NImpl::ARRAY) + return templ.ArrayCounter == -1 || templ.ArrayCounter == real.ArrayCounter; + if (templ.Type == NImpl::MAP_KEY) + return templ.Key == ANY_IDENTIFIER || templ.Key == real.Key; + return true; + } + + bool CheckFilter(const TVector<TPathElem>& path) const { + if (Stack.size() < path.size()) + return false; + for (size_t n = 0; n < path.size(); ++n) { + if (!PathElementMatch(path[n], Stack[n])) + return false; + } + return true; + } + + void UpdateRule() { + for (size_t n = 0; n < Parent.Fields.size(); ++n) { + if (FieldValues[n].empty() && CheckFilter(Parent.Fields[n].Path)) { + CurrentFieldIdx = n; + return; + } + } + CurrentFieldIdx = -1; + } + + void Pop() { + Stack.pop_back(); + } + + void IncreaseArrayCounter() { + if (!Stack.empty() && Stack.back().Type == NImpl::ARRAY) { + ++Stack.back().ArrayCounter; + if (ShouldUpdateOnArrayChange) + UpdateRule(); + } + } + + template <class T> + bool OnValue(const T& val) { + IncreaseArrayCounter(); + if (CurrentFieldIdx >= 0) { + FieldValues[CurrentFieldIdx] = ToString(val); + UpdateRule(); + } + return true; + } + + public: + TRewriteJsonImpl(const TJsonParser& parent) + : Parent(parent) + , FieldValues(parent.Fields.size()) + , ShouldUpdateOnArrayChange(false) + , CurrentFieldIdx(-1) + , HasFormatError(false) + { + for (size_t n = 0; n < Parent.Fields.size(); ++n) { + if (!Parent.Fields[n].Path.empty() && 
Parent.Fields[n].Path.back().Type == NImpl::ARRAY) + ShouldUpdateOnArrayChange = true; + } + } + + bool OnOpenMap() override { + IncreaseArrayCounter(); + Stack.push_back(TPathElem(NImpl::MAP)); + if (CurrentFieldIdx >= 0) + HasFormatError = true; + else + UpdateRule(); + return true; + } + + bool OnOpenArray() override { + IncreaseArrayCounter(); + Stack.push_back(TPathElem(-1)); + if (CurrentFieldIdx >= 0) + HasFormatError = true; + else + UpdateRule(); + return true; + } + + bool OnCloseMap() override { + while (!Stack.empty() && Stack.back().Type != NImpl::MAP) + Pop(); + if (!Stack.empty()) + Pop(); + UpdateRule(); + return true; + } + + bool OnCloseArray() override { + if (!Stack.empty()) + Pop(); + UpdateRule(); + return true; + } + + bool OnMapKey(const TStringBuf& key) override { + if (!Stack.empty() && Stack.back().Type == NImpl::MAP_KEY) { + Pop(); + UpdateRule(); + } + Stack.push_back(TPathElem(TString{key})); + if (CurrentFieldIdx >= 0) + HasFormatError = true; + else + UpdateRule(); + return true; + } + + bool OnBoolean(bool b) override { + return OnValue(b); + } + + bool OnInteger(long long i) override { + return OnValue(i); + } + + bool OnDouble(double f) override { + return OnValue(f); + } + + bool OnString(const TStringBuf& str) override { + return OnValue(str); + } + + bool IsOK() const { + if (HasFormatError) + return false; + for (size_t n = 0; n < FieldValues.size(); ++n) + if (Parent.Fields[n].NonEmpty && FieldValues[n].empty()) + return false; + return true; + } + + void WriteTo(IOutputStream& out) const { + for (size_t n = 0; n < FieldValues.size(); ++n) + out << "\t" << FieldValues[n]; + } + + void WriteTo(TVector<TString>* res) const { + *res = FieldValues; + } + }; + + void TJsonParser::ConvertToTabDelimited(IInputStream& in, IOutputStream& out) const { + TRewriteJsonImpl impl(*this); + ReadJson(&in, &impl); + if (impl.IsOK()) { + out << Prefix; + impl.WriteTo(out); + out.Flush(); + } + } + + bool TJsonParser::Parse(const TString& json, 
TVector<TString>* res) const { + TRewriteJsonImpl impl(*this); + TStringInput in(json); + ReadJson(&in, &impl); + if (impl.IsOK()) { + impl.WriteTo(res); + return true; + } else + return false; + } + + //struct TTestMe { + // TTestMe() { + // TJsonParser worker; + // worker.AddField("/x/y/z", true); + // TString ret1 = worker.ConvertToTabDelimited("{ \"x\" : { \"y\" : { \"w\" : 1, \"z\" : 2 } } }"); + // TString ret2 = worker.ConvertToTabDelimited(" [1, 2, 3, 4, 5] "); + // } + //} testMe; + +} diff --git a/library/cpp/json/easy_parse/json_easy_parser.h b/library/cpp/json/easy_parse/json_easy_parser.h new file mode 100644 index 0000000000..59d7791ab1 --- /dev/null +++ b/library/cpp/json/easy_parse/json_easy_parser.h @@ -0,0 +1,46 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/input.h> +#include <util/stream/output.h> +#include "json_easy_parser_impl.h" + +namespace NJson { + /* This class filters out nodes from a source JSON by a xpath-style description. It represent these nodes as a tab-delimited string (or a vector). + * It is useful if you need to parse a data which comes into JSON in a known and fixed format. + * Fields are set as a list of keys separated by slash, for example: + * Field x/y/z in JSON { "x" : { "y" : { "w" : 1, "z" : 2 } } contains number 2. + * In a path to a field you can also provide a special array identifier "[]", identifier of a particular field in an array (for example "[4]") or wildcard "*". + * + * The parser of the class supports parsing of several fields. Each of them could be marked as mandatory or as optional. + * If a mandatory field is not found in JSON, then Parse() returns false and ConvertToTabDelimited() returns an empty string. + * If an optional field is not found in JSON, then it's value in Parse()/ConvertToTabDelimited() is an empty string. 
+ * In particular ConvertToTabDelimited() always returns either an empty string, or a string of the same number of tab-delimited fields starting from the same Prefix. + * + * NB! Library can not extract values of not a simple type (namely it doesn't support the case when a result is a vocabulary or an array) from JSON. + * If you expect such a case, please check json_value.h. + */ + + class TJsonParser { + TString Prefix; + + struct TField { + TVector<TPathElem> Path; + bool NonEmpty; + }; + TVector<TField> Fields; + + friend class TRewriteJsonImpl; + + void ConvertToTabDelimited(IInputStream& in, IOutputStream& out) const; + + public: + void SetPrefix(const TString& prefix) { + Prefix = prefix; + } + void AddField(const TString& path, bool mustExist); + TString ConvertToTabDelimited(const TString& json) const; + bool Parse(const TString& json, TVector<TString>* res) const; + }; +} diff --git a/library/cpp/json/easy_parse/json_easy_parser_impl.h b/library/cpp/json/easy_parse/json_easy_parser_impl.h new file mode 100644 index 0000000000..ec55d838b3 --- /dev/null +++ b/library/cpp/json/easy_parse/json_easy_parser_impl.h @@ -0,0 +1,40 @@ +#pragma once + +#include <util/generic/string.h> + +namespace NJson { + namespace NImpl { + enum EType { + ARRAY, + MAP, + MAP_KEY + }; + } + template <class TStringType> + struct TPathElemImpl { + NImpl::EType Type; + TStringType Key; + int ArrayCounter; + + TPathElemImpl(NImpl::EType type) + : Type(type) + , ArrayCounter() + { + } + + TPathElemImpl(const TStringType& key) + : Type(NImpl::MAP_KEY) + , Key(key) + , ArrayCounter() + { + } + + TPathElemImpl(int arrayCounter) + : Type(NImpl::ARRAY) + , ArrayCounter(arrayCounter) + { + } + }; + + typedef TPathElemImpl<TString> TPathElem; +} diff --git a/library/cpp/json/easy_parse/ya.make b/library/cpp/json/easy_parse/ya.make new file mode 100644 index 0000000000..2304c542f2 --- /dev/null +++ b/library/cpp/json/easy_parse/ya.make @@ -0,0 +1,13 @@ +OWNER(finder) + +LIBRARY() + +SRCS( + 
json_easy_parser.cpp +) + +PEERDIR( + library/cpp/json +) + +END() diff --git a/library/cpp/json/fast_sax/parser.h b/library/cpp/json/fast_sax/parser.h new file mode 100644 index 0000000000..b5f031dd9e --- /dev/null +++ b/library/cpp/json/fast_sax/parser.h @@ -0,0 +1,13 @@ +#pragma once + +#include <library/cpp/json/common/defs.h> + +namespace NJson { + bool ReadJsonFast(TStringBuf in, TJsonCallbacks* callbacks); + + inline bool ValidateJsonFast(TStringBuf in, bool throwOnError = false) { + Y_ASSERT(false); // this method is broken, see details in IGNIETFERRO-1243. Use NJson::ValidateJson instead, or fix this one before using + TJsonCallbacks c(throwOnError); + return ReadJsonFast(in, &c); + } +} diff --git a/library/cpp/json/fast_sax/parser.rl6 b/library/cpp/json/fast_sax/parser.rl6 new file mode 100644 index 0000000000..edb4e9ee1b --- /dev/null +++ b/library/cpp/json/fast_sax/parser.rl6 @@ -0,0 +1,314 @@ +#include <library/cpp/json/fast_sax/unescape.h> +#include <library/cpp/json/fast_sax/parser.h> + +#include <util/string/cast.h> +#include <util/generic/buffer.h> +#include <util/generic/strbuf.h> +#include <util/generic/ymath.h> + +namespace NJson { + +enum EStoredStr { + SS_NONE = 0, SS_NOCOPY, SS_MUSTCOPY +}; + +struct TParserCtx { + TJsonCallbacks& Hndl; + + TBuffer Buffer; + TStringBuf String; + EStoredStr Stored = SS_NONE; + bool ExpectValue = true; + + const char* p0 = nullptr; + const char* p = nullptr; + const char* pe = nullptr; + const char* eof = nullptr; + const char* ts = nullptr; + const char* te = nullptr; + int cs = 0; + int act = 0; + + TParserCtx(TJsonCallbacks& h, TStringBuf data) + : Hndl(h) + , p0(data.data()) + , p(data.data()) + , pe(data.end()) + , eof(data.end()) + {} + + static inline bool GoodPtrs(const char* b, const char* e) { + return b && e && b <= e; + } + + bool OnError(TStringBuf reason = TStringBuf(""), bool end = false) const { + size_t off = 0; + TStringBuf token; + + if (GoodPtrs(p0, ts)) { + off = ts - p0; + } else if (end 
&& GoodPtrs(p0, pe)) { + off = pe - p0; + } + + if (GoodPtrs(ts, te)) { + token = TStringBuf(ts, te); + } + + if (!token) { + Hndl.OnError(off, reason); + } else { + Hndl.OnError(off, TString::Join(reason, " at token: '", token, "'")); + } + + return false; + } + + bool OnVal() { + if (Y_UNLIKELY(!ExpectValue)) { + return false; + } + ExpectValue = false; + return true; + } + + bool OnNull() { + return Y_LIKELY(OnVal()) + && Hndl.OnNull(); + } + + bool OnTrue() { + return Y_LIKELY(OnVal()) + && Hndl.OnBoolean(true); + } + + bool OnFalse() { + return Y_LIKELY(OnVal()) + && Hndl.OnBoolean(false); + } + + bool OnPInt() { + unsigned long long res = 0; + return Y_LIKELY(OnVal()) + && TryFromString<unsigned long long>(TStringBuf(ts, te), res) + && Hndl.OnUInteger(res); + } + + bool OnNInt() { + long long res = 0; + return Y_LIKELY(OnVal()) + && TryFromString<long long>(TStringBuf(ts, te), res) + && Hndl.OnInteger(res); + } + + bool OnFlt() { + double res = 0; + return Y_LIKELY(OnVal()) + && TryFromString<double>(TStringBuf(ts, te), res) + && IsFinite(res) + && Hndl.OnDouble(res); + } + + bool OnMapOpen() { + bool res = Y_LIKELY(OnVal()) + && Hndl.OnOpenMap(); + ExpectValue = true; + return res; + } + + bool OnArrOpen() { + bool res = Y_LIKELY(OnVal()) + && Hndl.OnOpenArray(); + ExpectValue = true; + return res; + } + + bool OnString(TStringBuf s, EStoredStr t) { + if (Y_LIKELY(OnVal())) { + String = s; + Stored = t; + return true; + } else { + return false; + } + } + + bool OnStrU() { + return OnString(TStringBuf(ts, te), SS_NOCOPY); + } + + bool OnStrQ() { + return OnString(TStringBuf(ts + 1, te - 1), SS_NOCOPY); + } + + bool OnStrE() { + Buffer.Clear(); + Buffer.Reserve(2 * (te - ts)); + + return OnString(UnescapeJsonUnicode(TStringBuf(ts + 1, te - ts - 2), Buffer.data()), SS_MUSTCOPY); + } + + bool OnMapClose() { + ExpectValue = false; + return Y_LIKELY(OnAfterVal()) + && Hndl.OnCloseMap(); + } + + bool OnArrClose() { + ExpectValue = false; + return 
Y_LIKELY(OnAfterVal()) + && Hndl.OnCloseArray(); + } + + bool OnColon() { + if (ExpectValue) { + return false; + } + + ExpectValue = true; + const auto stored = Stored; + Stored = SS_NONE; + + switch (stored) { + default: + return false; + case SS_NOCOPY: + return Hndl.OnMapKeyNoCopy(String); + case SS_MUSTCOPY: + return Hndl.OnMapKey(String); + } + } + + bool OnAfterVal() { + const auto stored = Stored; + Stored = SS_NONE; + + switch (stored) { + default: + return true; + case SS_NOCOPY: + return Hndl.OnStringNoCopy(String); + case SS_MUSTCOPY: + return Hndl.OnString(String); + } + } + + bool OnComma() { + if (Y_UNLIKELY(ExpectValue)) { + return false; + } + ExpectValue = true; + return OnAfterVal(); + } + + bool Parse(); +}; + +#if 0 +%%{ +machine fastjson; + +alphtype char; + +action OnNull { if (Y_UNLIKELY(!OnNull())) goto TOKEN_ERROR; } +action OnTrue { if (Y_UNLIKELY(!OnTrue())) goto TOKEN_ERROR; } +action OnFalse { if (Y_UNLIKELY(!OnFalse())) goto TOKEN_ERROR; } +action OnPInt { if (Y_UNLIKELY(!OnPInt())) goto TOKEN_ERROR; } +action OnNInt { if (Y_UNLIKELY(!OnNInt())) goto TOKEN_ERROR; } +action OnFlt { if (Y_UNLIKELY(!OnFlt())) goto TOKEN_ERROR; } +action OnStrU { if (Y_UNLIKELY(!OnStrU())) goto TOKEN_ERROR; } +action OnStrQ { if (Y_UNLIKELY(!OnStrQ())) goto TOKEN_ERROR; } +action OnStrE { if (Y_UNLIKELY(!OnStrE())) goto TOKEN_ERROR; } +action OnDictO { if (Y_UNLIKELY(!OnMapOpen())) goto TOKEN_ERROR; } +action OnDictC { if (Y_UNLIKELY(!OnMapClose())) goto TOKEN_ERROR; } +action OnArrO { if (Y_UNLIKELY(!OnArrOpen())) goto TOKEN_ERROR; } +action OnArrC { if (Y_UNLIKELY(!OnArrClose())) goto TOKEN_ERROR; } +action OnComma { if (Y_UNLIKELY(!OnComma())) goto TOKEN_ERROR; } +action OnColon { if (Y_UNLIKELY(!OnColon())) goto TOKEN_ERROR; } +action OnError { goto TOKEN_ERROR; } + +comment1 = "/*" (any* -- "*/") "*/"; + +pint = [0-9]+; +nint = '-'[0-9]+; +flt = '-'?[0-9.][0-9.eE+\-]+; + +uchar0 = [a-zA-Z_@$] | (0x80 .. 
0xFF); +uchar = uchar0 | digit | [.\-]; + +qchar = [^'\\]; #'; +dchar = [^"\\]; #"; + +echar = "\\" any; + +qechar = qchar | echar; +dechar = dchar | echar; + +strq = "'" qchar* "'"; +strd = '"' dchar* '"'; + +strqe = "'" qechar* "'"; +strde = '"' dechar* '"'; + +strU = uchar0 uchar*; +strQ = strq | strd; +strE = strqe | strde; + +ws = (0x00 .. 0x20) | 0x7F; +sp = ws+; + +main := |* + 'null' => OnNull; + 'true' => OnTrue; + 'false' => OnFalse; + + pint => OnPInt; + nint => OnNInt; + flt => OnFlt; + + strU => OnStrU; + strQ => OnStrQ; + strE => OnStrE; + + ',' => OnComma; + ':' => OnColon; + + '{' => OnDictO; + '}' => OnDictC; + '[' => OnArrO; + ']' => OnArrC; + + sp; + comment1; + + (flt | pint | nint) (any - (ws | ',' | ':' | '{' | '}' | '[' | ']')) => OnError; + + any => OnError; + *|; +}%% +#endif + +bool TParserCtx::Parse() { + try { + %%{ + write data noerror nofinal; + write init; + write exec; + }%% + ; + Y_UNUSED(fastjson_en_main); + } catch (const TFromStringException& e) { + return OnError(e.what()); + } + + return OnAfterVal() && Hndl.OnEnd() || OnError("invalid or truncated", true); + + TOKEN_ERROR: + return OnError("invalid syntax"); +} + +bool ReadJsonFast(TStringBuf data, TJsonCallbacks* h) { + return TParserCtx(*h, data).Parse(); +} + +} diff --git a/library/cpp/json/fast_sax/unescape.cpp b/library/cpp/json/fast_sax/unescape.cpp new file mode 100644 index 0000000000..72109b0b5e --- /dev/null +++ b/library/cpp/json/fast_sax/unescape.cpp @@ -0,0 +1,7 @@ +#include "unescape.h" + +#include <util/string/escape.h> + +TStringBuf UnescapeJsonUnicode(TStringBuf data, char* scratch) { + return TStringBuf(scratch, UnescapeC(data.data(), data.size(), scratch)); +} diff --git a/library/cpp/json/fast_sax/unescape.h b/library/cpp/json/fast_sax/unescape.h new file mode 100644 index 0000000000..5e40e1e866 --- /dev/null +++ b/library/cpp/json/fast_sax/unescape.h @@ -0,0 +1,5 @@ +#pragma once + +#include <util/generic/strbuf.h> + +TStringBuf 
UnescapeJsonUnicode(TStringBuf data, char* scratch); diff --git a/library/cpp/json/fast_sax/ya.make b/library/cpp/json/fast_sax/ya.make new file mode 100644 index 0000000000..c6447ab6ac --- /dev/null +++ b/library/cpp/json/fast_sax/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +OWNER( + pg + velavokr +) + +PEERDIR( + library/cpp/json/common +) + +SRCS( + parser.rl6 + unescape.cpp +) + +END() diff --git a/library/cpp/json/flex_buffers/cvt.cpp b/library/cpp/json/flex_buffers/cvt.cpp new file mode 100644 index 0000000000..fee0cea0b8 --- /dev/null +++ b/library/cpp/json/flex_buffers/cvt.cpp @@ -0,0 +1,139 @@ +#include "cvt.h" + +#include <flatbuffers/flexbuffers.h> + +#include <library/cpp/json/fast_sax/parser.h> +#include <library/cpp/json/json_reader.h> + +#include <util/generic/vector.h> +#include <util/stream/output.h> +#include <util/stream/input.h> +#include <util/memory/pool.h> + +using namespace NJson; + +namespace { + struct TJsonToFlexCallbacks: public TJsonCallbacks { + inline TJsonToFlexCallbacks() + : P(8192) + { + } + + bool OnNull() override { + B.Null(); + + return true; + } + + bool OnBoolean(bool v) override { + B.Bool(v); + + return true; + } + + bool OnInteger(long long v) override { + B.Int(v); + + return true; + } + + bool OnUInteger(unsigned long long v) override { + B.UInt(v); + + return true; + } + + bool OnDouble(double v) override { + B.Double(v); + + return true; + } + + bool OnString(const TStringBuf& v) override { + B.String(v.data(), v.size()); + + return true; + } + + bool OnOpenMap() override { + S.push_back(B.StartMap()); + + return true; + } + + bool OnMapKey(const TStringBuf& v) override { + auto iv = P.AppendCString(v); + + B.Key(iv.data(), iv.size()); + + return true; + } + + bool OnCloseMap() override { + B.EndMap(PopOffset()); + + return true; + } + + bool OnOpenArray() override { + S.push_back(B.StartVector()); + + return true; + } + + bool OnCloseArray() override { + B.EndVector(PopOffset(), false, false); + + return true; + } + + bool 
OnStringNoCopy(const TStringBuf& s) override { + return OnString(s); + } + + bool OnMapKeyNoCopy(const TStringBuf& s) override { + return OnMapKey(s); + } + + bool OnEnd() override { + B.Finish(); + + Y_ENSURE(S.empty()); + + return true; + } + + void OnError(size_t, TStringBuf reason) override { + ythrow yexception() << reason; + } + + inline size_t PopOffset() { + auto res = S.back(); + + S.pop_back(); + + return res; + } + + inline auto& Buffer() { + return B.GetBuffer(); + } + + flexbuffers::Builder B; + TVector<size_t> S; + TMemoryPool P; + }; +} + +void NJson::ConvertJsonToFlexBuffers(TStringBuf input, TFlexBuffersData& result) { + TJsonToFlexCallbacks cb; + + ReadJsonFast(input, &cb); + result.swap(const_cast<std::vector<ui8>&>(cb.Buffer())); +} + +TString NJson::FlexToString(const TFlexBuffersData& v) { + auto root = flexbuffers::GetRoot(v.data(), v.size()); + + return TString(root.ToString()); +} diff --git a/library/cpp/json/flex_buffers/cvt.h b/library/cpp/json/flex_buffers/cvt.h new file mode 100644 index 0000000000..82d2874268 --- /dev/null +++ b/library/cpp/json/flex_buffers/cvt.h @@ -0,0 +1,20 @@ +#pragma once + +#include <util/generic/vector.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> + +namespace NJson { + using TFlexBuffersData = TVector<ui8>; + + TString FlexToString(const TFlexBuffersData& v); + void ConvertJsonToFlexBuffers(TStringBuf input, TFlexBuffersData& result); + + inline TFlexBuffersData ConvertJsonToFlexBuffers(TStringBuf input) { + TFlexBuffersData result; + + ConvertJsonToFlexBuffers(input, result); + + return result; + } +} diff --git a/library/cpp/json/flex_buffers/ut/cvt_ut.cpp b/library/cpp/json/flex_buffers/ut/cvt_ut.cpp new file mode 100644 index 0000000000..9fffef4d38 --- /dev/null +++ b/library/cpp/json/flex_buffers/ut/cvt_ut.cpp @@ -0,0 +1,21 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/json/flex_buffers/cvt.h> + +using namespace NJson; + +static auto JSON = R"({ + 
"a": { + "b": [1, 2, 3], + "c": ["x", "y", 3, "z"] + } +})"; + +static auto RES = R"({ a: { b: [ 1, 2, 3 ], c: [ "x", "y", 3, "z" ] } })"; + +Y_UNIT_TEST_SUITE(JsonToFlex) { + Y_UNIT_TEST(Test1) { + auto buf = ConvertJsonToFlexBuffers(JSON); + + UNIT_ASSERT_VALUES_EQUAL(FlexToString(buf), RES); + } +} diff --git a/library/cpp/json/flex_buffers/ut/ya.make b/library/cpp/json/flex_buffers/ut/ya.make new file mode 100644 index 0000000000..3fdc93f88e --- /dev/null +++ b/library/cpp/json/flex_buffers/ut/ya.make @@ -0,0 +1,9 @@ +UNITTEST_FOR(library/cpp/json/flex_buffers) + +OWNER(pg) + +SRCS( + cvt_ut.cpp +) + +END() diff --git a/library/cpp/json/flex_buffers/ya.make b/library/cpp/json/flex_buffers/ya.make new file mode 100644 index 0000000000..3ece5e3703 --- /dev/null +++ b/library/cpp/json/flex_buffers/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +OWNER(pg) + +ADDINCL(contrib/libs/flatbuffers/include) + +PEERDIR( + library/cpp/json + contrib/libs/flatbuffers +) + +SRCS( + cvt.cpp +) + +END() diff --git a/library/cpp/json/fuzzy_test/main.cpp b/library/cpp/json/fuzzy_test/main.cpp new file mode 100644 index 0000000000..29a53aac14 --- /dev/null +++ b/library/cpp/json/fuzzy_test/main.cpp @@ -0,0 +1,30 @@ +#include <library/cpp/json/json_reader.h> + +#include <util/random/random.h> +#include <util/stream/str.h> + +extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { + const auto json = TString((const char*)data, size); + + try { + NJson::TJsonValue value; + NJson::ReadJsonFastTree(json, &value, true); + } catch (...) { + //Cout << json << " -> " << CurrentExceptionMessage() << Endl; + } + + try { + NJson::TJsonCallbacks cb; + NJson::ReadJsonFast(json, &cb); + } catch (...) { + //Cout << json << " -> " << CurrentExceptionMessage() << Endl; + } + + try { + NJson::ValidateJson(json); + } catch (...) 
{ + //Cout << json << " -> " << CurrentExceptionMessage() << Endl; + } + + return 0; +} diff --git a/library/cpp/json/fuzzy_test/ya.make b/library/cpp/json/fuzzy_test/ya.make new file mode 100644 index 0000000000..ff50bc1f62 --- /dev/null +++ b/library/cpp/json/fuzzy_test/ya.make @@ -0,0 +1,13 @@ +FUZZ() + +OWNER(pg) + +PEERDIR( + library/cpp/json +) + +SRCS( + main.cpp +) + +END() diff --git a/library/cpp/json/json_prettifier.cpp b/library/cpp/json/json_prettifier.cpp new file mode 100644 index 0000000000..bb16aab44e --- /dev/null +++ b/library/cpp/json/json_prettifier.cpp @@ -0,0 +1,277 @@ +#include "json_prettifier.h" + +#include <util/generic/deque.h> +#include <util/generic/algorithm.h> +#include <util/memory/pool.h> +#include <util/string/util.h> + +#include <library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h> + +namespace NJson { + struct TRewritableOut { + IOutputStream& Slave; + + char Last = 0; + bool Dirty = false; + + TRewritableOut(IOutputStream& sl) + : Slave(sl) + { + } + + template <typename T> + void Write(const T& t) { + Flush(); + Slave << t; + } + + void Hold(char c) { + if (Dirty) + Flush(); + Last = c; + Dirty = true; + } + + void Flush() { + if (Dirty) { + Slave << Last; + Dirty = false; + } + } + + void Revert() { + Dirty = false; + } + }; + + struct TSpaces { + char S[256]; + + TSpaces() { + memset(&S, ' ', sizeof(S)); + } + + TStringBuf Get(ui8 sz) const { + return TStringBuf(S, sz); + } + }; + + bool TJsonPrettifier::MayUnquoteNew(TStringBuf s) { + static str_spn alpha("a-zA-Z_@$", true); + static str_spn alnum("a-zA-Z_@$0-9.-", true); + static TStringBuf true0("true"); + static TStringBuf false0("false"); + static TStringBuf null0("null"); + + return !!s && alpha.chars_table[(ui8)s[0]] && alnum.cbrk(s.begin() + 1, s.end()) == s.end() && !EqualToOneOf(s, null0, true0, false0); + } + + // to keep arcadia tests happy + bool TJsonPrettifier::MayUnquoteOld(TStringBuf s) { + static str_spn alpha("a-zA-Z_@$", true); + static str_spn 
alnum("a-zA-Z_@$0-9", true); + static TStringBuf true0("true"); + static TStringBuf false0("false"); + static TStringBuf true1("on"); + static TStringBuf false1("off"); + static TStringBuf true2("da"); + static TStringBuf false2("net"); + static TStringBuf null0("null"); + + return !!s && alpha.chars_table[(ui8)s[0]] && alnum.cbrk(s.begin() + 1, s.end()) == s.end() && !EqualToOneOf(s, null0, true0, false0, true1, false1, true2, false2); + } + + class TPrettifier: public TJsonCallbacks { + TRewritableOut Out; + TStringBuf Spaces; + TStringBuf Quote; + TStringBuf Unsafe; + TStringBuf Safe; + + ui32 Level = 0; + ui32 MaxPaddingLevel; + + bool Unquote = false; + bool Compactify = false; + bool NewUnquote = false; + + public: + TPrettifier(IOutputStream& out, const TJsonPrettifier& p) + : Out(out) + , MaxPaddingLevel(p.MaxPaddingLevel) + , Unquote(p.Unquote) + , Compactify(p.Compactify) + , NewUnquote(p.NewUnquote) + { + static TSpaces spaces; + Spaces = spaces.Get(p.Padding); + if (p.SingleQuotes) { + Quote = Unsafe = "'"; + Safe = "\""; + } else { + Quote = Unsafe = "\""; + Safe = "'"; + } + } + + void Pad(bool close = false) { + if (Compactify) { + Out.Flush(); + return; + } + if (Level > MaxPaddingLevel || (Level == MaxPaddingLevel && close)) { + Out.Write(" "); + return; + } + if (Level || close) { + Out.Write(Spaces ? "\n" : " "); + } + for (ui32 i = 0; i < Level; ++i) { + Out.Write(Spaces); + } + } + + void WriteSpace(char sp) { + if (Compactify) { + Out.Flush(); + return; + } + + Out.Write(sp); + } + + void OnVal() { + if (Out.Dirty && ':' == Out.Last) { + WriteSpace(' '); + } else { + Pad(); + } + } + + void AfterVal() { + Out.Hold(','); + } + + template <typename T> + bool WriteVal(const T& t) { + OnVal(); + Out.Write(t); + AfterVal(); + return true; + } + + bool OnNull() override { + return WriteVal(TStringBuf("null")); + } + + bool OnBoolean(bool v) override { + return WriteVal(v ? 
TStringBuf("true") : TStringBuf("false")); + } + + bool OnInteger(long long i) override { + return WriteVal(i); + } + + bool OnUInteger(unsigned long long i) override { + return WriteVal(i); + } + + bool OnDouble(double d) override { + return WriteVal(d); + } + + void WriteString(TStringBuf s) { + if (Unquote && (NewUnquote ? TJsonPrettifier::MayUnquoteNew(s) : TJsonPrettifier::MayUnquoteOld(s))) { + Out.Slave << s; + } else { + Out.Slave << Quote; + NEscJ::EscapeJ<false, true>(s, Out.Slave, Safe, Unsafe); + Out.Slave << Quote; + } + } + + bool OnString(const TStringBuf& s) override { + OnVal(); + WriteString(s); + AfterVal(); + return true; + } + + bool OnOpen(char c) { + OnVal(); + Level++; + Out.Hold(c); + return true; + } + + bool OnOpenMap() override { + return OnOpen('{'); + } + + bool OnOpenArray() override { + return OnOpen('['); + } + + bool OnMapKey(const TStringBuf& k) override { + OnVal(); + WriteString(k); + WriteSpace(' '); + Out.Hold(':'); + return true; + } + + bool OnClose(char c) { + if (!Level) + return false; + + Level--; + + if (Out.Dirty && c == Out.Last) { + WriteSpace(' '); + } else { + Out.Revert(); + Pad(true); + } + + return true; + } + + bool OnCloseMap() override { + if (!OnClose('{')) + return false; + Out.Write("}"); + AfterVal(); + return true; + } + + bool OnCloseArray() override { + if (!OnClose('[')) + return false; + Out.Write("]"); + AfterVal(); + return true; + } + + bool OnEnd() override { + return !Level; + } + }; + + bool TJsonPrettifier::Prettify(TStringBuf in, IOutputStream& out) const { + TPrettifier p(out, *this); + if (Strict) { + TMemoryInput mIn(in.data(), in.size()); + return ReadJson(&mIn, &p); + } else { + return ReadJsonFast(in, &p); + } + } + + TString TJsonPrettifier::Prettify(TStringBuf in) const { + TStringStream s; + if (Prettify(in, s)) + return s.Str(); + return TString(); + } + +} diff --git a/library/cpp/json/json_prettifier.h b/library/cpp/json/json_prettifier.h new file mode 100644 index 
0000000000..27d611b0b4 --- /dev/null +++ b/library/cpp/json/json_prettifier.h @@ -0,0 +1,58 @@ +#pragma once + +#include "json_reader.h" + +#include <util/generic/ylimits.h> + +namespace NJson { + struct TJsonPrettifier { + bool Unquote = false; + ui8 Padding = 4; + bool SingleQuotes = false; + bool Compactify = false; + bool Strict = false; + bool NewUnquote = false; // use new unquote, may break old tests + ui32 MaxPaddingLevel = Max<ui32>(); + + static TJsonPrettifier Prettifier(bool unquote = false, ui8 padding = 4, bool singlequotes = false) { + TJsonPrettifier p; + p.Unquote = unquote; + p.Padding = padding; + p.SingleQuotes = singlequotes; + return p; + } + + static TJsonPrettifier Compactifier(bool unquote = false, bool singlequote = false) { + TJsonPrettifier p; + p.Unquote = unquote; + p.Padding = 0; + p.Compactify = true; + p.SingleQuotes = singlequote; + return p; + } + + bool Prettify(TStringBuf in, IOutputStream& out) const; + + TString Prettify(TStringBuf in) const; + + static bool MayUnquoteNew(TStringBuf in); + static bool MayUnquoteOld(TStringBuf in); + }; + + inline TString PrettifyJson(TStringBuf in, bool unquote = false, ui8 padding = 4, bool sq = false) { + return TJsonPrettifier::Prettifier(unquote, padding, sq).Prettify(in); + } + + inline bool PrettifyJson(TStringBuf in, IOutputStream& out, bool unquote = false, ui8 padding = 4, bool sq = false) { + return TJsonPrettifier::Prettifier(unquote, padding, sq).Prettify(in, out); + } + + inline bool CompactifyJson(TStringBuf in, IOutputStream& out, bool unquote = false, bool sq = false) { + return TJsonPrettifier::Compactifier(unquote, sq).Prettify(in, out); + } + + inline TString CompactifyJson(TStringBuf in, bool unquote = false, bool sq = false) { + return TJsonPrettifier::Compactifier(unquote, sq).Prettify(in); + } + +} diff --git a/library/cpp/json/json_reader.cpp b/library/cpp/json/json_reader.cpp new file mode 100644 index 0000000000..072c8deafe --- /dev/null +++ 
b/library/cpp/json/json_reader.cpp @@ -0,0 +1,567 @@ +#include "json_reader.h" + +#include "rapidjson_helpers.h" + +#include <contrib/libs/rapidjson/include/rapidjson/error/en.h> +#include <contrib/libs/rapidjson/include/rapidjson/error/error.h> +#include <contrib/libs/rapidjson/include/rapidjson/reader.h> + +#include <util/generic/stack.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> +#include <util/string/builder.h> + +namespace NJson { + namespace { + TString PrintError(const rapidjson::ParseResult& result) { + return TStringBuilder() << TStringBuf("Offset: ") << result.Offset() + << TStringBuf(", Code: ") << (int)result.Code() + << TStringBuf(", Error: ") << GetParseError_En(result.Code()); + } + } + + static const size_t DEFAULT_BUFFER_LEN = 65536; + + bool TParserCallbacks::OpenComplexValue(EJsonValueType type) { + TJsonValue* pvalue; + switch (CurrentState) { + case START: + Value.SetType(type); + ValuesStack.push_back(&Value); + break; + case IN_ARRAY: + pvalue = &ValuesStack.back()->AppendValue(type); + ValuesStack.push_back(pvalue); + break; + case AFTER_MAP_KEY: + pvalue = &ValuesStack.back()->InsertValue(Key, type); + ValuesStack.push_back(pvalue); + CurrentState = IN_MAP; + break; + default: + return false; + } + return true; + } + + bool TParserCallbacks::CloseComplexValue() { + if (ValuesStack.empty()) { + return false; + } + + ValuesStack.pop_back(); + if (!ValuesStack.empty()) { + switch (ValuesStack.back()->GetType()) { + case JSON_ARRAY: + CurrentState = IN_ARRAY; + break; + case JSON_MAP: + CurrentState = IN_MAP; + break; + default: + return false; + } + } else { + CurrentState = FINISH; + } + return true; + } + + TParserCallbacks::TParserCallbacks(TJsonValue& value, bool throwOnError, bool notClosedBracketIsError) + : TJsonCallbacks(throwOnError) + , Value(value) + , NotClosedBracketIsError(notClosedBracketIsError) + , CurrentState(START) + { + } + + bool TParserCallbacks::OnNull() { + return SetValue(JSON_NULL); + } + + 
bool TParserCallbacks::OnBoolean(bool val) { + return SetValue(val); + } + + bool TParserCallbacks::OnInteger(long long val) { + return SetValue(val); + } + + bool TParserCallbacks::OnUInteger(unsigned long long val) { + return SetValue(val); + } + + bool TParserCallbacks::OnString(const TStringBuf& val) { + return SetValue(val); + } + + bool TParserCallbacks::OnDouble(double val) { + return SetValue(val); + } + + bool TParserCallbacks::OnOpenArray() { + bool res = OpenComplexValue(JSON_ARRAY); + if (res) + CurrentState = IN_ARRAY; + return res; + } + + bool TParserCallbacks::OnCloseArray() { + return CloseComplexValue(); + } + + bool TParserCallbacks::OnOpenMap() { + bool res = OpenComplexValue(JSON_MAP); + if (res) + CurrentState = IN_MAP; + return res; + } + + bool TParserCallbacks::OnCloseMap() { + return CloseComplexValue(); + } + + bool TParserCallbacks::OnMapKey(const TStringBuf& val) { + switch (CurrentState) { + case IN_MAP: + Key = val; + CurrentState = AFTER_MAP_KEY; + break; + default: + return false; + } + return true; + } + + bool TParserCallbacks::OnEnd() { + if (NotClosedBracketIsError){ + return ValuesStack.empty(); + } + return true; + } + + TJsonReaderConfig::TJsonReaderConfig() + : BufferSize(DEFAULT_BUFFER_LEN) + { + } + + void TJsonReaderConfig::SetBufferSize(size_t bufferSize) { + BufferSize = Max((size_t)1, Min(bufferSize, DEFAULT_BUFFER_LEN)); + } + + size_t TJsonReaderConfig::GetBufferSize() const { + return BufferSize; + } + + namespace { + struct TJsonValueBuilder { +#ifdef NDEBUG + using TItem = TJsonValue*; + + inline TJsonValue& Access(TItem& item) const { + return *item; + } +#else + struct TItem { + TJsonValue* V; + size_t DuplicateKeyCount; + + TItem(TJsonValue* v) + : V(v) + , DuplicateKeyCount(0) + { + } + }; + + inline TJsonValue& Access(TItem& item) const { + return *item.V; + } +#endif + + NJson::TJsonValue& V; + + TStack<TItem> S; + + TJsonValueBuilder(NJson::TJsonValue& v) + : V(v) + { + S.emplace(&V); + } + + template 
<class T> + void Set(const T& t) { + if (Access(S.top()).IsArray()) { + Access(S.top()).AppendValue(t); + } else { + Access(S.top()) = t; + S.pop(); + } + } + + bool Null() { + Set(NJson::JSON_NULL); + return true; + } + + bool Bool(bool b) { + Set(b); + return true; + } + + bool Int(int i) { + Set(i); + return true; + } + + template <class U> + bool ProcessUint(U u) { + if (Y_LIKELY(u <= static_cast<ui64>(Max<i64>()))) { + Set(i64(u)); + } else { + Set(u); + } + return true; + } + + bool Uint(unsigned u) { + return ProcessUint(u); + } + + bool Int64(i64 i) { + Set(i); + return true; + } + + bool Uint64(ui64 u) { + return ProcessUint(u); + } + + bool Double(double d) { + Set(d); + return true; + } + + bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) { + Y_ASSERT(false && "this method should never be called"); + Y_UNUSED(str); + Y_UNUSED(length); + Y_UNUSED(copy); + return true; + } + + bool String(const char* str, rapidjson::SizeType length, bool copy) { + Y_ASSERT(copy); + Set(TStringBuf(str, length)); + return true; + } + + bool StartObject() { + if (Access(S.top()).IsArray()) { + S.emplace(&Access(S.top()).AppendValue(NJson::JSON_MAP)); + } else { + Access(S.top()).SetType(NJson::JSON_MAP); + } + return true; + } + + bool Key(const char* str, rapidjson::SizeType length, bool copy) { + Y_ASSERT(copy); + auto& value = Access(S.top())[TStringBuf(str, length)]; + if (Y_UNLIKELY(value.GetType() != JSON_UNDEFINED)) { +#ifndef NDEBUG + ++S.top().DuplicateKeyCount; +#endif + value.SetType(JSON_UNDEFINED); + } + S.emplace(&value); + return true; + } + + inline int GetDuplicateKeyCount() const { +#ifdef NDEBUG + return 0; +#else + return S.top().DuplicateKeyCount; +#endif + } + + bool EndObject(rapidjson::SizeType memberCount) { + Y_ASSERT(memberCount == Access(S.top()).GetMap().size() + GetDuplicateKeyCount()); + S.pop(); + return true; + } + + bool StartArray() { + if (Access(S.top()).IsArray()) { + 
S.emplace(&Access(S.top()).AppendValue(NJson::JSON_ARRAY)); + } else { + Access(S.top()).SetType(NJson::JSON_ARRAY); + } + return true; + } + + bool EndArray(rapidjson::SizeType elementCount) { + Y_ASSERT(elementCount == Access(S.top()).GetArray().size()); + S.pop(); + return true; + } + }; + + template <class TRapidJsonCompliantInputStream, class THandler> + auto Read(const TJsonReaderConfig& config, + rapidjson::Reader& reader, + TRapidJsonCompliantInputStream& is, + THandler& handler) { + + ui8 flags = ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE; + + if (config.AllowComments) { + flags |= ReaderConfigFlags::COMMENTS; + } + + if (config.DontValidateUtf8) { + flags &= ~(ReaderConfigFlags::VALIDATE); + } + + if (config.AllowEscapedApostrophe) { + flags |= ReaderConfigFlags::ESCAPE; + } + + switch (flags) { + case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_NOESCAPE: + return reader.Parse<rapidjson::kParseCommentsFlag>(is, handler); + case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_NOESCAPE: + return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag>(is, handler); + case ReaderConfigToRapidJsonFlags::COMMENTS_VALID_ESCAPE: + return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler); + case ReaderConfigToRapidJsonFlags::COMMENTS_NOVALID_ESCAPE: + return reader.Parse<rapidjson::kParseCommentsFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler); + case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_NOESCAPE: + return reader.Parse<rapidjson::kParseValidateEncodingFlag>(is, handler); + case ReaderConfigToRapidJsonFlags::NOCOMMENTS_VALID_ESCAPE: + return reader.Parse<rapidjson::kParseValidateEncodingFlag | rapidjson::kParseEscapedApostropheFlag>(is, handler); + case ReaderConfigToRapidJsonFlags::NOCOMMENTS_NOVALID_ESCAPE: + return reader.Parse<rapidjson::kParseEscapedApostropheFlag>(is, handler); + default: + return 
reader.Parse<rapidjson::kParseNoFlags>(is, handler); + } + } + + template <class TRapidJsonCompliantInputStream, class THandler> + bool ReadJson(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, THandler& handler, bool throwOnError) { + rapidjson::Reader reader; + + auto result = Read(*config, reader, is, handler); + + if (result.IsError()) { + if (throwOnError) { + ythrow TJsonException() << PrintError(result); + } else { + return false; + } + } + + return true; + } + + template <class TRapidJsonCompliantInputStream> + bool ReadJsonTree(TRapidJsonCompliantInputStream& is, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) { + out->SetType(NJson::JSON_NULL); + + TJsonValueBuilder handler(*out); + + return ReadJson(is, config, handler, throwOnError); + } + + template <class TData> + bool ReadJsonTreeImpl(TData* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) { + std::conditional_t<std::is_same<TData, TStringBuf>::value, TStringBufStreamWrapper, TInputStreamWrapper> is(*in); + return ReadJsonTree(is, config, out, throwOnError); + } + + template <class TData> + bool ReadJsonTreeImpl(TData* in, bool allowComments, TJsonValue* out, bool throwOnError) { + TJsonReaderConfig config; + config.AllowComments = allowComments; + return ReadJsonTreeImpl(in, &config, out, throwOnError); + } + + template <class TData> + bool ReadJsonTreeImpl(TData* in, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(in, false, out, throwOnError); + } + } //namespace + + bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(&in, out, throwOnError); + } + + bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(&in, allowComments, out, throwOnError); + } + + bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(&in, config, out, throwOnError); + 
} + + bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(in, out, throwOnError); + } + + bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(in, allowComments, out, throwOnError); + } + + bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError) { + return ReadJsonTreeImpl(in, config, out, throwOnError); + } + + bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError, bool notClosedBracketIsError) { + TParserCallbacks cb(*out, throwOnError, notClosedBracketIsError); + + return ReadJsonFast(in, &cb); + } + + TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError) { + TJsonValue value; + // There is no way to report an error apart from throwing an exception when we return result by value. + ReadJsonFastTree(in, &value, /* throwOnError = */ true, notClosedBracketIsError); + return value; + } + + namespace { + struct TJsonCallbacksWrapper { + TJsonCallbacks& Impl; + + TJsonCallbacksWrapper(TJsonCallbacks& impl) + : Impl(impl) + { + } + + bool Null() { + return Impl.OnNull(); + } + + bool Bool(bool b) { + return Impl.OnBoolean(b); + } + + template <class U> + bool ProcessUint(U u) { + if (Y_LIKELY(u <= ui64(Max<i64>()))) { + return Impl.OnInteger(i64(u)); + } else { + return Impl.OnUInteger(u); + } + } + + bool Int(int i) { + return Impl.OnInteger(i); + } + + bool Uint(unsigned u) { + return ProcessUint(u); + } + + bool Int64(i64 i) { + return Impl.OnInteger(i); + } + + bool Uint64(ui64 u) { + return ProcessUint(u); + } + + bool Double(double d) { + return Impl.OnDouble(d); + } + + bool RawNumber(const char* str, rapidjson::SizeType length, bool copy) { + Y_ASSERT(false && "this method should never be called"); + Y_UNUSED(str); + Y_UNUSED(length); + Y_UNUSED(copy); + return true; + } + + bool String(const char* str, rapidjson::SizeType length, bool copy) { + Y_ASSERT(copy); + 
return Impl.OnString(TStringBuf(str, length)); + } + + bool StartObject() { + return Impl.OnOpenMap(); + } + + bool Key(const char* str, rapidjson::SizeType length, bool copy) { + Y_ASSERT(copy); + return Impl.OnMapKey(TStringBuf(str, length)); + } + + bool EndObject(rapidjson::SizeType memberCount) { + Y_UNUSED(memberCount); + return Impl.OnCloseMap(); + } + + bool StartArray() { + return Impl.OnOpenArray(); + } + + bool EndArray(rapidjson::SizeType elementCount) { + Y_UNUSED(elementCount); + return Impl.OnCloseArray(); + } + }; + } + + bool ReadJson(IInputStream* in, TJsonCallbacks* cbs) { + return ReadJson(in, false, cbs); + } + + bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* cbs) { + TJsonReaderConfig config; + config.AllowComments = allowComments; + return ReadJson(in, &config, cbs); + } + + bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* cbs) { + TJsonReaderConfig config; + config.AllowComments = allowComments; + config.AllowEscapedApostrophe = allowEscapedApostrophe; + return ReadJson(in, &config, cbs); + } + + bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* cbs) { + TJsonCallbacksWrapper wrapper(*cbs); + TInputStreamWrapper is(*in); + + rapidjson::Reader reader; + auto result = Read(*config, reader, is, wrapper); + + if (result.IsError()) { + cbs->OnError(result.Offset(), PrintError(result)); + + return false; + } + + return cbs->OnEnd(); + } + + TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError) { + TJsonValue out; + ReadJsonTree(in, &out, throwOnError); + return out; + } + + TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError) { + TJsonValue out; + ReadJsonTree(in, allowComments, &out, throwOnError); + return out; + } + + TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError) { + TJsonValue out; + ReadJsonTree(in, config, &out, throwOnError); + return out; + } + +} diff --git 
a/library/cpp/json/json_reader.h b/library/cpp/json/json_reader.h new file mode 100644 index 0000000000..b673788330 --- /dev/null +++ b/library/cpp/json/json_reader.h @@ -0,0 +1,140 @@ +#pragma once + +#include "json_value.h" + +#include <library/cpp/json/common/defs.h> +#include <library/cpp/json/fast_sax/parser.h> + +#include <util/generic/yexception.h> + +#include <util/stream/input.h> +#include <util/stream/str.h> +#include <util/stream/mem.h> + +namespace NJson { + struct TJsonReaderConfig { + TJsonReaderConfig(); + + // js-style comments (both // and /**/) + bool AllowComments = false; + bool DontValidateUtf8 = false; + bool AllowEscapedApostrophe = false; + + void SetBufferSize(size_t bufferSize); + size_t GetBufferSize() const; + + private: + size_t BufferSize; + }; + + bool ReadJsonTree(TStringBuf in, TJsonValue* out, bool throwOnError = false); + bool ReadJsonTree(TStringBuf in, bool allowComments, TJsonValue* out, bool throwOnError = false); + bool ReadJsonTree(TStringBuf in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError = false); + + bool ReadJsonTree(IInputStream* in, TJsonValue* out, bool throwOnError = false); + bool ReadJsonTree(IInputStream* in, bool allowComments, TJsonValue* out, bool throwOnError = false); + bool ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, TJsonValue* out, bool throwOnError = false); + + TJsonValue ReadJsonTree(IInputStream* in, bool throwOnError = false); + TJsonValue ReadJsonTree(IInputStream* in, bool allowComments, bool throwOnError); + TJsonValue ReadJsonTree(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError = false); + + bool ReadJson(IInputStream* in, TJsonCallbacks* callbacks); + bool ReadJson(IInputStream* in, bool allowComments, TJsonCallbacks* callbacks); + bool ReadJson(IInputStream* in, bool allowComments, bool allowEscapedApostrophe, TJsonCallbacks* callbacks); + bool ReadJson(IInputStream* in, const TJsonReaderConfig* config, TJsonCallbacks* callbacks); 
+ + enum ReaderConfigFlags { + COMMENTS = 0b100, + VALIDATE = 0b010, + ESCAPE = 0b001, + }; + + enum ReaderConfigToRapidJsonFlags { + COMMENTS_NOVALID_NOESCAPE = 0b100, + COMMENTS_VALID_NOESCAPE = 0b110, + COMMENTS_VALID_ESCAPE = 0b111, + COMMENTS_NOVALID_ESCAPE = 0b101, + NOCOMMENTS_VALID_NOESCAPE = 0b010, + NOCOMMENTS_VALID_ESCAPE = 0b011, + NOCOMMENTS_NOVALID_ESCAPE = 0b001, + }; + + inline bool ValidateJson(IInputStream* in, const TJsonReaderConfig* config, bool throwOnError = false) { + TJsonCallbacks c(throwOnError); + return ReadJson(in, config, &c); + } + + inline bool ValidateJson(TStringBuf in, const TJsonReaderConfig& config = TJsonReaderConfig(), bool throwOnError = false) { + TMemoryInput min(in.data(), in.size()); + return ValidateJson(&min, &config, throwOnError); + } + + inline bool ValidateJsonThrow(IInputStream* in, const TJsonReaderConfig* config) { + return ValidateJson(in, config, true); + } + + inline bool ValidateJsonThrow(TStringBuf in, const TJsonReaderConfig& config = TJsonReaderConfig()) { + return ValidateJson(in, config, true); + } + + class TParserCallbacks: public TJsonCallbacks { + public: + TParserCallbacks(TJsonValue& value, bool throwOnError = false, bool notClosedBracketIsError = false); + bool OnNull() override; + bool OnBoolean(bool val) override; + bool OnInteger(long long val) override; + bool OnUInteger(unsigned long long val) override; + bool OnString(const TStringBuf& val) override; + bool OnDouble(double val) override; + bool OnOpenArray() override; + bool OnCloseArray() override; + bool OnOpenMap() override; + bool OnCloseMap() override; + bool OnMapKey(const TStringBuf& val) override; + bool OnEnd() override; + + protected: + TJsonValue& Value; + TString Key; + TVector<TJsonValue*> ValuesStack; + bool NotClosedBracketIsError; + + enum { + START, + AFTER_MAP_KEY, + IN_MAP, + IN_ARRAY, + FINISH + } CurrentState; + + template <class T> + bool SetValue(const T& value) { + switch (CurrentState) { + case START: + 
Value.SetValue(value); + break; + case AFTER_MAP_KEY: + ValuesStack.back()->InsertValue(Key, value); + CurrentState = IN_MAP; + break; + case IN_ARRAY: + ValuesStack.back()->AppendValue(value); + break; + case IN_MAP: + case FINISH: + return false; + default: + ythrow yexception() << "TParserCallbacks::SetValue invalid enum"; + } + return true; + } + + bool OpenComplexValue(EJsonValueType type); + bool CloseComplexValue(); + }; + + //// relaxed json, used in library/cpp/scheme + bool ReadJsonFastTree(TStringBuf in, TJsonValue* out, bool throwOnError = false, bool notClosedBracketIsError = false); + TJsonValue ReadJsonFastTree(TStringBuf in, bool notClosedBracketIsError = false); +} diff --git a/library/cpp/json/json_value.h b/library/cpp/json/json_value.h new file mode 100644 index 0000000000..f70f4d2ee8 --- /dev/null +++ b/library/cpp/json/json_value.h @@ -0,0 +1,3 @@ +#pragma once + +#include <library/cpp/json/writer/json_value.h> diff --git a/library/cpp/json/json_writer.cpp b/library/cpp/json/json_writer.cpp new file mode 100644 index 0000000000..3d058bae36 --- /dev/null +++ b/library/cpp/json/json_writer.cpp @@ -0,0 +1,149 @@ +#include "json_writer.h" + +#include <util/charset/utf8.h> +#include <util/generic/algorithm.h> +#include <util/string/cast.h> +#include <util/system/yassert.h> + +namespace NJson { + TJsonWriter::TJsonWriter(IOutputStream* out, bool formatOutput, bool sortkeys, bool validateUtf8) + : Out(out) + , Buf(NJsonWriter::HEM_UNSAFE) + , DoubleNDigits(TJsonWriterConfig::DefaultDoubleNDigits) + , FloatNDigits(TJsonWriterConfig::DefaultFloatNDigits) + , FloatToStringMode(TJsonWriterConfig::DefaultFloatToStringMode) + , SortKeys(sortkeys) + , ValidateUtf8(validateUtf8) + , DontEscapeStrings(false) + , DontFlushInDestructor(false) + { + Buf.SetIndentSpaces(formatOutput ? 2 : 0); + } + + TJsonWriter::TJsonWriter(IOutputStream* out, const TJsonWriterConfig& config, bool DFID) + : Out(config.Unbuffered ? 
nullptr : out) + , Buf(NJsonWriter::HEM_UNSAFE, config.Unbuffered ? out : nullptr) + , DoubleNDigits(config.DoubleNDigits) + , FloatNDigits(config.FloatNDigits) + , FloatToStringMode(config.FloatToStringMode) + , SortKeys(config.SortKeys) + , ValidateUtf8(config.ValidateUtf8) + , DontEscapeStrings(config.DontEscapeStrings) + , DontFlushInDestructor(DFID) + { + Buf.SetIndentSpaces(config.FormatOutput ? 2 : 0); + Buf.SetWriteNanAsString(config.WriteNanAsString); + } + + TJsonWriter::~TJsonWriter() { + // if we write to socket it's possible to get exception here + // don't use exceptions in destructors + if (!DontFlushInDestructor) { + try { + Flush(); + } catch (...) { + } + } + } + + void TJsonWriter::Flush() { + if (Out) { + Buf.FlushTo(Out); + } + } + + void TJsonWriter::OpenMap() { + Buf.BeginObject(); + } + + void TJsonWriter::CloseMap() { + Buf.EndObject(); + } + + void TJsonWriter::OpenArray() { + Buf.BeginList(); + } + + void TJsonWriter::CloseArray() { + Buf.EndList(); + } + + void TJsonWriter::Write(const TStringBuf& value) { + if (ValidateUtf8 && !IsUtf(value)) + throw yexception() << "JSON writer: invalid UTF-8"; + if (Buf.KeyExpected()) { + Buf.WriteKey(value); + } else { + if (DontEscapeStrings) { + Buf.UnsafeWriteValue(TString("\"") + value + '"'); + } else { + Buf.WriteString(value); + } + } + } + + void TJsonWriter::WriteNull() { + Buf.WriteNull(); + } + + void TJsonWriter::Write(float value) { + Buf.WriteFloat(value, FloatToStringMode, FloatNDigits); + } + + void TJsonWriter::Write(double value) { + Buf.WriteDouble(value, FloatToStringMode, DoubleNDigits); + } + + void TJsonWriter::Write(long long value) { + Buf.WriteLongLong(value); + } + + void TJsonWriter::Write(unsigned long long value) { + Buf.WriteULongLong(value); + } + + void TJsonWriter::Write(bool value) { + Buf.WriteBool(value); + } + + namespace { + struct TLessStrPtr { + bool operator()(const TString* a, const TString* b) const { + return *a < *b; + } + }; + } + + void 
TJsonWriter::Write(const TJsonValue* v) { + Buf.WriteJsonValue(v, SortKeys, FloatToStringMode, DoubleNDigits); + } + + void TJsonWriter::Write(const TJsonValue& v) { + Buf.WriteJsonValue(&v, SortKeys, FloatToStringMode, DoubleNDigits); + } + + TString WriteJson(const TJsonValue* value, bool formatOutput, bool sortkeys, bool validateUtf8) { + TStringStream ss; + WriteJson(&ss, value, formatOutput, sortkeys, validateUtf8); + return ss.Str(); + } + + TString WriteJson(const TJsonValue& value, bool formatOutput, bool sortkeys, bool validateUtf8) { + TStringStream ss; + WriteJson(&ss, &value, formatOutput, sortkeys, validateUtf8); + return ss.Str(); + } + + void WriteJson(IOutputStream* out, const TJsonValue* val, bool formatOutput, bool sortkeys, bool validateUtf8) { + TJsonWriter w(out, formatOutput, sortkeys, validateUtf8); + w.Write(val); + w.Flush(); + } + + void WriteJson(IOutputStream* out, const TJsonValue* val, const TJsonWriterConfig& config) { + TJsonWriter w(out, config, true); + w.Write(val); + w.Flush(); + } + +} diff --git a/library/cpp/json/json_writer.h b/library/cpp/json/json_writer.h new file mode 100644 index 0000000000..c7f5c9499a --- /dev/null +++ b/library/cpp/json/json_writer.h @@ -0,0 +1,196 @@ +#pragma once + +// Deprecated. Use library/cpp/json/writer in new code. 
+ +#include "json_value.h" + +#include <library/cpp/json/writer/json.h> + +#include <util/stream/output.h> +#include <util/generic/hash.h> +#include <util/generic/maybe.h> +#include <util/generic/strbuf.h> + +namespace NJson { + struct TJsonWriterConfig { + constexpr static ui32 DefaultDoubleNDigits = 10; + constexpr static ui32 DefaultFloatNDigits = 6; + constexpr static EFloatToStringMode DefaultFloatToStringMode = PREC_NDIGITS; + + inline TJsonWriterConfig& SetUnbuffered(bool v) noexcept { + Unbuffered = v; + + return *this; + } + + inline TJsonWriterConfig& SetValidateUtf8(bool v) noexcept { + ValidateUtf8 = v; + + return *this; + } + + inline TJsonWriterConfig& SetFormatOutput(bool v) noexcept { + FormatOutput = v; + + return *this; + } + + ui32 DoubleNDigits = DefaultDoubleNDigits; + ui32 FloatNDigits = DefaultFloatNDigits; + EFloatToStringMode FloatToStringMode = DefaultFloatToStringMode; + bool FormatOutput = false; + bool SortKeys = false; + bool ValidateUtf8 = true; + bool DontEscapeStrings = false; + bool Unbuffered = false; + bool WriteNanAsString = false; // NaN and Inf are not valid json values, so if WriteNanAsString is set, writer would write string intead of throwing exception (default case) + }; + + class TJsonWriter { + IOutputStream* Out; + NJsonWriter::TBuf Buf; + const ui32 DoubleNDigits; + const ui32 FloatNDigits; + const EFloatToStringMode FloatToStringMode; + const bool SortKeys; + const bool ValidateUtf8; + const bool DontEscapeStrings; + const bool DontFlushInDestructor; + + public: + TJsonWriter(IOutputStream* out, bool formatOutput, bool sortkeys = false, bool validateUtf8 = true); + TJsonWriter(IOutputStream* out, const TJsonWriterConfig& config, bool DontFlushInDestructor = false); + ~TJsonWriter(); + + void Flush(); + + void OpenMap(); + void OpenMap(const TStringBuf& key) { + Buf.WriteKey(key); + OpenMap(); + } + void CloseMap(); + + void OpenArray(); + void OpenArray(const TStringBuf& key) { + Buf.WriteKey(key); + OpenArray(); + } 
+ void CloseArray(); + + void WriteNull(); + + void Write(const TStringBuf& value); + void Write(float value); + void Write(double value); + void Write(bool value); + void Write(const TJsonValue* value); + void Write(const TJsonValue& value); + + // must use all variations of integer types since long + // and long long are different types but with same size + void Write(long long value); + void Write(unsigned long long value); + void Write(long value) { + Write((long long)value); + } + void Write(unsigned long value) { + Write((unsigned long long)value); + } + void Write(int value) { + Write((long long)value); + } + void Write(unsigned int value) { + Write((unsigned long long)value); + } + void Write(short value) { + Write((long long)value); + } + void Write(unsigned short value) { + Write((unsigned long long)value); + } + + void Write(const unsigned char* value) { + Write((const char*)value); + } + void Write(const char* value) { + Write(TStringBuf(value)); + } + void Write(const TString& value) { + Write(TStringBuf(value)); + } + void Write(const std::string& value) { + Write(TStringBuf(value)); + } + + // write raw json without checks + void UnsafeWrite(const TStringBuf& value) { + Buf.UnsafeWriteValue(value); + } + + template <typename T> + void Write(const TStringBuf& key, const T& value) { + Buf.WriteKey(key); + Write(value); + } + + // write raw json without checks + void UnsafeWrite(const TStringBuf& key, const TStringBuf& value) { + Buf.WriteKey(key); + UnsafeWrite(value); + } + + void WriteNull(const TStringBuf& key) { + Buf.WriteKey(key); + WriteNull(); + } + + template <typename T> + void WriteOptional(const TStringBuf& key, const TMaybe<T>& value) { + if (value) { + Write(key, *value); + } + } + + void WriteOptional(const TStringBuf&, const TNothing&) { + // nothing to do + } + + void WriteKey(const TStringBuf key) { + Buf.WriteKey(key); + } + + void WriteKey(const unsigned char* key) { + WriteKey((const char*)key); + } + + void WriteKey(const char* 
key) { + WriteKey(TStringBuf{key}); + } + + void WriteKey(const TString& key) { + WriteKey(TStringBuf{key}); + } + + void WriteKey(const std::string& key) { + WriteKey(TStringBuf{key}); + } + + NJsonWriter::TBufState State() const { + return Buf.State(); + } + + void Reset(const NJsonWriter::TBufState& from) { + return Buf.Reset(from); + } + + void Reset(NJsonWriter::TBufState&& from) { + return Buf.Reset(std::move(from)); + } + }; + + void WriteJson(IOutputStream*, const TJsonValue*, bool formatOutput = false, bool sortkeys = false, bool validateUtf8 = true); + TString WriteJson(const TJsonValue*, bool formatOutput = true, bool sortkeys = false, bool validateUtf8 = false); + TString WriteJson(const TJsonValue&, bool formatOutput = true, bool sortkeys = false, bool validateUtf8 = false); + void WriteJson(IOutputStream*, const TJsonValue*, const TJsonWriterConfig& config); +} diff --git a/library/cpp/json/rapidjson_helpers.cpp b/library/cpp/json/rapidjson_helpers.cpp new file mode 100644 index 0000000000..2e8159a103 --- /dev/null +++ b/library/cpp/json/rapidjson_helpers.cpp @@ -0,0 +1 @@ +#include "rapidjson_helpers.h" diff --git a/library/cpp/json/rapidjson_helpers.h b/library/cpp/json/rapidjson_helpers.h new file mode 100644 index 0000000000..aeb96ff670 --- /dev/null +++ b/library/cpp/json/rapidjson_helpers.h @@ -0,0 +1,104 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/stream/input.h> + +namespace NJson { + struct TReadOnlyStreamBase { + using Ch = char; + + Ch* PutBegin() { + Y_ASSERT(false); + return nullptr; + } + + void Put(Ch) { + Y_ASSERT(false); + } + + void Flush() { + Y_ASSERT(false); + } + + size_t PutEnd(Ch*) { + Y_ASSERT(false); + return 0; + } + }; + + struct TInputStreamWrapper : TReadOnlyStreamBase { + Ch Peek() const { + if (!Eof) { + if (Pos >= Sz) { + if (Sz < BUF_SIZE) { + Sz += Helper.Read(Buf + Sz, BUF_SIZE - Sz); + } else { + Sz = Helper.Read(Buf, BUF_SIZE); + Pos = 0; + } + } + + if (Pos < Sz) { + return Buf[Pos]; + } 
+ } + + Eof = true; + return 0; + } + + Ch Take() { + auto c = Peek(); + ++Pos; + ++Count; + return c; + } + + size_t Tell() const { + return Count; + } + + TInputStreamWrapper(IInputStream& helper) + : Helper(helper) + , Eof(false) + , Sz(0) + , Pos(0) + , Count(0) + { + } + + static const size_t BUF_SIZE = 1 << 12; + + IInputStream& Helper; + mutable char Buf[BUF_SIZE]; + mutable bool Eof; + mutable size_t Sz; + mutable size_t Pos; + size_t Count; + }; + + struct TStringBufStreamWrapper : TReadOnlyStreamBase { + Ch Peek() const { + return Pos < Data.size() ? Data[Pos] : 0; + } + + Ch Take() { + auto c = Peek(); + ++Pos; + return c; + } + + size_t Tell() const { + return Pos; + } + + TStringBufStreamWrapper(TStringBuf data) + : Data(data) + , Pos(0) + { + } + + TStringBuf Data; + size_t Pos; + }; +} diff --git a/library/cpp/json/ut/json_prettifier_ut.cpp b/library/cpp/json/ut/json_prettifier_ut.cpp new file mode 100644 index 0000000000..ae5f8dd81a --- /dev/null +++ b/library/cpp/json/ut/json_prettifier_ut.cpp @@ -0,0 +1,204 @@ +#include <library/cpp/json/json_prettifier.h> + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(JsonPrettifier) { + Y_UNIT_TEST(PrettifyJsonShort) { + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson(""), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("null"), "null"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("true"), "true"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("false"), "false"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("1.5"), "1.5"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("test", false, 2, true), "'test'"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[]"), "[ ]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[a]", false, 2), "[\n \"a\"\n]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[a,b]", false, 2, true), "[\n 'a',\n 'b'\n]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[{},b]", false, 2, true), "[\n { },\n 'b'\n]"); + 
UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[a,{}]", false, 2, true), "[\n 'a',\n { }\n]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[{},{}]"), "[\n { },\n { }\n]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{}"), "{ }"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{}"), "{ }"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{k:v}", false, 2, true), "{\n 'k' : 'v'\n}"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("Test545", true, 2), "Test545"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'null'", true, 2, true), "'null'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'true'", true, 2, true), "'true'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'false'", true, 2, true), "'false'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'\"'", true, 2, true), "'\"'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'\"'", true, 2, false), "\"\\\"\""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'\\\''", true, 2, true), "'\\u0027'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'\\\''", true, 2, false), "\"'\""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'1b'", true, 2, true), "'1b'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("'Test*545'", true, 2, true), "'Test*545'"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{k:v}", true, 2), "{\n k : v\n}"); + } + + Y_UNIT_TEST(PrettifyJsonLong) { + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[{k:v},{a:b}]", false, 2, true), + "[\n" + " {\n" + " 'k' : 'v'\n" + " },\n" + " {\n" + " 'a' : 'b'\n" + " }\n" + "]"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{k:v,a:b,x:[1,2,3]}", false, 2, true), + "{\n" + " 'k' : 'v',\n" + " 'a' : 'b',\n" + " 'x' : [\n" + " 1,\n" + " 2,\n" + " 3\n" + " ]\n" + "}"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{k:v,a:b,x:[1,{f:b},3],m:n}", false, 2, true), + "{\n" + " 'k' : 'v',\n" + " 'a' : 'b',\n" + " 'x' : [\n" + " 1,\n" + " {\n" + " 'f' : 'b'\n" + " },\n" + " 3\n" + " ],\n" + " 'm' : 'n'\n" + "}"); + + 
NJson::TJsonPrettifier prettifierMaxLevel1 = NJson::TJsonPrettifier::Prettifier(false, 2, true); + prettifierMaxLevel1.MaxPaddingLevel = 1; + UNIT_ASSERT_STRINGS_EQUAL(prettifierMaxLevel1.Prettify("{k:v,a:b,x:[1,{f:b},3],m:n}"), + "{\n" + " 'k' : 'v',\n" + " 'a' : 'b',\n" + " 'x' : [ 1, { 'f' : 'b' }, 3 ],\n" + " 'm' : 'n'\n" + "}"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{g:{x:{a:{b:c,e:f},q:{x:y}},y:fff}}", true, 2), + "{\n" + " g : {\n" + " x : {\n" + " a : {\n" + " b : c,\n" + " e : f\n" + " },\n" + " q : {\n" + " x : y\n" + " }\n" + " },\n" + " y : fff\n" + " }\n" + "}"); + + NJson::TJsonPrettifier prettifierMaxLevel3 = NJson::TJsonPrettifier::Prettifier(true, 2); + prettifierMaxLevel3.MaxPaddingLevel = 3; + UNIT_ASSERT_STRINGS_EQUAL(prettifierMaxLevel3.Prettify("{g:{x:{a:{b:c,e:f},q:{x:y}},y:fff}}"), + "{\n" + " g : {\n" + " x : {\n" + " a : { b : c, e : f },\n" + " q : { x : y }\n" + " },\n" + " y : fff\n" + " }\n" + "}"); + } + + Y_UNIT_TEST(PrettifyJsonInvalid) { + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("}"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("}}"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{}}"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{}}}"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("]"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("]]"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[]]"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[]]]"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("[,,,]"), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::PrettifyJson("{,,,}"), ""); + } + + Y_UNIT_TEST(CompactifyJsonShort) { + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson(""), ""); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("null"), "null"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("true"), "true"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("false"), "false"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("1.5"), "1.5"); + 
UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("test", true), "test"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("test", false), "\"test\""); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("[ ]"), "[]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("[\n \"a\"\n]", true), "[a]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("[\n 'a',\n 'b'\n]", true), "[a,b]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("[\n { },\n 'b'\n]", true), "[{},b]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("[\n 'a',\n { }\n]", true), "[a,{}]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("[\n { },\n { }\n]", true), "[{},{}]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("{ }"), "{}"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson("{\n 'k' : 'v'\n}", true), "{k:v}"); + } + + Y_UNIT_TEST(CompactifyJsonLong) { + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson( + "[\n" + " {\n" + " 'k' : 'v'\n" + " },\n" + " {\n" + " 'a' : 'b'\n" + " }\n" + "]", + true), + "[{k:v},{a:b}]"); + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson( + "{\n" + " 'k' : 'v',\n" + " 'a' : 'b',\n" + " 'x' : [\n" + " 1,\n" + " 2,\n" + " 3\n" + " ]\n" + "}", + true), + "{k:v,a:b,x:[1,2,3]}"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson( + "{\n" + " 'k' : 'v',\n" + " 'a' : 'b',\n" + " 'x' : [\n" + " 1,\n" + " {\n" + " 'f' : 'b'\n" + " },\n" + " 3\n" + " ],\n" + " 'm' : 'n'\n" + "}", + true), + "{k:v,a:b,x:[1,{f:b},3],m:n}"); + + UNIT_ASSERT_STRINGS_EQUAL(NJson::CompactifyJson( + "{\n" + " g : {\n" + " x : {\n" + " a : {\n" + " b : c,\n" + " e : f\n" + " },\n" + " q : {\n" + " x : y\n" + " }\n" + " },\n" + " y : fff\n" + " }\n" + "}", + true), + "{g:{x:{a:{b:c,e:f},q:{x:y}},y:fff}}"); + } +} diff --git a/library/cpp/json/ut/json_reader_fast_ut.cpp b/library/cpp/json/ut/json_reader_fast_ut.cpp new file mode 100644 index 0000000000..60dffc91c7 --- /dev/null +++ b/library/cpp/json/ut/json_reader_fast_ut.cpp @@ -0,0 +1,304 @@ +#include <library/cpp/json/json_reader.h> 
+#include <library/cpp/json/json_prettifier.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h> +#include <util/string/cast.h> +#include <util/string/printf.h> + +namespace NJson { + namespace NTest { + enum ETestEvent { + E_NO_EVENT = 0, + E_ERROR = 1, + E_DICT_OPEN, + E_DICT_CLOSE, + E_ARR_OPEN, + E_ARR_CLOSE, + E_NULL, + E_BOOL, + E_FLT, + E_INT, + E_LONG_LONG, + E_STR, + E_KEY + }; + + struct TEvent { + ETestEvent Type = E_NO_EVENT; + + i64 INum = 0; + double DNum = 0; + TString Str; + + TEvent(ETestEvent e = E_NO_EVENT) + : Type(e) + { + } + + TEvent(double v, ETestEvent e) + : Type(e) + , DNum(v) + { + } + + TEvent(i64 v, ETestEvent e) + : Type(e) + , INum(v) + { + } + + TEvent(TStringBuf t, ETestEvent e) + : Type(e) + , Str(NEscJ::EscapeJ<true, false>(t)) + { + } + + TString ToString() const { + switch (Type) { + default: + return "YOUFAILED"; + case E_ERROR: + return Sprintf("error: %s", Str.data()); + case E_DICT_OPEN: + return "{"; + case E_DICT_CLOSE: + return "}"; + case E_ARR_OPEN: + return "["; + case E_ARR_CLOSE: + return "]"; + case E_NULL: + return "null"; + case E_BOOL: + return INum ? 
"true" : "false"; + case E_INT: + return ::ToString(INum); + case E_FLT: + return ::ToString(DNum); + case E_STR: + return Sprintf("%s", Str.data()); + case E_KEY: + return Sprintf("key: %s", Str.data()); + } + } + }; + + using TEvents = TVector<TEvent>; + + struct TTestHandler : TJsonCallbacks { + TEvents Events; + + bool OnOpenMap() override { + Events.push_back(E_DICT_OPEN); + return true; + } + + bool OnCloseMap() override { + Events.push_back(E_DICT_CLOSE); + return true; + } + + bool OnOpenArray() override { + Events.push_back(E_ARR_OPEN); + return true; + } + + bool OnCloseArray() override { + Events.push_back(E_ARR_CLOSE); + return true; + } + + bool OnNull() override { + Events.push_back(E_NULL); + return true; + } + + bool OnBoolean(bool v) override { + Events.push_back(TEvent((i64)v, E_BOOL)); + return true; + } + + bool OnInteger(long long v) override { + Events.push_back(TEvent((i64)v, E_INT)); + return true; + } + + bool OnUInteger(unsigned long long v) override { + return OnInteger(v); + } + + bool OnDouble(double v) override { + Events.push_back(TEvent(v, E_FLT)); + return true; + } + + bool OnString(const TStringBuf& v) override { + Events.push_back(TEvent(v, E_STR)); + return true; + } + + bool OnMapKey(const TStringBuf& v) override { + Events.push_back(TEvent(v, E_KEY)); + return true; + } + + void OnError(size_t, TStringBuf token) override { + Events.push_back(TEvent(token, E_ERROR)); + } + + void Assert(const TEvents& e, TString str) { + try { + UNIT_ASSERT_VALUES_EQUAL_C(e.size(), Events.size(), str); + + for (ui32 i = 0, sz = e.size(); i < sz; ++i) { + UNIT_ASSERT_VALUES_EQUAL_C((int)e[i].Type, (int)Events[i].Type, Sprintf("'%s' %u", str.data(), i)); + UNIT_ASSERT_VALUES_EQUAL_C(e[i].INum, Events[i].INum, Sprintf("'%s' %u", str.data(), i)); + UNIT_ASSERT_VALUES_EQUAL_C(e[i].DNum, Events[i].DNum, Sprintf("'%s' %u", str.data(), i)); + UNIT_ASSERT_VALUES_EQUAL_C(e[i].Str, Events[i].Str, Sprintf("'%s' %u", str.data(), i)); + } + } catch (const 
yexception&) { + Clog << "Exception at '" << str << "'" << Endl; + for (const auto& event : Events) { + Clog << event.ToString() << Endl; + } + + throw; + } + } + }; + } +} + +class TFastJsonTest: public TTestBase { + UNIT_TEST_SUITE(TFastJsonTest) + UNIT_TEST(TestParse) + UNIT_TEST(TestReadJsonFastTree) + UNIT_TEST(TestNoInlineComment) + UNIT_TEST_SUITE_END(); + +public: + template <bool accept> + void DoTestParse(TStringBuf json, ui32 amount, ...) { + using namespace NJson::NTest; + TEvents evs; + va_list vl; + va_start(vl, amount); + for (ui32 i = 0; i < amount; i++) { + ETestEvent e = (ETestEvent)va_arg(vl, int); + + switch ((int)e) { + case E_NO_EVENT: + case E_DICT_OPEN: + case E_DICT_CLOSE: + case E_ARR_OPEN: + case E_ARR_CLOSE: + case E_NULL: + evs.push_back(e); + break; + case E_BOOL: { + bool v = va_arg(vl, int); + evs.push_back(TEvent((i64)v, E_BOOL)); + break; + } + case E_INT: { + i64 i = va_arg(vl, int); + evs.push_back(TEvent(i, E_INT)); + break; + } + case E_LONG_LONG: { + i64 i = va_arg(vl, long long); + evs.push_back(TEvent(i, E_INT)); + break; + } + case E_FLT: { + double f = va_arg(vl, double); + evs.push_back(TEvent(f, E_FLT)); + break; + } + case E_STR: { + const char* s = va_arg(vl, const char*); + evs.push_back(TEvent(TStringBuf(s), E_STR)); + break; + } + case E_KEY: + case E_ERROR: { + const char* s = va_arg(vl, const char*); + evs.push_back(TEvent(TStringBuf(s), e)); + break; + } + } + } + va_end(vl); + + TTestHandler h; + const bool res = ReadJsonFast(json, &h); + UNIT_ASSERT_VALUES_EQUAL_C(res, accept, Sprintf("%s (%s)", ToString(json).data(), h.Events.back().Str.data())); + h.Assert(evs, ToString(json)); + } + + void TestParse() { + using namespace NJson::NTest; + + DoTestParse<true>("", 0); + DoTestParse<true>(" \t \t ", 0); + DoTestParse<true>("a-b-c@аб_вгд909AБ", 1, E_STR, "a-b-c@аб_вгд909AБ"); + DoTestParse<true>("'я тестовая строка'", 1, E_STR, "я тестовая строка"); + DoTestParse<true>("\"я тестовая строка\"", 1, E_STR, "я 
тестовая строка"); + DoTestParse<true>("'\\xA\\xA\\xA'", 1, E_STR, "\n\n\n"); + DoTestParse<true>("12.15", 1, E_FLT, 12.15); + DoTestParse<true>("null", 1, E_NULL); + DoTestParse<true>("true", 1, E_BOOL, true); + DoTestParse<true>("false", 1, E_BOOL, false); + DoTestParse<true>("[]", 2, E_ARR_OPEN, E_ARR_CLOSE); + DoTestParse<true>("[ a ]", 3, E_ARR_OPEN, E_STR, "a", E_ARR_CLOSE); + DoTestParse<true>("[ a, b ]", 4, E_ARR_OPEN, E_STR, "a", E_STR, "b", E_ARR_CLOSE); + DoTestParse<true>("[a,b]", 4, E_ARR_OPEN, E_STR, "a", E_STR, "b", E_ARR_CLOSE); + DoTestParse<false>("[a,b][a,b]", 5, E_ARR_OPEN, E_STR, "a", E_STR, "b", E_ARR_CLOSE, E_ERROR, "invalid syntax at token: '['"); + DoTestParse<false>("[a,,b]", 3, E_ARR_OPEN, E_STR, "a", E_ERROR, "invalid syntax at token: ','"); + DoTestParse<true>("{ k : v }", 4, E_DICT_OPEN, E_KEY, "k", E_STR, "v", E_DICT_CLOSE); + DoTestParse<true>("{a:'\\b'/*comment*/, k /*comment*/\n : v }", 6, E_DICT_OPEN, E_KEY, "a", E_STR, "\b", E_KEY, "k", E_STR, "v", E_DICT_CLOSE); + DoTestParse<true>("{a:.15, k : v }", 6, E_DICT_OPEN, E_KEY, "a", E_FLT, .15, E_KEY, "k", E_STR, "v", E_DICT_CLOSE); + DoTestParse<true>("[ a, -.1e+5, 1E-7]", 5, E_ARR_OPEN, E_STR, "a", E_FLT, -.1e+5, E_FLT, 1e-7, E_ARR_CLOSE); + DoTestParse<true>("{}", 2, E_DICT_OPEN, E_DICT_CLOSE); + DoTestParse<true>("{ a : x, b : [ c, d, ] }", 9, E_DICT_OPEN, E_KEY, "a", E_STR, "x", E_KEY, "b", E_ARR_OPEN, E_STR, "c", E_STR, "d", E_ARR_CLOSE, E_DICT_CLOSE); + DoTestParse<false>("{ a : x, b : [ c, d,, ] }", 8, E_DICT_OPEN, E_KEY, "a", E_STR, "x", E_KEY, "b", E_ARR_OPEN, E_STR, "c", E_STR, "d", E_ERROR, "invalid syntax at token: ','"); + // DoTestParse<false>("{ a : x : y }", 4, E_DICT_OPEN + // , E_KEY, "a", E_STR, "x" + // , E_ERROR + // , ":"); + // DoTestParse<false>("{queries:{ref:[]},{nonref:[]}}", 8, E_DICT_OPEN + // , E_KEY, "queries", E_DICT_OPEN + // , E_KEY, "ref", E_ARR_OPEN, E_ARR_CLOSE + // , E_DICT_CLOSE, E_ERROR, ""); + DoTestParse<true>("'100x00'", 1, E_STR, 
"100x00"); + DoTestParse<true>("-1", 1, E_INT, -1); + DoTestParse<true>("-9223372036854775808", 1, E_LONG_LONG, (long long)Min<i64>()); + DoTestParse<false>("100x00", 1, E_ERROR, "invalid syntax at token: '100x'"); + DoTestParse<false>("100 200", 2, E_INT, 100, E_ERROR, "invalid syntax at token: '200'"); + DoTestParse<true>("{g:{x:{a:{b:c,e:f},q:{x:y}},y:fff}}", 22, E_DICT_OPEN, E_KEY, "g", E_DICT_OPEN, E_KEY, "x", E_DICT_OPEN, E_KEY, "a", E_DICT_OPEN, E_KEY, "b", E_STR, "c", E_KEY, "e", E_STR, "f", E_DICT_CLOSE, E_KEY, "q", E_DICT_OPEN, E_KEY, "x", E_STR, "y", E_DICT_CLOSE, E_DICT_CLOSE, E_KEY, "y", E_STR, "fff", E_DICT_CLOSE, E_DICT_CLOSE); + } + + void TestReadJsonFastTree() { + const TString json = R"( + { + "a": { + "b": {} + } + }} + )"; + NJson::TJsonValue value; + UNIT_ASSERT(!ReadJsonFastTree(json, &value)); + } + + void TestNoInlineComment() { + using namespace NJson::NTest; + DoTestParse<false>("{\"a\":1}//d{\"b\":2}", 5, E_DICT_OPEN, E_KEY, "a", E_INT, 1, E_DICT_CLOSE, E_ERROR, "invalid syntax at token: '/'"); + DoTestParse<false>("{\"a\":1}//d{\"b\":2}\n", 5, E_DICT_OPEN, E_KEY, "a", E_INT, 1, E_DICT_CLOSE, E_ERROR, "invalid syntax at token: '/'"); + DoTestParse<false>("{\"a\":{//d{\"b\":2}\n}}", 4, E_DICT_OPEN, E_KEY, "a", E_DICT_OPEN, E_ERROR, "invalid syntax at token: '/'"); + DoTestParse<false>("{\"a\":{//d{\"b\":2}}}\n", 4, E_DICT_OPEN, E_KEY, "a", E_DICT_OPEN, E_ERROR, "invalid syntax at token: '/'"); + DoTestParse<false>("{\"a\":{//d{\"b\":2}}}", 4, E_DICT_OPEN, E_KEY, "a", E_DICT_OPEN, E_ERROR, "invalid syntax at token: '/'"); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TFastJsonTest) diff --git a/library/cpp/json/ut/json_reader_ut.cpp b/library/cpp/json/ut/json_reader_ut.cpp new file mode 100644 index 0000000000..cd31afa0b8 --- /dev/null +++ b/library/cpp/json/ut/json_reader_ut.cpp @@ -0,0 +1,430 @@ +#include <library/cpp/json/json_reader.h> +#include <library/cpp/json/json_writer.h> + +#include <library/cpp/testing/unittest/registar.h> +#include 
<util/stream/str.h> + +using namespace NJson; + +class TReformatCallbacks: public TJsonCallbacks { + TJsonWriter& Writer; + +public: + TReformatCallbacks(TJsonWriter& writer) + : Writer(writer) + { + } + + bool OnBoolean(bool val) override { + Writer.Write(val); + return true; + } + + bool OnInteger(long long val) override { + Writer.Write(val); + return true; + } + + bool OnUInteger(unsigned long long val) override { + Writer.Write(val); + return true; + } + + bool OnString(const TStringBuf& val) override { + Writer.Write(val); + return true; + } + + bool OnDouble(double val) override { + Writer.Write(val); + return true; + } + + bool OnOpenArray() override { + Writer.OpenArray(); + return true; + } + + bool OnCloseArray() override { + Writer.CloseArray(); + return true; + } + + bool OnOpenMap() override { + Writer.OpenArray(); + return true; + } + + bool OnCloseMap() override { + Writer.CloseArray(); + return true; + } + + bool OnMapKey(const TStringBuf& val) override { + Writer.Write(val); + return true; + } +}; + +Y_UNIT_TEST_SUITE(TJsonReaderTest) { + Y_UNIT_TEST(JsonReformatTest) { + TString data = "{\"null value\": null, \"intkey\": 10, \"double key\": 11.11, \"string key\": \"string\", \"array\": [1,2,3,\"TString\"], \"bool key\": true}"; + + TString result1, result2; + { + TStringStream in; + in << data; + TStringStream out; + TJsonWriter writer(&out, false); + TReformatCallbacks cb(writer); + ReadJson(&in, &cb); + writer.Flush(); + result1 = out.Str(); + } + + { + TStringStream in; + in << result1; + TStringStream out; + TJsonWriter writer(&out, false); + TReformatCallbacks cb(writer); + ReadJson(&in, &cb); + writer.Flush(); + result2 = out.Str(); + } + + UNIT_ASSERT_VALUES_EQUAL(result1, result2); + } + + Y_UNIT_TEST(TJsonEscapedApostrophe) { + TString jsonString = "{ \"foo\" : \"bar\\'buzz\" }"; + { + TStringStream in; + in << jsonString; + TStringStream out; + TJsonWriter writer(&out, false); + TReformatCallbacks cb(writer); + 
UNIT_ASSERT(!ReadJson(&in, &cb)); + } + + { + TStringStream in; + in << jsonString; + TStringStream out; + TJsonWriter writer(&out, false); + TReformatCallbacks cb(writer); + UNIT_ASSERT(ReadJson(&in, false, true, &cb)); + writer.Flush(); + UNIT_ASSERT_EQUAL(out.Str(), "[\"foo\",\"bar'buzz\"]"); + } + } + + Y_UNIT_TEST(TJsonTreeTest) { + TString data = "{\"intkey\": 10, \"double key\": 11.11, \"null value\":null, \"string key\": \"string\", \"array\": [1,2,3,\"TString\"], \"bool key\": true}"; + TStringStream in; + in << data; + TJsonValue value; + ReadJsonTree(&in, &value); + + UNIT_ASSERT_VALUES_EQUAL(value["intkey"].GetInteger(), 10); + UNIT_ASSERT_DOUBLES_EQUAL(value["double key"].GetDouble(), 11.11, 0.001); + UNIT_ASSERT_VALUES_EQUAL(value["bool key"].GetBoolean(), true); + UNIT_ASSERT_VALUES_EQUAL(value["absent string key"].GetString(), TString("")); + UNIT_ASSERT_VALUES_EQUAL(value["string key"].GetString(), TString("string")); + UNIT_ASSERT_VALUES_EQUAL(value["array"][0].GetInteger(), 1); + UNIT_ASSERT_VALUES_EQUAL(value["array"][1].GetInteger(), 2); + UNIT_ASSERT_VALUES_EQUAL(value["array"][2].GetInteger(), 3); + UNIT_ASSERT_VALUES_EQUAL(value["array"][3].GetString(), TString("TString")); + UNIT_ASSERT(value["null value"].IsNull()); + + // AsString + UNIT_ASSERT_VALUES_EQUAL(value["intkey"].GetStringRobust(), "10"); + UNIT_ASSERT_VALUES_EQUAL(value["double key"].GetStringRobust(), "11.11"); + UNIT_ASSERT_VALUES_EQUAL(value["bool key"].GetStringRobust(), "true"); + UNIT_ASSERT_VALUES_EQUAL(value["string key"].GetStringRobust(), "string"); + UNIT_ASSERT_VALUES_EQUAL(value["array"].GetStringRobust(), "[1,2,3,\"TString\"]"); + UNIT_ASSERT_VALUES_EQUAL(value["null value"].GetStringRobust(), "null"); + + const TJsonValue::TArray* array; + UNIT_ASSERT(GetArrayPointer(value, "array", &array)); + UNIT_ASSERT_VALUES_EQUAL(value["array"].GetArray().size(), array->size()); + UNIT_ASSERT_VALUES_EQUAL(value["array"][0].GetInteger(), (*array)[0].GetInteger()); + 
UNIT_ASSERT_VALUES_EQUAL(value["array"][1].GetInteger(), (*array)[1].GetInteger()); + UNIT_ASSERT_VALUES_EQUAL(value["array"][2].GetInteger(), (*array)[2].GetInteger()); + UNIT_ASSERT_VALUES_EQUAL(value["array"][3].GetString(), (*array)[3].GetString()); + } + + Y_UNIT_TEST(TJsonRomaTest) { + TString data = "{\"test\": [ {\"name\": \"A\"} ]}"; + + TStringStream in; + in << data; + TJsonValue value; + ReadJsonTree(&in, &value); + + UNIT_ASSERT_VALUES_EQUAL(value["test"][0]["name"].GetString(), TString("A")); + } + + Y_UNIT_TEST(TJsonReadTreeWithComments) { + { + TString leadingCommentData = "{ // \"test\" : 1 \n}"; + { + // No comments allowed + TStringStream in; + in << leadingCommentData; + TJsonValue value; + UNIT_ASSERT(!ReadJsonTree(&in, false, &value)); + } + + { + // Comments allowed + TStringStream in; + in << leadingCommentData; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, true, &value)); + UNIT_ASSERT(!value.Has("test")); + } + } + + { + TString trailingCommentData = "{ \"test1\" : 1 // \"test2\" : 2 \n }"; + { + // No comments allowed + TStringStream in; + in << trailingCommentData; + TJsonValue value; + UNIT_ASSERT(!ReadJsonTree(&in, false, &value)); + } + + { + // Comments allowed + TStringStream in; + in << trailingCommentData; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, true, &value)); + UNIT_ASSERT(value.Has("test1")); + UNIT_ASSERT_EQUAL(value["test1"].GetInteger(), 1); + UNIT_ASSERT(!value.Has("test2")); + } + } + } + + Y_UNIT_TEST(TJsonSignedIntegerTest) { + { + TStringStream in; + in << "{ \"test\" : " << Min<i64>() << " }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsInteger()); + UNIT_ASSERT(!value["test"].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"].GetInteger(), Min<i64>()); + UNIT_ASSERT_EQUAL(value["test"].GetIntegerRobust(), Min<i64>()); + } // Min<i64>() + + { + TStringStream in; + in << "{ \"test\" : " << Max<i64>() + 1ull << " }"; + 
TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(!value["test"].IsInteger()); + UNIT_ASSERT(value["test"].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"].GetIntegerRobust(), (i64)(Max<i64>() + 1ull)); + } // Max<i64>() + 1 + } + + Y_UNIT_TEST(TJsonUnsignedIntegerTest) { + { + TStringStream in; + in << "{ \"test\" : 1 }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsInteger()); + UNIT_ASSERT(value["test"].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"].GetInteger(), 1); + UNIT_ASSERT_EQUAL(value["test"].GetIntegerRobust(), 1); + UNIT_ASSERT_EQUAL(value["test"].GetUInteger(), 1); + UNIT_ASSERT_EQUAL(value["test"].GetUIntegerRobust(), 1); + } // 1 + + { + TStringStream in; + in << "{ \"test\" : -1 }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsInteger()); + UNIT_ASSERT(!value["test"].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"].GetInteger(), -1); + UNIT_ASSERT_EQUAL(value["test"].GetIntegerRobust(), -1); + UNIT_ASSERT_EQUAL(value["test"].GetUInteger(), 0); + UNIT_ASSERT_EQUAL(value["test"].GetUIntegerRobust(), static_cast<unsigned long long>(-1)); + } // -1 + + { + TStringStream in; + in << "{ \"test\" : 18446744073709551615 }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(!value["test"].IsInteger()); + UNIT_ASSERT(value["test"].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"].GetInteger(), 0); + UNIT_ASSERT_EQUAL(value["test"].GetIntegerRobust(), static_cast<long long>(18446744073709551615ull)); + UNIT_ASSERT_EQUAL(value["test"].GetUInteger(), 18446744073709551615ull); + UNIT_ASSERT_EQUAL(value["test"].GetUIntegerRobust(), 18446744073709551615ull); + } // 18446744073709551615 + + { + TStringStream in; + in << "{ \"test\" : 1.1 }"; + TJsonValue value; + 
UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(!value["test"].IsInteger()); + UNIT_ASSERT(!value["test"].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"].GetInteger(), 0); + UNIT_ASSERT_EQUAL(value["test"].GetIntegerRobust(), static_cast<long long>(1.1)); + UNIT_ASSERT_EQUAL(value["test"].GetUInteger(), 0); + UNIT_ASSERT_EQUAL(value["test"].GetUIntegerRobust(), static_cast<unsigned long long>(1.1)); + } // 1.1 + + { + TStringStream in; + in << "{ \"test\" : [1, 18446744073709551615] }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsArray()); + UNIT_ASSERT_EQUAL(value["test"].GetArray().size(), 2); + UNIT_ASSERT(value["test"][0].IsInteger()); + UNIT_ASSERT(value["test"][0].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"][0].GetInteger(), 1); + UNIT_ASSERT_EQUAL(value["test"][0].GetUInteger(), 1); + UNIT_ASSERT(!value["test"][1].IsInteger()); + UNIT_ASSERT(value["test"][1].IsUInteger()); + UNIT_ASSERT_EQUAL(value["test"][1].GetUInteger(), 18446744073709551615ull); + } + } // TJsonUnsignedIntegerTest + + Y_UNIT_TEST(TJsonDoubleTest) { + { + TStringStream in; + in << "{ \"test\" : 1.0 }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsDouble()); + UNIT_ASSERT_EQUAL(value["test"].GetDouble(), 1.0); + UNIT_ASSERT_EQUAL(value["test"].GetDoubleRobust(), 1.0); + } // 1.0 + + { + TStringStream in; + in << "{ \"test\" : 1 }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsDouble()); + UNIT_ASSERT_EQUAL(value["test"].GetDouble(), 1.0); + UNIT_ASSERT_EQUAL(value["test"].GetDoubleRobust(), 1.0); + } // 1 + + { + TStringStream in; + in << "{ \"test\" : -1 }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(value["test"].IsDouble()); + 
UNIT_ASSERT_EQUAL(value["test"].GetDouble(), -1.0); + UNIT_ASSERT_EQUAL(value["test"].GetDoubleRobust(), -1.0); + } // -1 + + { + TStringStream in; + in << "{ \"test\" : " << Max<ui64>() << " }"; + TJsonValue value; + UNIT_ASSERT(ReadJsonTree(&in, &value)); + UNIT_ASSERT(value.Has("test")); + UNIT_ASSERT(!value["test"].IsDouble()); + UNIT_ASSERT_EQUAL(value["test"].GetDouble(), 0.0); + UNIT_ASSERT_EQUAL(value["test"].GetDoubleRobust(), static_cast<double>(Max<ui64>())); + } // Max<ui64>() + } // TJsonDoubleTest + + Y_UNIT_TEST(TJsonInvalidTest) { + { + // No exceptions mode. + TStringStream in; + in << "{ \"test\" : }"; + TJsonValue value; + UNIT_ASSERT(!ReadJsonTree(&in, &value)); + } + + { + // Exception throwing mode. + TStringStream in; + in << "{ \"test\" : }"; + TJsonValue value; + UNIT_ASSERT_EXCEPTION(ReadJsonTree(&in, &value, true), TJsonException); + } + } + + Y_UNIT_TEST(TJsonMemoryLeakTest) { + // after https://clubs.at.yandex-team.ru/stackoverflow/3691 + TString s = "."; + NJson::TJsonValue json; + try { + TStringInput in(s); + NJson::ReadJsonTree(&in, &json, true); + } catch (...) 
{ + } + } // TJsonMemoryLeakTest + + Y_UNIT_TEST(TJsonDuplicateKeysWithNullValuesTest) { + const TString json = "{\"\":null,\"\":\"\"}"; + + TStringInput in(json); + NJson::TJsonValue v; + UNIT_ASSERT(ReadJsonTree(&in, &v)); + UNIT_ASSERT(v.IsMap()); + UNIT_ASSERT_VALUES_EQUAL(1, v.GetMap().size()); + UNIT_ASSERT_VALUES_EQUAL("", v.GetMap().begin()->first); + UNIT_ASSERT(v.GetMap().begin()->second.IsString()); + UNIT_ASSERT_VALUES_EQUAL("", v.GetMap().begin()->second.GetString()); + } +} + + +static const TString YANDEX_STREAMING_JSON("{\"a\":1}//d{\"b\":2}"); + + +Y_UNIT_TEST_SUITE(TCompareReadJsonFast) { + Y_UNIT_TEST(NoEndl) { + NJson::TJsonValue parsed; + + bool success = NJson::ReadJsonTree(YANDEX_STREAMING_JSON, &parsed, false); + bool fast_success = NJson::ReadJsonFastTree(YANDEX_STREAMING_JSON, &parsed, false); + UNIT_ASSERT(success == fast_success); + } + Y_UNIT_TEST(WithEndl) { + NJson::TJsonValue parsed1; + NJson::TJsonValue parsed2; + + bool success = NJson::ReadJsonTree(YANDEX_STREAMING_JSON + "\n", &parsed1, false); + bool fast_success = NJson::ReadJsonFastTree(YANDEX_STREAMING_JSON + "\n", &parsed2, false); + + UNIT_ASSERT_VALUES_EQUAL(success, fast_success); + } + Y_UNIT_TEST(NoQuotes) { + TString streamingJson = "{a:1}"; + NJson::TJsonValue parsed; + + bool success = NJson::ReadJsonTree(streamingJson, &parsed, false); + bool fast_success = NJson::ReadJsonFastTree(streamingJson, &parsed, false); + UNIT_ASSERT(success != fast_success); + } +} diff --git a/library/cpp/json/ut/json_saveload_ut.cpp b/library/cpp/json/ut/json_saveload_ut.cpp new file mode 100644 index 0000000000..b480a80fe4 --- /dev/null +++ b/library/cpp/json/ut/json_saveload_ut.cpp @@ -0,0 +1,36 @@ +#include <library/cpp/json/json_value.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <util/stream/buffer.h> +#include <util/generic/buffer.h> +#include <util/ysaveload.h> + +Y_UNIT_TEST_SUITE(JsonSaveLoad) { + Y_UNIT_TEST(Serialize) { + + NJson::TJsonValue expected; + + 
expected["ui64"] = ui64(1); + expected["i64"] = i64(2); + expected["double"] = 2.0; + expected["string"] = "text"; + expected["map"] = expected; + expected["array"].SetType(NJson::JSON_ARRAY).GetArraySafe().emplace_back(expected); + expected["null"].SetType(NJson::JSON_NULL); + expected["undefined"].SetType(NJson::JSON_UNDEFINED); + + TBuffer buffer; + { + TBufferOutput output(buffer); + ::Save(&output, expected); + } + + NJson::TJsonValue load; + { + TBufferInput input(buffer); + ::Load(&input, load); + } + + UNIT_ASSERT_EQUAL_C(expected, load, "expected: " << expected << ", got: " << load); + } +} diff --git a/library/cpp/json/ut/json_writer_ut.cpp b/library/cpp/json/ut/json_writer_ut.cpp new file mode 100644 index 0000000000..ca11d34dad --- /dev/null +++ b/library/cpp/json/ut/json_writer_ut.cpp @@ -0,0 +1,228 @@ +#include <library/cpp/json/json_writer.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/str.h> + +using namespace NJson; + +Y_UNIT_TEST_SUITE(TJsonWriterTest) { + Y_UNIT_TEST(SimpleWriteTest) { + TString expected1 = "{\"key1\":1,\"key2\":2,\"key3\":3"; + TString expected2 = expected1 + ",\"array\":[\"stroka\",false]"; + TString expected3 = expected2 + "}"; + + TStringStream out; + + TJsonWriter json(&out, false); + json.OpenMap(); + json.Write("key1", (ui16)1); + json.WriteKey("key2"); + json.Write((i32)2); + json.Write("key3", (ui64)3); + + UNIT_ASSERT(out.Empty()); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected1); + + json.Write("array"); + json.OpenArray(); + json.Write("stroka"); + json.Write(false); + json.CloseArray(); + + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected1); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected2); + + json.CloseMap(); + + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected2); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected3); + } + + Y_UNIT_TEST(SimpleWriteValueTest) { + TString expected = 
"{\"key1\":null,\"key2\":{\"subkey1\":[1,{\"subsubkey\":\"test2\"},null,true],\"subkey2\":\"test\"}}"; + TJsonValue v; + v["key1"] = JSON_NULL; + v["key2"]["subkey1"].AppendValue(1); + v["key2"]["subkey1"].AppendValue(JSON_MAP)["subsubkey"] = "test2"; + v["key2"]["subkey1"].AppendValue(JSON_NULL); + v["key2"]["subkey1"].AppendValue(true); + v["key2"]["subkey2"] = "test"; + TStringStream out; + WriteJson(&out, &v); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } + + Y_UNIT_TEST(FormatOutput) { + TString expected = "{\n \"key1\":null,\n \"key2\":\n {\n \"subkey1\":\n [\n 1,\n {\n \"subsubkey\":\"test2\"\n },\n null,\n true\n ],\n \"subkey2\":\"test\"\n }\n}"; + TJsonValue v; + v["key1"] = JSON_NULL; + v["key2"]["subkey1"].AppendValue(1); + v["key2"]["subkey1"].AppendValue(JSON_MAP)["subsubkey"] = "test2"; + v["key2"]["subkey1"].AppendValue(JSON_NULL); + v["key2"]["subkey1"].AppendValue(true); + v["key2"]["subkey2"] = "test"; + TStringStream out; + WriteJson(&out, &v, true); + UNIT_ASSERT_STRINGS_EQUAL(out.Str(), expected); + } + + Y_UNIT_TEST(SortKeys) { + TString expected = "{\"a\":null,\"j\":null,\"n\":null,\"y\":null,\"z\":null}"; + TJsonValue v; + v["z"] = JSON_NULL; + v["n"] = JSON_NULL; + v["a"] = JSON_NULL; + v["y"] = JSON_NULL; + v["j"] = JSON_NULL; + TStringStream out; + WriteJson(&out, &v, false, true); + UNIT_ASSERT_STRINGS_EQUAL(out.Str(), expected); + } + + Y_UNIT_TEST(SimpleUnsignedIntegerWriteTest) { + { + TString expected = "{\"test\":1}"; + TJsonValue v; + v.InsertValue("test", 1ull); + TStringStream out; + WriteJson(&out, &v); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } // 1 + + { + TString expected = "{\"test\":-1}"; + TJsonValue v; + v.InsertValue("test", -1); + TStringStream out; + WriteJson(&out, &v); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } // -1 + + { + TString expected = "{\"test\":18446744073709551615}"; + TJsonValue v; + v.InsertValue("test", 18446744073709551615ull); + TStringStream out; + WriteJson(&out, &v); + 
UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } // 18446744073709551615 + + { + TString expected = "{\"test\":[1,18446744073709551615]}"; + TJsonValue v; + v.InsertValue("test", TJsonValue()); + v["test"].AppendValue(1); + v["test"].AppendValue(18446744073709551615ull); + TStringStream out; + WriteJson(&out, &v); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } // 18446744073709551615 + } // SimpleUnsignedIntegerWriteTest + + Y_UNIT_TEST(WriteOptionalTest) { + { + TString expected = "{\"test\":1}"; + + TStringStream out; + + TJsonWriter json(&out, false); + json.OpenMap(); + json.WriteOptional("test", MakeMaybe<int>(1)); + json.CloseMap(); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } + + { + TString expected = "{}"; + + TStringStream out; + + TMaybe<int> nothing = Nothing(); + + TJsonWriter json(&out, false); + json.OpenMap(); + json.WriteOptional("test", nothing); + json.CloseMap(); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } + + { + TString expected = "{}"; + + TStringStream out; + + TMaybe<int> empty; + + TJsonWriter json(&out, false); + json.OpenMap(); + json.WriteOptional("test", empty); + json.CloseMap(); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } + + { + TString expected = "{}"; + + TStringStream out; + + TJsonWriter json(&out, false); + json.OpenMap(); + json.WriteOptional("test", Nothing()); + json.CloseMap(); + json.Flush(); + UNIT_ASSERT_VALUES_EQUAL(out.Str(), expected); + } + } + + Y_UNIT_TEST(Callback) { + NJsonWriter::TBuf json; + json.WriteString("A"); + UNIT_ASSERT_VALUES_EQUAL(json.Str(), "\"A\""); + UNIT_ASSERT_VALUES_EQUAL(WrapJsonToCallback(json, ""), "\"A\""); + UNIT_ASSERT_VALUES_EQUAL(WrapJsonToCallback(json, "Foo"), "Foo(\"A\")"); + } + + Y_UNIT_TEST(FloatPrecision) { + const double value = 1517933989.4242; + const NJson::TJsonValue json(value); + NJson::TJsonWriterConfig config; + { + TString expected = "1517933989"; + TString actual = NJson::WriteJson(json); 
+ UNIT_ASSERT_VALUES_EQUAL(actual, expected); + } + { + TString expected = "1517933989"; + + TStringStream ss; + NJson::WriteJson(&ss, &json, config); + TString actual = ss.Str(); + UNIT_ASSERT_VALUES_EQUAL(actual, expected); + } + { + config.DoubleNDigits = 13; + TString expected = "1517933989.424"; + + TStringStream ss; + NJson::WriteJson(&ss, &json, config); + TString actual = ss.Str(); + UNIT_ASSERT_VALUES_EQUAL(actual, expected); + } + { + config.DoubleNDigits = 6; + config.FloatToStringMode = PREC_POINT_DIGITS; + TString expected = "1517933989.424200"; + + TStringStream ss; + NJson::WriteJson(&ss, &json, config); + TString actual = ss.Str(); + UNIT_ASSERT_VALUES_EQUAL(actual, expected); + } + } +} diff --git a/library/cpp/json/ut/ya.make b/library/cpp/json/ut/ya.make new file mode 100644 index 0000000000..8e0362d84b --- /dev/null +++ b/library/cpp/json/ut/ya.make @@ -0,0 +1,17 @@ +OWNER(velavokr) + +UNITTEST_FOR(library/cpp/json) + +PEERDIR( + library/cpp/string_utils/relaxed_escaper +) + +SRCS( + json_reader_fast_ut.cpp + json_reader_ut.cpp + json_prettifier_ut.cpp + json_writer_ut.cpp + json_saveload_ut.cpp +) + +END() diff --git a/library/cpp/json/writer/README b/library/cpp/json/writer/README new file mode 100644 index 0000000000..a20489f32e --- /dev/null +++ b/library/cpp/json/writer/README @@ -0,0 +1,23 @@ +JSON writer with no external dependencies, producing output +where HTML special characters are always escaped. + +Use it like this: + + #include <library/cpp/json/writer/json.h> + ... 
+ + NJsonWriter::TBuf json; + json.BeginList() + .WriteString("<script>") + .EndList(); + Cout << json.Str(); // output: ["\u003Cscript\u003E"] + +For compatibility with legacy formats where object keys +are not quoted, use CompatWriteKeyWithoutQuotes: + + NJsonWriter::TBuf json; + json.BeginObject() + .CompatWriteKeyWithoutQuotes("r").WriteInt(1) + .CompatWriteKeyWithoutQuotes("n").WriteInt(0) + .EndObject(); + Cout << json.Str(); // output: {r:1,n:0} diff --git a/library/cpp/json/writer/json.cpp b/library/cpp/json/writer/json.cpp new file mode 100644 index 0000000000..02370c2d79 --- /dev/null +++ b/library/cpp/json/writer/json.cpp @@ -0,0 +1,517 @@ +#include "json.h" + +#include <library/cpp/json/json_value.h> + +#include <util/string/cast.h> +#include <util/string/strspn.h> +#include <util/generic/algorithm.h> +#include <util/generic/ymath.h> +#include <util/generic/singleton.h> + +namespace NJsonWriter { + TBuf::TBuf(EHtmlEscapeMode mode, IOutputStream* stream) + : Stream(stream) + , NeedComma(false) + , NeedNewline(false) + , EscapeMode(mode) + , IndentSpaces(0) + , WriteNanAsString(false) + { + Y_ASSERT(mode == HEM_DONT_ESCAPE_HTML || + mode == HEM_ESCAPE_HTML || + mode == HEM_RELAXED || + mode == HEM_UNSAFE); + if (!Stream) { + StringStream.Reset(new TStringStream); + Stream = StringStream.Get(); + } + + Stack.reserve(64); // should be enough for most cases + StackPush(JE_OUTER_SPACE); + } + + static const char* EntityToStr(EJsonEntity e) { + switch (e) { + case JE_OUTER_SPACE: + return "JE_OUTER_SPACE"; + case JE_LIST: + return "JE_LIST"; + case JE_OBJECT: + return "JE_OBJECT"; + case JE_PAIR: + return "JE_PAIR"; + default: + return "JE_unknown"; + } + } + + inline void TBuf::StackPush(EJsonEntity e) { + Stack.push_back(e); + } + + inline EJsonEntity TBuf::StackTop() const { + return Stack.back(); + } + + inline void TBuf::StackPop() { + Y_ASSERT(!Stack.empty()); + const EJsonEntity current = StackTop(); + Stack.pop_back(); + switch (current) { + case 
JE_OUTER_SPACE: + ythrow TError() << "JSON writer: stack empty"; + case JE_LIST: + PrintIndentation(true); + RawWriteChar(']'); + break; + case JE_OBJECT: + PrintIndentation(true); + RawWriteChar('}'); + break; + case JE_PAIR: + break; + } + NeedComma = true; + NeedNewline = true; + } + + inline void TBuf::CheckAndPop(EJsonEntity e) { + if (Y_UNLIKELY(StackTop() != e)) { + ythrow TError() << "JSON writer: unexpected value " + << EntityToStr(StackTop()) << " on the stack"; + } + StackPop(); + } + + void TBuf::PrintIndentation(bool closing) { + if (!IndentSpaces) + return; + const int indentation = IndentSpaces * (Stack.size() - 1); + if (!indentation && !closing) + return; + + PrintWhitespaces(Max(0, indentation), true); + } + + void TBuf::PrintWhitespaces(size_t count, bool prependWithNewLine) { + static constexpr TStringBuf whitespacesTemplate = "\n "; + static_assert(whitespacesTemplate[0] == '\n'); + static_assert(whitespacesTemplate[1] == ' '); + + count += (prependWithNewLine); + do { + const TStringBuf buffer = whitespacesTemplate.SubString(prependWithNewLine ? 
0 : 1, count); + count -= buffer.size(); + UnsafeWriteRawBytes(buffer); + prependWithNewLine = false; // skip '\n' in subsequent writes + } while (count > 0); + } + + inline void TBuf::WriteComma() { + if (NeedComma) { + RawWriteChar(','); + } + NeedComma = true; + + if (NeedNewline) { + PrintIndentation(false); + } + NeedNewline = true; + } + + inline void TBuf::BeginValue() { + if (Y_UNLIKELY(KeyExpected())) { + ythrow TError() << "JSON writer: value written, " + "but expected a key:value pair"; + } + WriteComma(); + } + + inline void TBuf::BeginKey() { + if (Y_UNLIKELY(!KeyExpected())) { + ythrow TError() << "JSON writer: key written outside of an object"; + } + WriteComma(); + StackPush(JE_PAIR); + NeedComma = false; + NeedNewline = false; + } + + inline void TBuf::EndValue() { + if (StackTop() == JE_PAIR) { + StackPop(); + } + } + + TValueContext TBuf::BeginList() { + NeedNewline = true; + BeginValue(); + RawWriteChar('['); + StackPush(JE_LIST); + NeedComma = false; + return TValueContext(*this); + } + + TPairContext TBuf::BeginObject() { + NeedNewline = true; + BeginValue(); + RawWriteChar('{'); + StackPush(JE_OBJECT); + NeedComma = false; + return TPairContext(*this); + } + + TAfterColonContext TBuf::UnsafeWriteKey(const TStringBuf& s) { + BeginKey(); + RawWriteChar('"'); + UnsafeWriteRawBytes(s); + UnsafeWriteRawBytes("\":", 2); + return TAfterColonContext(*this); + } + + TAfterColonContext TBuf::WriteKey(const TStringBuf& s) { + // use the default escaping mode for this object + return WriteKey(s, EscapeMode); + } + + TAfterColonContext TBuf::WriteKey(const TStringBuf& s, EHtmlEscapeMode hem) { + BeginKey(); + WriteBareString(s, hem); + RawWriteChar(':'); + return TAfterColonContext(*this); + } + + TAfterColonContext TBuf::CompatWriteKeyWithoutQuotes(const TStringBuf& s) { + BeginKey(); + Y_ASSERT(AllOf(s, [](char x) { return 'a' <= x && x <= 'z'; })); + UnsafeWriteRawBytes(s); + RawWriteChar(':'); + return TAfterColonContext(*this); + } + + TBuf& 
TBuf::EndList() { + CheckAndPop(JE_LIST); + EndValue(); + return *this; + } + + TBuf& TBuf::EndObject() { + CheckAndPop(JE_OBJECT); + EndValue(); + return *this; + } + + TValueContext TBuf::WriteString(const TStringBuf& s) { + // use the default escaping mode for this object + return WriteString(s, EscapeMode); + } + + TValueContext TBuf::WriteString(const TStringBuf& s, EHtmlEscapeMode hem) { + BeginValue(); + WriteBareString(s, hem); + EndValue(); + return TValueContext(*this); + } + + TValueContext TBuf::WriteNull() { + UnsafeWriteValue(TStringBuf("null")); + return TValueContext(*this); + } + + TValueContext TBuf::WriteBool(bool b) { + constexpr TStringBuf trueVal = "true"; + constexpr TStringBuf falseVal = "false"; + UnsafeWriteValue(b ? trueVal : falseVal); + return TValueContext(*this); + } + + TValueContext TBuf::WriteInt(int i) { + char buf[22]; // enough to hold any 64-bit number + size_t len = ToString(i, buf, sizeof(buf)); + UnsafeWriteValue(buf, len); + return TValueContext(*this); + } + + TValueContext TBuf::WriteLongLong(long long i) { + static_assert(sizeof(long long) <= 8, "expect sizeof(long long) <= 8"); + char buf[22]; // enough to hold any 64-bit number + size_t len = ToString(i, buf, sizeof(buf)); + UnsafeWriteValue(buf, len); + return TValueContext(*this); + } + + TValueContext TBuf::WriteULongLong(unsigned long long i) { + char buf[22]; // enough to hold any 64-bit number + size_t len = ToString(i, buf, sizeof(buf)); + UnsafeWriteValue(buf, len); + return TValueContext(*this); + } + + template <class TFloat> + TValueContext TBuf::WriteFloatImpl(TFloat f, EFloatToStringMode mode, int ndigits) { + char buf[512]; // enough to hold most floats, the same buffer is used in FloatToString implementation + if (Y_UNLIKELY(!IsValidFloat(f))) { + if (WriteNanAsString) { + const size_t size = FloatToString(f, buf, Y_ARRAY_SIZE(buf)); + WriteString(TStringBuf(buf, size)); + return TValueContext(*this); + } else { + ythrow TError() << "JSON writer: invalid 
float value: " << FloatToString(f); + } + } + size_t len = FloatToString(f, buf, Y_ARRAY_SIZE(buf), mode, ndigits); + UnsafeWriteValue(buf, len); + return TValueContext(*this); + } + + TValueContext TBuf::WriteFloat(float f, EFloatToStringMode mode, int ndigits) { + return WriteFloatImpl(f, mode, ndigits); + } + + TValueContext TBuf::WriteDouble(double f, EFloatToStringMode mode, int ndigits) { + return WriteFloatImpl(f, mode, ndigits); + } + + namespace { + struct TFinder: public TCompactStrSpn { + inline TFinder() + : TCompactStrSpn("\xe2\\\"\b\n\f\r\t<>&\'/") + { + for (ui8 ch = 0; ch < 0x20; ++ch) { + Set(ch); + } + } + }; + } + + inline void TBuf::WriteBareString(const TStringBuf s, EHtmlEscapeMode hem) { + RawWriteChar('"'); + const auto& specialChars = *Singleton<TFinder>(); + const char* b = s.begin(); + const char* e = s.end(); + const char* i = b; + while ((i = specialChars.FindFirstOf(i, e)) != e) { + // U+2028 (line separator) and U+2029 (paragraph separator) are valid string + // contents in JSON, but are treated as line breaks in JavaScript, breaking JSONP. + // In UTF-8, U+2028 is "\xe2\x80\xa8" and U+2029 is "\xe2\x80\xa9". + if (Y_UNLIKELY(e - i >= 3 && i[0] == '\xe2' && i[1] == '\x80' && (i[2] | 1) == '\xa9')) { + UnsafeWriteRawBytes(b, i - b); + UnsafeWriteRawBytes(i[2] == '\xa9' ? 
"\\u2029" : "\\u2028", 6); + b = i = i + 3; + } else if (EscapedWriteChar(b, i, hem)) { + b = ++i; + } else { + ++i; + } + } + UnsafeWriteRawBytes(b, e - b); + RawWriteChar('"'); + } + + inline void TBuf::RawWriteChar(char c) { + Stream->Write(c); + } + + void TBuf::WriteHexEscape(unsigned char c) { + Y_ASSERT(c < 0x80); + UnsafeWriteRawBytes("\\u00", 4); + static const char hexDigits[] = "0123456789ABCDEF"; + RawWriteChar(hexDigits[(c & 0xf0) >> 4]); + RawWriteChar(hexDigits[(c & 0x0f)]); + } + +#define MATCH(sym, string) \ + case sym: \ + UnsafeWriteRawBytes(beg, cur - beg); \ + UnsafeWriteRawBytes(TStringBuf(string)); \ + return true + + inline bool TBuf::EscapedWriteChar(const char* beg, const char* cur, EHtmlEscapeMode hem) { + unsigned char c = *cur; + if (hem == HEM_ESCAPE_HTML) { + switch (c) { + MATCH('"', "&quot;"); + MATCH('\'', "&#39;"); + MATCH('<', "&lt;"); + MATCH('>', "&gt;"); + MATCH('&', "&amp;"); + } + //for other characters, we fall through to the non-HTML-escaped part + } + + if (hem == HEM_RELAXED && c == '/') + return false; + + if (hem != HEM_UNSAFE) { + switch (c) { + case '/': + UnsafeWriteRawBytes(beg, cur - beg); + UnsafeWriteRawBytes("\\/", 2); + return true; + case '<': + case '>': + case '\'': + UnsafeWriteRawBytes(beg, cur - beg); + WriteHexEscape(c); + return true; + } + // for other characters, fall through to the non-escaped part + } + + switch (c) { + MATCH('"', "\\\""); + MATCH('\\', "\\\\"); + MATCH('\b', "\\b"); + MATCH('\f', "\\f"); + MATCH('\n', "\\n"); + MATCH('\r', "\\r"); + MATCH('\t', "\\t"); + } + if (c < 0x20) { + UnsafeWriteRawBytes(beg, cur - beg); + WriteHexEscape(c); + return true; + } + + return false; + } + +#undef MATCH + + static bool LessStrPtr(const TString* a, const TString* b) { + return *a < *b; + } + + TValueContext TBuf::WriteJsonValue(const NJson::TJsonValue* v, bool sortKeys, EFloatToStringMode mode, int ndigits) { + using namespace NJson; + switch (v->GetType()) { + default: + case JSON_NULL: + WriteNull(); + break; + 
case JSON_BOOLEAN: + WriteBool(v->GetBoolean()); + break; + case JSON_DOUBLE: + WriteDouble(v->GetDouble(), mode, ndigits); + break; + case JSON_INTEGER: + WriteLongLong(v->GetInteger()); + break; + case JSON_UINTEGER: + WriteULongLong(v->GetUInteger()); + break; + case JSON_STRING: + WriteString(v->GetString()); + break; + case JSON_ARRAY: { + BeginList(); + const TJsonValue::TArray& arr = v->GetArray(); + for (const auto& it : arr) + WriteJsonValue(&it, sortKeys, mode, ndigits); + EndList(); + break; + } + case JSON_MAP: { + BeginObject(); + const TJsonValue::TMapType& map = v->GetMap(); + if (sortKeys) { + const size_t oldsz = Keys.size(); + Keys.reserve(map.size() + oldsz); + for (const auto& it : map) { + Keys.push_back(&(it.first)); + } + Sort(Keys.begin() + oldsz, Keys.end(), LessStrPtr); + for (size_t i = oldsz, sz = Keys.size(); i < sz; ++i) { + TJsonValue::TMapType::const_iterator kv = map.find(*Keys[i]); + WriteKey(kv->first); + WriteJsonValue(&kv->second, sortKeys, mode, ndigits); + } + Keys.resize(oldsz); + } else { + for (const auto& it : map) { + WriteKey(it.first); + WriteJsonValue(&it.second, sortKeys, mode, ndigits); + } + } + EndObject(); + break; + } + } + return TValueContext(*this); + } + + TPairContext TBuf::UnsafeWritePair(const TStringBuf& s) { + if (Y_UNLIKELY(StackTop() != JE_OBJECT)) { + ythrow TError() << "JSON writer: key:value pair written outside of an object"; + } + WriteComma(); + UnsafeWriteRawBytes(s); + return TPairContext(*this); + } + + void TBuf::UnsafeWriteValue(const TStringBuf& s) { + BeginValue(); + UnsafeWriteRawBytes(s); + EndValue(); + } + + void TBuf::UnsafeWriteValue(const char* s, size_t len) { + BeginValue(); + UnsafeWriteRawBytes(s, len); + EndValue(); + } + + void TBuf::UnsafeWriteRawBytes(const char* src, size_t len) { + Stream->Write(src, len); + } + + void TBuf::UnsafeWriteRawBytes(const TStringBuf& s) { + UnsafeWriteRawBytes(s.data(), s.size()); + } + + const TString& TBuf::Str() const { + if (!StringStream) 
{ + ythrow TError() << "JSON writer: Str() called " + "but writing to an external stream"; + } + if (!(Stack.size() == 1 && StackTop() == JE_OUTER_SPACE)) { + ythrow TError() << "JSON writer: incomplete object converted to string"; + } + return StringStream->Str(); + } + + void TBuf::FlushTo(IOutputStream* stream) { + if (!StringStream) { + ythrow TError() << "JSON writer: FlushTo() called " + "but writing to an external stream"; + } + stream->Write(StringStream->Str()); + StringStream->Clear(); + } + + TString WrapJsonToCallback(const TBuf& buf, TStringBuf callback) { + if (!callback) { + return buf.Str(); + } else { + return TString::Join(callback, "(", buf.Str(), ")"); + } + } + + TBufState TBuf::State() const { + return TBufState{NeedComma, NeedNewline, Stack}; + } + + void TBuf::Reset(const TBufState& from) { + NeedComma = from.NeedComma; + NeedNewline = from.NeedNewline; + Stack = from.Stack; + } + + void TBuf::Reset(TBufState&& from) { + NeedComma = from.NeedComma; + NeedNewline = from.NeedNewline; + Stack.swap(from.Stack); + } + +} diff --git a/library/cpp/json/writer/json.h b/library/cpp/json/writer/json.h new file mode 100644 index 0000000000..0aae2531b9 --- /dev/null +++ b/library/cpp/json/writer/json.h @@ -0,0 +1,289 @@ +#pragma once + +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> +#include <util/stream/str.h> +#include <util/string/cast.h> + +namespace NJson { + class TJsonValue; +} + +namespace NJsonWriter { + enum EJsonEntity : ui8 { + JE_OUTER_SPACE = 1, + JE_LIST, + JE_OBJECT, + JE_PAIR, + }; + + enum EHtmlEscapeMode { + HEM_ESCAPE_HTML = 1, // Use HTML escaping: &lt; &gt; &amp; \/ + HEM_DONT_ESCAPE_HTML, // Use JSON escaping: \u003C \u003E \u0027 \/ (& is left as is) + HEM_RELAXED, // Use JSON escaping: \u003C \u003E \u0027 / (& is left as is) + HEM_UNSAFE, // Turn escaping off: < > & / + }; + + class TError: public yexception {}; + + class TValueContext; + class 
TPairContext; + class TAfterColonContext; + + struct TBufState { + bool NeedComma; + bool NeedNewline; + TVector<EJsonEntity> Stack; + }; + + class TBuf : TNonCopyable { + public: + TBuf(EHtmlEscapeMode mode = HEM_DONT_ESCAPE_HTML, IOutputStream* stream = nullptr); + + TValueContext WriteString(const TStringBuf& s, EHtmlEscapeMode hem); + TValueContext WriteString(const TStringBuf& s); + TValueContext WriteInt(int i); + TValueContext WriteLongLong(long long i); + TValueContext WriteULongLong(unsigned long long i); + TValueContext WriteFloat(float f, EFloatToStringMode mode = PREC_NDIGITS, int ndigits = 6); + TValueContext WriteDouble(double f, EFloatToStringMode mode = PREC_NDIGITS, int ndigits = 10); + TValueContext WriteBool(bool b); + TValueContext WriteNull(); + TValueContext WriteJsonValue(const NJson::TJsonValue* value, bool sortKeys = false, EFloatToStringMode mode = PREC_NDIGITS, int ndigits = 10); + + TValueContext BeginList(); + TBuf& EndList(); + + TPairContext BeginObject(); + TAfterColonContext WriteKey(const TStringBuf& key, EHtmlEscapeMode hem); + TAfterColonContext WriteKey(const TStringBuf& key); + TAfterColonContext UnsafeWriteKey(const TStringBuf& key); + bool KeyExpected() const { + return Stack.back() == JE_OBJECT; + } + + //! deprecated, do not use in new code + TAfterColonContext CompatWriteKeyWithoutQuotes(const TStringBuf& key); + + TBuf& EndObject(); + + /*** Indent the resulting JSON with spaces. + * By default (spaces==0) no formatting is done. */ + TBuf& SetIndentSpaces(int spaces) { + IndentSpaces = spaces; + return *this; + } + + /*** NaN and Inf are not valid JSON values, + * so if WriteNanAsString is set, the writer writes them as a string + * instead of throwing an exception (the default behaviour) */ + TBuf& SetWriteNanAsString(bool writeNanAsString = true) { + WriteNanAsString = writeNanAsString; + return *this; + } + + /*** Return the string formed in the internal TStringStream. 
+ * You may only call it if the `stream' parameter was NULL + * at construction time. */ + const TString& Str() const; + + /*** Dump and forget the string constructed so far. + * You may only call it if the `stream' parameter was NULL + * at construction time. */ + void FlushTo(IOutputStream* stream); + + /*** Write a literal string that represents a JSON value + * (string, number, object, array, bool, or null). + * + * Example: + * j.UnsafeWriteValue("[1, 2, 3, \"o'clock\", 4, \"o'clock rock\"]"); + * + * As in all of the Unsafe* functions, no escaping is done. */ + void UnsafeWriteValue(const TStringBuf& s); + void UnsafeWriteValue(const char* s, size_t len); + + /*** When in the context of an object, write a literal string + * that represents a key:value pair (or several pairs). + * + * Example: + * j.BeginObject(); + * j.UnsafeWritePair("\"adam\": \"male\", \"eve\": \"female\""); + * j.EndObject(); + * + * As in all of the Unsafe* functions, no escaping is done. */ + TPairContext UnsafeWritePair(const TStringBuf& s); + + /*** Copy the supplied string directly into the output stream. 
*/ + void UnsafeWriteRawBytes(const TStringBuf& s); + void UnsafeWriteRawBytes(const char* c, size_t len); + + TBufState State() const; + void Reset(const TBufState& from); + void Reset(TBufState&& from); + + private: + void BeginValue(); + void EndValue(); + void BeginKey(); + void RawWriteChar(char c); + bool EscapedWriteChar(const char* b, const char* c, EHtmlEscapeMode hem); + void WriteBareString(const TStringBuf s, EHtmlEscapeMode hem); + void WriteComma(); + void PrintIndentation(bool closing); + void PrintWhitespaces(size_t count, bool prependWithNewLine); + void WriteHexEscape(unsigned char c); + + void StackPush(EJsonEntity e); + void StackPop(); + void CheckAndPop(EJsonEntity e); + EJsonEntity StackTop() const; + + template <class TFloat> + TValueContext WriteFloatImpl(TFloat f, EFloatToStringMode mode, int ndigits); + + private: + IOutputStream* Stream; + THolder<TStringStream> StringStream; + typedef TVector<const TString*> TKeys; + TKeys Keys; + + TVector<EJsonEntity> Stack; + bool NeedComma; + bool NeedNewline; + const EHtmlEscapeMode EscapeMode; + int IndentSpaces; + bool WriteNanAsString; + }; + + // Please don't try to instantiate the classes declared below this point. 
+ + template <typename TOutContext> + class TValueWriter { + public: + TOutContext WriteNull(); + TOutContext WriteString(const TStringBuf&); + TOutContext WriteString(const TStringBuf& s, EHtmlEscapeMode hem); + TOutContext WriteInt(int); + TOutContext WriteLongLong(long long); + TOutContext WriteULongLong(unsigned long long); + TOutContext WriteBool(bool); + TOutContext WriteFloat(float); + TOutContext WriteFloat(float, EFloatToStringMode, int ndigits); + TOutContext WriteDouble(double); + TOutContext WriteDouble(double, EFloatToStringMode, int ndigits); + TOutContext WriteJsonValue(const NJson::TJsonValue* value, bool sortKeys = false); + TOutContext UnsafeWriteValue(const TStringBuf&); + + TValueContext BeginList(); + TPairContext BeginObject(); + + protected: + TValueWriter(TBuf& buf) + : Buf(buf) + { + } + friend class TBuf; + + protected: + TBuf& Buf; + }; + + class TValueContext: public TValueWriter<TValueContext> { + public: + TBuf& EndList() { + return Buf.EndList(); + } + TString Str() const { + return Buf.Str(); + } + + private: + TValueContext(TBuf& buf) + : TValueWriter<TValueContext>(buf) + { + } + friend class TBuf; + friend class TValueWriter<TValueContext>; + }; + + class TAfterColonContext: public TValueWriter<TPairContext> { + private: + TAfterColonContext(TBuf& iBuf) + : TValueWriter<TPairContext>(iBuf) + { + } + friend class TBuf; + friend class TPairContext; + }; + + class TPairContext { + public: + TAfterColonContext WriteKey(const TStringBuf& s, EHtmlEscapeMode hem) { + return Buf.WriteKey(s, hem); + } + TAfterColonContext WriteKey(const TStringBuf& s) { + return Buf.WriteKey(s); + } + TAfterColonContext UnsafeWriteKey(const TStringBuf& s) { + return Buf.UnsafeWriteKey(s); + } + TAfterColonContext CompatWriteKeyWithoutQuotes(const TStringBuf& s) { + return Buf.CompatWriteKeyWithoutQuotes(s); + } + TPairContext UnsafeWritePair(const TStringBuf& s) { + return Buf.UnsafeWritePair(s); + } + TBuf& EndObject() { + return Buf.EndObject(); + } + + 
private: + TPairContext(TBuf& buf) + : Buf(buf) + { + } + + friend class TBuf; + friend class TValueWriter<TPairContext>; + + private: + TBuf& Buf; + }; + +#define JSON_VALUE_WRITER_WRAP(function, params, args) \ + template <typename TOutContext> \ + TOutContext TValueWriter<TOutContext>::function params { \ + Buf.function args; \ + return TOutContext(Buf); \ + } + + JSON_VALUE_WRITER_WRAP(WriteNull, (), ()) + JSON_VALUE_WRITER_WRAP(WriteString, (const TStringBuf& arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteString, (const TStringBuf& s, EHtmlEscapeMode hem), (s, hem)) + JSON_VALUE_WRITER_WRAP(WriteInt, (int arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteLongLong, (long long arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteULongLong, (unsigned long long arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteBool, (bool arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteFloat, (float arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteFloat, (float arg, EFloatToStringMode mode, int ndigits), (arg, mode, ndigits)) + JSON_VALUE_WRITER_WRAP(WriteDouble, (double arg), (arg)) + JSON_VALUE_WRITER_WRAP(WriteDouble, (double arg, EFloatToStringMode mode, int ndigits), (arg, mode, ndigits)) + JSON_VALUE_WRITER_WRAP(WriteJsonValue, (const NJson::TJsonValue* value, bool sortKeys), (value, sortKeys)) + JSON_VALUE_WRITER_WRAP(UnsafeWriteValue, (const TStringBuf& arg), (arg)) +#undef JSON_VALUE_WRITER_WRAP + + template <typename TOutContext> + TValueContext TValueWriter<TOutContext>::BeginList() { + return Buf.BeginList(); + } + + template <typename TOutContext> + TPairContext TValueWriter<TOutContext>::BeginObject() { + return Buf.BeginObject(); + } + + TString WrapJsonToCallback(const TBuf& buf, TStringBuf callback); +} diff --git a/library/cpp/json/writer/json_ut.cpp b/library/cpp/json/writer/json_ut.cpp new file mode 100644 index 0000000000..9980555683 --- /dev/null +++ b/library/cpp/json/writer/json_ut.cpp @@ -0,0 +1,307 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <util/system/sanitizers.h> + 
+#include "json.h" +#include <library/cpp/json/json_value.h> + +#include <limits> + +Y_UNIT_TEST_SUITE(JsonWriter) { + Y_UNIT_TEST(Struct) { + NJsonWriter::TBuf w; + w.BeginList(); + w.BeginObject() + .WriteKey("key") + .WriteString("value") + .UnsafeWritePair("\"xk\":13") + .WriteKey("key2") + .BeginList() + .BeginObject() + .EndObject() + .BeginObject() + .EndObject() + .EndList() + .EndObject(); + w.WriteInt(43); + w.UnsafeWriteValue("\"x\""); + w.WriteString("..."); + w.EndList(); + const char* exp = "[{\"key\":\"value\",\"xk\":13,\"key2\":[{},{}]},43,\"x\",\"...\"]"; + UNIT_ASSERT_EQUAL(w.Str(), exp); + } + Y_UNIT_TEST(EscapedString) { + NJsonWriter::TBuf w(NJsonWriter::HEM_ESCAPE_HTML); + w.WriteString(" \n \r \t \007 \b \f ' <tag> &ent; \"txt\" "); + TString ws = w.Str(); + const char* exp = "\" \\n \\r \\t \\u0007 \\b \\f &#39; &lt;tag&gt; &amp;ent; &quot;txt&quot; \""; + UNIT_ASSERT_STRINGS_EQUAL(ws.c_str(), exp); + } + Y_UNIT_TEST(UnescapedString) { + NJsonWriter::TBuf w; + w.WriteString(" \n \r \t \b \f '; -- <tag> &ent; \"txt\"", NJsonWriter::HEM_DONT_ESCAPE_HTML); + TString ws = w.Str(); + const char* exp = "\" \\n \\r \\t \\b \\f \\u0027; -- \\u003Ctag\\u003E &ent; \\\"txt\\\"\""; + UNIT_ASSERT_STRINGS_EQUAL(ws.c_str(), exp); + } + Y_UNIT_TEST(UnescapedChaining) { + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML); + w.UnsafeWriteRawBytes("(", 1); + w.BeginList().WriteString("<>&'\\").BeginList(); + w.EndList().EndList(); + TString ws = w.Str(); + const char* exp = "([\"\\u003C\\u003E&\\u0027\\\\\",[]]"; + UNIT_ASSERT_STRINGS_EQUAL(ws.c_str(), exp); + } + Y_UNIT_TEST(Utf8) { + TString ws = NJsonWriter::TBuf().WriteString("яЯ σΣ ש א").Str(); + const char* exp = "\"яЯ σΣ ש א\""; + UNIT_ASSERT_STRINGS_EQUAL(ws.c_str(), exp); + } + Y_UNIT_TEST(WrongObject) { + NJsonWriter::TBuf w; + w.BeginObject(); + UNIT_ASSERT_EXCEPTION(w.WriteString("hehe"), NJsonWriter::TError); + } + Y_UNIT_TEST(WrongList) { + NJsonWriter::TBuf w; + w.BeginList(); + 
UNIT_ASSERT_EXCEPTION(w.WriteKey("hehe"), NJsonWriter::TError); + } + Y_UNIT_TEST(Incomplete) { + NJsonWriter::TBuf w; + w.BeginList(); + UNIT_ASSERT_EXCEPTION(w.Str(), NJsonWriter::TError); + } + Y_UNIT_TEST(BareKey) { + NJsonWriter::TBuf w; + w.BeginObject() + .CompatWriteKeyWithoutQuotes("p") + .WriteInt(1) + .CompatWriteKeyWithoutQuotes("n") + .WriteInt(0) + .EndObject(); + TString ws = w.Str(); + const char* exp = "{p:1,n:0}"; + UNIT_ASSERT_STRINGS_EQUAL(ws.c_str(), exp); + } + Y_UNIT_TEST(UnescapedStringInObject) { + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML); + w.BeginObject().WriteKey("key").WriteString("</&>'").EndObject(); + TString ws = w.Str(); + const char* exp = "{\"key\":\"\\u003C\\/&\\u003E\\u0027\"}"; + UNIT_ASSERT_STRINGS_EQUAL(ws.c_str(), exp); + } + Y_UNIT_TEST(ForeignStreamStr) { + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML, &Cerr); + UNIT_ASSERT_EXCEPTION(w.Str(), NJsonWriter::TError); + } + Y_UNIT_TEST(ForeignStreamValue) { + TStringStream ss; + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML, &ss); + w.WriteInt(1543); + UNIT_ASSERT_STRINGS_EQUAL(ss.Str(), "1543"); + } + Y_UNIT_TEST(Indentation) { + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML); + w.SetIndentSpaces(2); + w.BeginList() + .WriteInt(1) + .WriteString("hello") + .BeginObject() + .WriteKey("abc") + .WriteInt(3) + .WriteKey("def") + .WriteInt(4) + .EndObject() + .EndList(); + const char* exp = "[\n" + " 1,\n" + " \"hello\",\n" + " {\n" + " \"abc\":3,\n" + " \"def\":4\n" + " }\n" + "]"; + UNIT_ASSERT_STRINGS_EQUAL(exp, w.Str()); + } + Y_UNIT_TEST(WriteJsonValue) { + using namespace NJson; + TJsonValue val; + val.AppendValue(1); + val.AppendValue("2"); + val.AppendValue(3.5); + TJsonValue obj; + obj.InsertValue("key", TJsonValue("value")); + + val.AppendValue(obj); + val.AppendValue(TJsonValue(JSON_NULL)); + + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML); + w.WriteJsonValue(&val); + + const char exp[] = 
"[1,\"2\",3.5,{\"key\":\"value\"},null]"; + UNIT_ASSERT_STRINGS_EQUAL(exp, w.Str()); + } + Y_UNIT_TEST(WriteJsonValueSorted) { + using namespace NJson; + TJsonValue val; + val.InsertValue("1", TJsonValue(1)); + val.InsertValue("2", TJsonValue(2)); + + TJsonValue obj; + obj.InsertValue("zero", TJsonValue(0)); + obj.InsertValue("succ", TJsonValue(1)); + val.InsertValue("0", obj); + + NJsonWriter::TBuf w(NJsonWriter::HEM_DONT_ESCAPE_HTML); + w.WriteJsonValue(&val, true); + + const char exp[] = "{\"0\":{\"succ\":1,\"zero\":0},\"1\":1,\"2\":2}"; + UNIT_ASSERT_STRINGS_EQUAL(exp, w.Str()); + } + Y_UNIT_TEST(Unescaped) { + NJsonWriter::TBuf buf(NJsonWriter::HEM_UNSAFE); + buf.WriteString("</security>'"); + UNIT_ASSERT_STRINGS_EQUAL("\"</security>'\"", buf.Str()); + } + Y_UNIT_TEST(LittleBobbyJsonp) { + NJsonWriter::TBuf buf; + buf.WriteString("hello\xe2\x80\xa8\xe2\x80\xa9stranger"); + UNIT_ASSERT_STRINGS_EQUAL("\"hello\\u2028\\u2029stranger\"", buf.Str()); + } + Y_UNIT_TEST(LittleBobbyInvalid) { + NJsonWriter::TBuf buf; + TStringBuf incomplete("\xe2\x80\xa8", 2); + buf.WriteString(incomplete); + // garbage in - garbage out + UNIT_ASSERT_STRINGS_EQUAL("\"\xe2\x80\"", buf.Str()); + } + Y_UNIT_TEST(OverlyZealous) { + NJsonWriter::TBuf buf; + buf.WriteString("—"); + UNIT_ASSERT_STRINGS_EQUAL("\"—\"", buf.Str()); + } + Y_UNIT_TEST(RelaxedEscaping) { + NJsonWriter::TBuf buf(NJsonWriter::HEM_RELAXED); + buf.WriteString("</>"); + UNIT_ASSERT_STRINGS_EQUAL("\"\\u003C/\\u003E\"", buf.Str()); + } + + Y_UNIT_TEST(FloatFormatting) { + NJsonWriter::TBuf buf(NJsonWriter::HEM_DONT_ESCAPE_HTML); + buf.BeginList() + .WriteFloat(0.12345678987654321f) + .WriteDouble(0.12345678987654321) + .WriteFloat(0.315501, PREC_NDIGITS, 3) + .WriteFloat(244.13854, PREC_NDIGITS, 4) + .WriteFloat(10385.8324, PREC_POINT_DIGITS, 2) + .BeginObject() + .WriteKey("1") + .WriteDouble(1111.71, PREC_POINT_DIGITS, 0) + .WriteKey("2") + .WriteDouble(1111.71, PREC_NDIGITS, 1) + .EndObject() + .EndList(); + const char 
exp[] = "[0.123457,0.1234567899,0.316,244.1,10385.83,{\"1\":1112,\"2\":1e+03}]"; + UNIT_ASSERT_STRINGS_EQUAL(exp, buf.Str()); + } + + Y_UNIT_TEST(NanFormatting) { + { + NJsonWriter::TBuf buf; + buf.BeginObject(); + buf.WriteKey("nanvalue"); + UNIT_ASSERT_EXCEPTION(buf.WriteFloat(std::numeric_limits<double>::quiet_NaN()), yexception); + } + + { + NJsonWriter::TBuf buf; + buf.BeginObject(); + buf.WriteKey("infvalue"); + UNIT_ASSERT_EXCEPTION(buf.WriteFloat(std::numeric_limits<double>::infinity()), yexception); + } + + { + NJsonWriter::TBuf buf; + buf.BeginList(); + UNIT_ASSERT_EXCEPTION(buf.WriteFloat(std::numeric_limits<double>::quiet_NaN()), yexception); + } + + { + NJsonWriter::TBuf buf; + buf.BeginList(); + UNIT_ASSERT_EXCEPTION(buf.WriteFloat(std::numeric_limits<double>::infinity()), yexception); + } + + { + NJsonWriter::TBuf buf; + buf.SetWriteNanAsString(); + + buf.BeginObject() + .WriteKey("nanvalue") + .WriteFloat(std::numeric_limits<double>::quiet_NaN()) + .WriteKey("infvalue") + .WriteFloat(std::numeric_limits<double>::infinity()) + .WriteKey("minus_infvalue") + .WriteFloat(-std::numeric_limits<float>::infinity()) + .WriteKey("l") + .BeginList() + .WriteFloat(std::numeric_limits<float>::quiet_NaN()) + .EndList() + .EndObject(); + + UNIT_ASSERT_STRINGS_EQUAL(buf.Str(), R"raw_json({"nanvalue":"nan","infvalue":"inf","minus_infvalue":"-inf","l":["nan"]})raw_json"); + } + + { + NJsonWriter::TBuf buf; + buf.BeginObject() + .WriteKey("<>&") + .WriteString("Ololo") + .UnsafeWriteKey("<>&") + .WriteString("Ololo2") + .EndObject(); + + UNIT_ASSERT_STRINGS_EQUAL(buf.Str(), R"({"\u003C\u003E&":"Ololo","<>&":"Ololo2"})"); + } + } + + Y_UNIT_TEST(WriteUninitializedBoolDoesntCrashProgram) { + // makes sense only in release build w/ address sanitizer + // + // passing uninitialized boolean into WriteBool can make cleverly optimized code which is emitted by compiler crash program + // 
https://stackoverflow.com/questions/54120862/does-the-c-standard-allow-for-an-uninitialized-bool-to-crash-a-program + + // looks like compiler can detect UB at compile time in simple cases, but not in this one + class TSensorConf { + public: + class TAggrRuleItem { + public: + TVector<TString> Cond; + TVector<TString> Target; + }; + + TString ToString() const { + NJson::TJsonValue jsonValue; + NJsonWriter::TBuf jsonOutput; + jsonOutput.BeginObject() + .WriteKey("rawDataMemOnly").WriteBool(RawDataMemOnly) + .WriteKey("aggrRules").BeginList(); + + jsonOutput.EndList() + .EndObject(); + + return jsonOutput.Str(); + } + + TVector<TAggrRuleItem> AggrRules; + bool RawDataMemOnly; + }; + + TSensorConf s; + NSan::Unpoison(&s.RawDataMemOnly, sizeof(s.RawDataMemOnly)); + auto p = s.ToString(); + // doesn't really matter + UNIT_ASSERT(!p.empty()); + } +} diff --git a/library/cpp/json/writer/json_value.cpp b/library/cpp/json/writer/json_value.cpp new file mode 100644 index 0000000000..c61e8d1dc4 --- /dev/null +++ b/library/cpp/json/writer/json_value.cpp @@ -0,0 +1,1105 @@ +#include "json_value.h" +#include "json.h" + +#include <util/generic/ymath.h> +#include <util/generic/ylimits.h> +#include <util/generic/utility.h> +#include <util/generic/singleton.h> +#include <util/stream/str.h> +#include <util/stream/output.h> +#include <util/string/cast.h> +#include <util/string/type.h> +#include <util/string/vector.h> +#include <util/system/yassert.h> +#include <util/ysaveload.h> +#include <util/generic/bt_exception.h> + +static bool +AreJsonMapsEqual(const NJson::TJsonValue& lhs, const NJson::TJsonValue& rhs) { + using namespace NJson; + + Y_VERIFY(lhs.GetType() == JSON_MAP, "lhs has not a JSON_MAP type."); + + if (rhs.GetType() != JSON_MAP) + return false; + + typedef TJsonValue::TMapType TMapType; + const TMapType& lhsMap = lhs.GetMap(); + const TMapType& rhsMap = rhs.GetMap(); + + if (lhsMap.size() != rhsMap.size()) + return false; + + for (const auto& lhsIt : lhsMap) { + 
TMapType::const_iterator rhsIt = rhsMap.find(lhsIt.first); + if (rhsIt == rhsMap.end()) + return false; + + if (lhsIt.second != rhsIt->second) + return false; + } + + return true; +} + +static bool +AreJsonArraysEqual(const NJson::TJsonValue& lhs, const NJson::TJsonValue& rhs) { + using namespace NJson; + + Y_VERIFY(lhs.GetType() == JSON_ARRAY, "lhs has not a JSON_ARRAY type."); + + if (rhs.GetType() != JSON_ARRAY) + return false; + + typedef TJsonValue::TArray TArray; + const TArray& lhsArray = lhs.GetArray(); + const TArray& rhsArray = rhs.GetArray(); + + if (lhsArray.size() != rhsArray.size()) + return false; + + for (TArray::const_iterator lhsIt = lhsArray.begin(), rhsIt = rhsArray.begin(); + lhsIt != lhsArray.end(); ++lhsIt, ++rhsIt) { + if (*lhsIt != *rhsIt) + return false; + } + + return true; +} + +namespace NJson { + const TJsonValue TJsonValue::UNDEFINED{}; + + TJsonValue::TJsonValue(const EJsonValueType type) { + SetType(type); + } + + TJsonValue::TJsonValue(TJsonValue&& vval) noexcept + : Type(JSON_UNDEFINED) + { + vval.SwapWithUndefined(*this); + Zero(vval.Value); + } + + TJsonValue::TJsonValue(const TJsonValue& val) + : Type(val.Type) + { + switch (Type) { + case JSON_STRING: + new (&Value.String) TString(val.GetString()); + break; + case JSON_MAP: + Value.Map = new TMapType(val.GetMap()); + break; + case JSON_ARRAY: + Value.Array = new TArray(val.GetArray()); + break; + case JSON_UNDEFINED: + case JSON_NULL: + case JSON_BOOLEAN: + case JSON_INTEGER: + case JSON_UINTEGER: + case JSON_DOUBLE: + std::memcpy(&Value, &val.Value, sizeof(Value)); + break; + } + } + + TJsonValue& TJsonValue::operator=(const TJsonValue& val) { + if (this == &val) + return *this; + TJsonValue tmp(val); + tmp.Swap(*this); + return *this; + } + + TJsonValue& TJsonValue::operator=(TJsonValue&& val) noexcept { + if (this == &val) + return *this; + TJsonValue tmp(std::move(val)); + tmp.Swap(*this); + return *this; + } + + TJsonValue::TJsonValue(const bool value) noexcept { + 
SetType(JSON_BOOLEAN); + Value.Boolean = value; + } + + TJsonValue::TJsonValue(const long long value) noexcept { + SetType(JSON_INTEGER); + Value.Integer = value; + } + + TJsonValue::TJsonValue(const unsigned long long value) noexcept { + SetType(JSON_UINTEGER); + Value.UInteger = value; + } + + TJsonValue::TJsonValue(const int value) noexcept { + SetType(JSON_INTEGER); + Value.Integer = value; + } + + TJsonValue::TJsonValue(const unsigned int value) noexcept { + SetType(JSON_UINTEGER); + Value.UInteger = value; + } + + TJsonValue::TJsonValue(const long value) noexcept { + SetType(JSON_INTEGER); + Value.Integer = value; + } + + TJsonValue::TJsonValue(const unsigned long value) noexcept { + SetType(JSON_UINTEGER); + Value.UInteger = value; + } + + TJsonValue::TJsonValue(const double value) noexcept { + SetType(JSON_DOUBLE); + Value.Double = value; + } + + TJsonValue::TJsonValue(TString value) { + SetType(JSON_STRING); + Value.String = std::move(value); + } + + TJsonValue::TJsonValue(const TStringBuf value) { + SetType(JSON_STRING); + Value.String = value; + } + + TJsonValue::TJsonValue(const char* value) { + SetType(JSON_STRING); + Value.String = value; + } + + EJsonValueType TJsonValue::GetType() const noexcept { + return Type; + } + + TJsonValue& TJsonValue::SetType(const EJsonValueType type) { + if (Type == type) + return *this; + + Clear(); + Type = type; + + switch (Type) { + case JSON_STRING: + new (&Value.String) TString(); + break; + case JSON_MAP: + Value.Map = new TMapType(); + break; + case JSON_ARRAY: + Value.Array = new TArray(); + break; + case JSON_UNDEFINED: + case JSON_NULL: + case JSON_BOOLEAN: + case JSON_INTEGER: + case JSON_UINTEGER: + case JSON_DOUBLE: + break; + } + + return *this; + } + + TJsonValue& TJsonValue::SetValue(const TJsonValue& value) { + return *this = value; + } + + TJsonValue& TJsonValue::SetValue(TJsonValue&& value) { + *this = std::move(value); + return *this; + } + + TJsonValue& TJsonValue::InsertValue(const TString& key, 
const TJsonValue& value) { + SetType(JSON_MAP); + return (*Value.Map)[key] = value; + } + + TJsonValue& TJsonValue::InsertValue(const TStringBuf key, const TJsonValue& value) { + SetType(JSON_MAP); + return (*Value.Map)[key] = value; + } + + TJsonValue& TJsonValue::InsertValue(const char* key, const TJsonValue& value) { + SetType(JSON_MAP); + return (*Value.Map)[key] = value; + } + + TJsonValue& TJsonValue::InsertValue(const TString& key, TJsonValue&& value) { + SetType(JSON_MAP); + return (*Value.Map)[key] = std::move(value); + } + + TJsonValue& TJsonValue::InsertValue(const TStringBuf key, TJsonValue&& value) { + SetType(JSON_MAP); + return (*Value.Map)[key] = std::move(value); + } + + TJsonValue& TJsonValue::InsertValue(const char* key, TJsonValue&& value) { + SetType(JSON_MAP); + return (*Value.Map)[key] = std::move(value); + } + + TJsonValue& TJsonValue::Back() { + BackChecks(); + return Value.Array->back(); + } + + const TJsonValue& TJsonValue::Back() const { + BackChecks(); + return Value.Array->back(); + } + + TJsonValue& TJsonValue::AppendValue(const TJsonValue& value) { + SetType(JSON_ARRAY); + Value.Array->push_back(value); + return Value.Array->back(); + } + + TJsonValue& TJsonValue::AppendValue(TJsonValue&& value) { + SetType(JSON_ARRAY); + Value.Array->push_back(std::move(value)); + return Value.Array->back(); + } + + void TJsonValue::EraseValue(const TStringBuf key) { + if (IsMap()) { + TMapType::iterator it = Value.Map->find(key); + if (it != Value.Map->end()) + Value.Map->erase(it); + } + } + + void TJsonValue::EraseValue(const size_t index) { + if (IsArray()) { + if (index >= Value.Array->size()) { + return; + } + TArray::iterator it = Value.Array->begin() + index; + Value.Array->erase(it); + } + } + + void TJsonValue::Clear() noexcept { + switch (Type) { + case JSON_STRING: + Value.String.~TString(); + break; + case JSON_MAP: + delete Value.Map; + break; + case JSON_ARRAY: + delete Value.Array; + break; + case JSON_UNDEFINED: + case JSON_NULL: + 
case JSON_BOOLEAN: + case JSON_INTEGER: + case JSON_UINTEGER: + case JSON_DOUBLE: + break; + } + Zero(Value); + Type = JSON_UNDEFINED; + } + + TJsonValue& TJsonValue::operator[](const size_t idx) { + SetType(JSON_ARRAY); + if (Value.Array->size() <= idx) + Value.Array->resize(idx + 1); + return (*Value.Array)[idx]; + } + + TJsonValue& TJsonValue::operator[](const TStringBuf& key) { + SetType(JSON_MAP); + return (*Value.Map)[key]; + } + + namespace { + struct TDefaultsHolder { + const TString String{}; + const TJsonValue::TMapType Map{}; + const TJsonValue::TArray Array{}; + const TJsonValue Value{}; + }; + } + + const TJsonValue& TJsonValue::operator[](const size_t idx) const noexcept { + const TJsonValue* ret = nullptr; + if (GetValuePointer(idx, &ret)) + return *ret; + + return Singleton<TDefaultsHolder>()->Value; + } + + const TJsonValue& TJsonValue::operator[](const TStringBuf& key) const noexcept { + const TJsonValue* ret = nullptr; + if (GetValuePointer(key, &ret)) + return *ret; + + return Singleton<TDefaultsHolder>()->Value; + } + + bool TJsonValue::GetBoolean() const { + return Type != JSON_BOOLEAN ? 
false : Value.Boolean; + } + + long long TJsonValue::GetInteger() const { + if (!IsInteger()) + return 0; + + switch (Type) { + case JSON_INTEGER: + return Value.Integer; + + case JSON_UINTEGER: + return Value.UInteger; + + case JSON_DOUBLE: + return Value.Double; + + default: + Y_ASSERT(false && "Unexpected type."); + return 0; + } + } + + unsigned long long TJsonValue::GetUInteger() const { + if (!IsUInteger()) + return 0; + + switch (Type) { + case JSON_UINTEGER: + return Value.UInteger; + + case JSON_INTEGER: + return Value.Integer; + + case JSON_DOUBLE: + return Value.Double; + + default: + Y_ASSERT(false && "Unexpected type."); + return 0; + } + } + + double TJsonValue::GetDouble() const { + if (!IsDouble()) + return 0.0; + + switch (Type) { + case JSON_DOUBLE: + return Value.Double; + + case JSON_INTEGER: + return Value.Integer; + + case JSON_UINTEGER: + return Value.UInteger; + + default: + Y_ASSERT(false && "Unexpected type."); + return 0.0; + } + } + + const TString& TJsonValue::GetString() const { + return Type != JSON_STRING ? Singleton<TDefaultsHolder>()->String : Value.String; + } + + const TJsonValue::TMapType& TJsonValue::GetMap() const { + return Type != JSON_MAP ? Singleton<TDefaultsHolder>()->Map : *Value.Map; + } + + const TJsonValue::TArray& TJsonValue::GetArray() const { + return (Type != JSON_ARRAY) ? 
Singleton<TDefaultsHolder>()->Array : *Value.Array; + } + + bool TJsonValue::GetBooleanSafe() const { + if (Type != JSON_BOOLEAN) + ythrow TJsonException() << "Not a boolean"; + + return Value.Boolean; + } + + long long TJsonValue::GetIntegerSafe() const { + if (!IsInteger()) + ythrow TJsonException() << "Not an integer"; + + return GetInteger(); + } + + unsigned long long TJsonValue::GetUIntegerSafe() const { + if (!IsUInteger()) + ythrow TJsonException() << "Not an unsigned integer"; + + return GetUInteger(); + } + + double TJsonValue::GetDoubleSafe() const { + if (!IsDouble()) + ythrow TJsonException() << "Not a double"; + + return GetDouble(); + } + + const TString& TJsonValue::GetStringSafe() const { + if (Type != JSON_STRING) + ythrow TJsonException() << "Not a string"; + + return Value.String; + } + + bool TJsonValue::GetBooleanSafe(const bool defaultValue) const { + if (Type == JSON_UNDEFINED) + return defaultValue; + + return GetBooleanSafe(); + } + + long long TJsonValue::GetIntegerSafe(const long long defaultValue) const { + if (Type == JSON_UNDEFINED) + return defaultValue; + + return GetIntegerSafe(); + } + + unsigned long long TJsonValue::GetUIntegerSafe(const unsigned long long defaultValue) const { + if (Type == JSON_UNDEFINED) + return defaultValue; + + return GetUIntegerSafe(); + } + + double TJsonValue::GetDoubleSafe(const double defaultValue) const { + if (Type == JSON_UNDEFINED) + return defaultValue; + + return GetDoubleSafe(); + } + + TString TJsonValue::GetStringSafe(const TString& defaultValue) const { + if (Type == JSON_UNDEFINED) + return defaultValue; + + return GetStringSafe(); + } + + const TJsonValue::TMapType& TJsonValue::GetMapSafe() const { + if (Type != JSON_MAP) + ythrow TJsonException() << "Not a map"; + + return *Value.Map; + } + + TJsonValue::TMapType& TJsonValue::GetMapSafe() { + return const_cast<TJsonValue::TMapType&>(const_cast<const TJsonValue*>(this)->GetMapSafe()); + } + + const TJsonValue::TArray& 
TJsonValue::GetArraySafe() const { + if (Type != JSON_ARRAY) + ythrow TJsonException() << "Not an array"; + + return *Value.Array; + } + + TJsonValue::TArray& TJsonValue::GetArraySafe() { + return const_cast<TJsonValue::TArray&>(const_cast<const TJsonValue*>(this)->GetArraySafe()); + } + + bool TJsonValue::GetBooleanRobust() const noexcept { + switch (Type) { + case JSON_ARRAY: + return !Value.Array->empty(); + case JSON_MAP: + return !Value.Map->empty(); + case JSON_INTEGER: + case JSON_UINTEGER: + case JSON_DOUBLE: + return GetIntegerRobust(); + case JSON_STRING: + return GetIntegerRobust() || IsTrue(Value.String); + case JSON_NULL: + case JSON_UNDEFINED: + default: + return false; + case JSON_BOOLEAN: + return Value.Boolean; + } + } + + long long TJsonValue::GetIntegerRobust() const noexcept { + switch (Type) { + case JSON_ARRAY: + return Value.Array->size(); + case JSON_MAP: + return Value.Map->size(); + case JSON_BOOLEAN: + return Value.Boolean; + case JSON_DOUBLE: + return GetDoubleRobust(); + case JSON_STRING: + try { + i64 res = 0; + if (Value.String && TryFromString(Value.String, res)) { + return res; + } + } catch (const yexception&) { + } + return 0; + case JSON_NULL: + case JSON_UNDEFINED: + default: + return 0; + case JSON_INTEGER: + case JSON_UINTEGER: + return Value.Integer; + } + } + + unsigned long long TJsonValue::GetUIntegerRobust() const noexcept { + switch (Type) { + case JSON_ARRAY: + return Value.Array->size(); + case JSON_MAP: + return Value.Map->size(); + case JSON_BOOLEAN: + return Value.Boolean; + case JSON_DOUBLE: + return GetDoubleRobust(); + case JSON_STRING: + try { + ui64 res = 0; + if (Value.String && TryFromString(Value.String, res)) { + return res; + } + } catch (const yexception&) { + } + return 0; + case JSON_NULL: + case JSON_UNDEFINED: + default: + return 0; + case JSON_INTEGER: + case JSON_UINTEGER: + return Value.UInteger; + } + } + + double TJsonValue::GetDoubleRobust() const noexcept { + switch (Type) { + case JSON_ARRAY: 
+ return Value.Array->size(); + case JSON_MAP: + return Value.Map->size(); + case JSON_BOOLEAN: + return Value.Boolean; + case JSON_INTEGER: + return Value.Integer; + case JSON_UINTEGER: + return Value.UInteger; + case JSON_STRING: + try { + double res = 0; + if (Value.String && TryFromString(Value.String, res)) { + return res; + } + } catch (const yexception&) { + } + return 0; + case JSON_NULL: + case JSON_UNDEFINED: + default: + return 0; + case JSON_DOUBLE: + return Value.Double; + } + } + + TString TJsonValue::GetStringRobust() const { + switch (Type) { + case JSON_ARRAY: + case JSON_MAP: + case JSON_BOOLEAN: + case JSON_DOUBLE: + case JSON_INTEGER: + case JSON_UINTEGER: + case JSON_NULL: + case JSON_UNDEFINED: + default: { + NJsonWriter::TBuf sout; + sout.WriteJsonValue(this); + return sout.Str(); + } + case JSON_STRING: + return Value.String; + } + } + + bool TJsonValue::GetBoolean(bool* value) const noexcept { + if (Type != JSON_BOOLEAN) + return false; + + *value = Value.Boolean; + return true; + } + + bool TJsonValue::GetInteger(long long* value) const noexcept { + if (!IsInteger()) + return false; + + *value = GetInteger(); + return true; + } + + bool TJsonValue::GetUInteger(unsigned long long* value) const noexcept { + if (!IsUInteger()) + return false; + + *value = GetUInteger(); + return true; + } + + bool TJsonValue::GetDouble(double* value) const noexcept { + if (!IsDouble()) + return false; + + *value = GetDouble(); + return true; + } + + bool TJsonValue::GetString(TString* value) const { + if (Type != JSON_STRING) + return false; + + *value = Value.String; + return true; + } + + bool TJsonValue::GetMap(TJsonValue::TMapType* value) const { + if (Type != JSON_MAP) + return false; + + *value = *Value.Map; + return true; + } + + bool TJsonValue::GetArray(TJsonValue::TArray* value) const { + if (Type != JSON_ARRAY) + return false; + + *value = *Value.Array; + return true; + } + + bool TJsonValue::GetMapPointer(const TJsonValue::TMapType** value) const 
noexcept { + if (Type != JSON_MAP) + return false; + + *value = Value.Map; + return true; + } + + bool TJsonValue::GetArrayPointer(const TJsonValue::TArray** value) const noexcept { + if (Type != JSON_ARRAY) + return false; + + *value = Value.Array; + return true; + } + + bool TJsonValue::GetValue(const size_t index, TJsonValue* value) const { + const TJsonValue* tmp = nullptr; + if (GetValuePointer(index, &tmp)) { + *value = *tmp; + return true; + } + return false; + } + + bool TJsonValue::GetValue(const TStringBuf key, TJsonValue* value) const { + const TJsonValue* tmp = nullptr; + if (GetValuePointer(key, &tmp)) { + *value = *tmp; + return true; + } + return false; + } + + bool TJsonValue::GetValuePointer(const size_t index, const TJsonValue** value) const noexcept { + if (Type == JSON_ARRAY && index < Value.Array->size()) { + *value = &(*Value.Array)[index]; + return true; + } + return false; + } + + bool TJsonValue::GetValuePointer(const TStringBuf key, const TJsonValue** value) const noexcept { + if (Type == JSON_MAP) { + const TMapType::const_iterator it = Value.Map->find(key); + if (it != Value.Map->end()) { + *value = &(it->second); + return true; + } + } + return false; + } + + bool TJsonValue::GetValuePointer(const TStringBuf key, TJsonValue** value) noexcept { + return static_cast<const TJsonValue*>(this)->GetValuePointer(key, const_cast<const TJsonValue**>(value)); + } + + bool TJsonValue::IsNull() const noexcept { + return Type == JSON_NULL; + } + + bool TJsonValue::IsBoolean() const noexcept { + return Type == JSON_BOOLEAN; + } + + bool TJsonValue::IsInteger() const noexcept { + switch (Type) { + case JSON_INTEGER: + return true; + + case JSON_UINTEGER: + return (Value.UInteger <= static_cast<unsigned long long>(Max<long long>())); + + case JSON_DOUBLE: + return ((long long)Value.Double == Value.Double); + + default: + return false; + } + } + + bool TJsonValue::IsUInteger() const noexcept { + switch (Type) { + case JSON_UINTEGER: + return true; + + 
case JSON_INTEGER: + return (Value.Integer >= 0); + + case JSON_DOUBLE: + return ((unsigned long long)Value.Double == Value.Double); + + default: + return false; + } + } + + bool TJsonValue::IsDouble() const noexcept { + // Check whether we can convert integer to floating-point + // without precision loss. + switch (Type) { + case JSON_DOUBLE: + return true; + + case JSON_INTEGER: + return (1ll << std::numeric_limits<double>::digits) >= Abs(Value.Integer); + + case JSON_UINTEGER: + return (1ull << std::numeric_limits<double>::digits) >= Value.UInteger; + + default: + return false; + } + } + + namespace { + template <class TPtr, class T> + TPtr* CreateOrNullptr(TPtr* p, T key, std::true_type /*create*/) { + return &(*p)[key]; + } + + template <class TPtr, class T> + TPtr* CreateOrNullptr(const TPtr* p, T key, std::false_type /*create*/) noexcept { + const TPtr* const next = &(*p)[key]; + return next->IsDefined() ? const_cast<TPtr*>(next) : nullptr; + } + + template <bool Create, class TJsonPtr> + TJsonPtr GetValuePtrByPath(TJsonPtr currentJson, TStringBuf path, char delimiter) noexcept(!Create) { + static_assert( + !(Create && std::is_const<std::remove_pointer_t<TJsonPtr>>::value), + "TJsonPtr must be a `TJsonValue*` if `Create` is true"); + constexpr std::integral_constant<bool, Create> create_tag{}; + + while (!path.empty()) { + size_t index = 0; + const TStringBuf step = path.NextTok(delimiter); + if (step.size() > 2 && *step.begin() == '[' && step.back() == ']' && TryFromString(step.substr(1, step.size() - 2), index)) { + currentJson = CreateOrNullptr(currentJson, index, create_tag); + } else { + currentJson = CreateOrNullptr(currentJson, step, create_tag); + } + + if (!currentJson) { + return nullptr; + } + } + + return currentJson; + } + } // anonymous namespace + + bool TJsonValue::GetValueByPath(const TStringBuf path, TJsonValue& result, char delimiter) const { + const TJsonValue* const ptr = GetValuePtrByPath<false>(this, path, delimiter); + if (ptr) { + 
result = *ptr; + return true; + } + return false; + } + + bool TJsonValue::SetValueByPath(const TStringBuf path, const TJsonValue& value, char delimiter) { + TJsonValue* const ptr = GetValuePtrByPath<true>(this, path, delimiter); + if (ptr) { + *ptr = value; + return true; + } + return false; + } + + bool TJsonValue::SetValueByPath(const TStringBuf path, TJsonValue&& value, char delimiter) { + TJsonValue* const ptr = GetValuePtrByPath<true>(this, path, delimiter); + if (ptr) { + *ptr = std::move(value); + return true; + } + return false; + } + + const TJsonValue* TJsonValue::GetValueByPath(const TStringBuf key, char delim) const noexcept { + return GetValuePtrByPath<false>(this, key, delim); + } + + TJsonValue* TJsonValue::GetValueByPath(const TStringBuf key, char delim) noexcept { + return GetValuePtrByPath<false>(this, key, delim); + } + + void TJsonValue::DoScan(const TString& path, TJsonValue* parent, IScanCallback& callback) { + if (!callback.Do(path, parent, *this)) { + return; + } + + if (Type == JSON_MAP) { + for (auto&& i : *Value.Map) { + i.second.DoScan(!!path ? 
TString::Join(path, ".", i.first) : i.first, this, callback); + } + } else if (Type == JSON_ARRAY) { + for (ui32 i = 0; i < Value.Array->size(); ++i) { + (*Value.Array)[i].DoScan(TString::Join(path, "[", ToString(i), "]"), this, callback); + } + } + } + + void TJsonValue::Scan(IScanCallback& callback) { + DoScan("", nullptr, callback); + } + + bool TJsonValue::IsString() const noexcept { + return Type == JSON_STRING; + } + + bool TJsonValue::IsMap() const noexcept { + return Type == JSON_MAP; + } + + bool TJsonValue::IsArray() const noexcept { + return Type == JSON_ARRAY; + } + + bool TJsonValue::Has(const TStringBuf& key) const noexcept { + return Type == JSON_MAP && Value.Map->contains(key); + } + + bool TJsonValue::Has(size_t key) const noexcept { + return Type == JSON_ARRAY && Value.Array->size() > key; + } + + bool TJsonValue::operator==(const TJsonValue& rhs) const { + switch (Type) { + case JSON_UNDEFINED: { + return (rhs.GetType() == JSON_UNDEFINED); + } + + case JSON_NULL: { + return rhs.IsNull(); + } + + case JSON_BOOLEAN: { + return (rhs.IsBoolean() && Value.Boolean == rhs.Value.Boolean); + } + + case JSON_INTEGER: { + return (rhs.IsInteger() && GetInteger() == rhs.GetInteger()); + } + + case JSON_UINTEGER: { + return (rhs.IsUInteger() && GetUInteger() == rhs.GetUInteger()); + } + + case JSON_STRING: { + return (rhs.IsString() && Value.String == rhs.Value.String); + } + + case JSON_DOUBLE: { + return (rhs.IsDouble() && fabs(GetDouble() - rhs.GetDouble()) <= FLT_EPSILON); + } + + case JSON_MAP: + return AreJsonMapsEqual(*this, rhs); + + case JSON_ARRAY: + return AreJsonArraysEqual(*this, rhs); + + default: + Y_ASSERT(false && "Unknown type."); + return false; + } + } + + void TJsonValue::SwapWithUndefined(TJsonValue& output) noexcept { + if (Type == JSON_STRING) { + static_assert(std::is_nothrow_move_constructible<TString>::value, "noexcept violation! Add some try {} catch (...) 
logic"); + new (&output.Value.String) TString(std::move(Value.String)); + Value.String.~TString(); + } else { + std::memcpy(&output.Value, &Value, sizeof(Value)); + } + + output.Type = Type; + Type = JSON_UNDEFINED; + } + + void TJsonValue::Swap(TJsonValue& rhs) noexcept { + TJsonValue tmp(std::move(*this)); + rhs.SwapWithUndefined(*this); + tmp.SwapWithUndefined(rhs); + } + + void TJsonValue::Save(IOutputStream* s) const { + ::Save(s, static_cast<ui8>(Type)); + switch (Type) { + case JSON_UNDEFINED:break; + case JSON_NULL:break; + case JSON_BOOLEAN: + ::Save(s, Value.Boolean); + break; + case JSON_INTEGER: + ::Save(s, Value.Integer); + break; + case JSON_UINTEGER: + ::Save(s, Value.UInteger); + break; + case JSON_DOUBLE: + ::Save(s, Value.Double); + break; + case JSON_STRING: + ::Save(s, Value.String); + break; + case JSON_MAP: + ::Save(s, *Value.Map); + break; + case JSON_ARRAY: + ::Save(s, *Value.Array); + break; + } + } + + void TJsonValue::Load(IInputStream* s) { + { + ui8 loadedType = {}; + ::Load(s, loadedType); + SetType(static_cast<EJsonValueType>(loadedType)); + } + switch (Type) { + case JSON_UNDEFINED:break; + case JSON_NULL:break; + case JSON_BOOLEAN: + ::Load(s, Value.Boolean); + break; + case JSON_INTEGER: + ::Load(s, Value.Integer); + break; + case JSON_UINTEGER: + ::Load(s, Value.UInteger); + break; + case JSON_DOUBLE: + ::Load(s, Value.Double); + break; + case JSON_STRING: + ::Load(s, Value.String); + break; + case JSON_MAP: + ::Load(s, *Value.Map); + break; + case JSON_ARRAY: + ::Load(s, *Value.Array); + break; + } + } + + //**************************************************************** + + bool GetMapPointer(const TJsonValue& jv, const size_t index, const TJsonValue::TMapType** value) { + const TJsonValue* v; + if (!jv.GetValuePointer(index, &v) || !v->IsMap()) + return false; + + *value = &v->GetMap(); + return true; + } + + bool GetArrayPointer(const TJsonValue& jv, const size_t index, const TJsonValue::TArray** value) { + const TJsonValue* 
v; + if (!jv.GetValuePointer(index, &v) || !v->IsArray()) + return false; + + *value = &v->GetArray(); + return true; + } + + bool GetMapPointer(const TJsonValue& jv, const TStringBuf key, const TJsonValue::TMapType** value) { + const TJsonValue* v; + if (!jv.GetValuePointer(key, &v) || !v->IsMap()) + return false; + + *value = &v->GetMap(); + return true; + } + + bool GetArrayPointer(const TJsonValue& jv, const TStringBuf key, const TJsonValue::TArray** value) { + const TJsonValue* v; + if (!jv.GetValuePointer(key, &v) || !v->IsArray()) + return false; + + *value = &v->GetArray(); + return true; + } + + void TJsonValue::BackChecks() const { + if (Type != JSON_ARRAY) + ythrow TJsonException() << "Not an array"; + + if (Value.Array->empty()) + ythrow TJsonException() << "Get back on empty array"; + } +} + +template <> +void Out<NJson::TJsonValue>(IOutputStream& out, const NJson::TJsonValue& v) { + NJsonWriter::TBuf buf(NJsonWriter::HEM_DONT_ESCAPE_HTML, &out); + buf.WriteJsonValue(&v); +} diff --git a/library/cpp/json/writer/json_value.h b/library/cpp/json/writer/json_value.h new file mode 100644 index 0000000000..3f0f50bc4c --- /dev/null +++ b/library/cpp/json/writer/json_value.h @@ -0,0 +1,294 @@ +#pragma once + +#include <library/cpp/json/common/defs.h> + +#include <util/generic/string.h> +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/generic/deque.h> +#include <util/generic/utility.h> +#include <util/generic/yexception.h> + +namespace NJson { + enum EJsonValueType { + JSON_UNDEFINED /* "Undefined" */, + JSON_NULL /* "Null" */, + JSON_BOOLEAN /* "Boolean" */, + JSON_INTEGER /* "Integer" */, + JSON_DOUBLE /* "Double" */, + JSON_STRING /* "String" */, + JSON_MAP /* "Map" */, + JSON_ARRAY /* "Array" */, + JSON_UINTEGER /* "UInteger" */ + }; + + class TJsonValue; + + class IScanCallback { + public: + virtual ~IScanCallback() = default; + + virtual bool Do(const TString& path, TJsonValue* parent, TJsonValue& value) = 0; + }; + + 
class TJsonValue { + void Clear() noexcept; + + public: + typedef THashMap<TString, TJsonValue> TMapType; + typedef TDeque<TJsonValue> TArray; + + TJsonValue() noexcept = default; + TJsonValue(EJsonValueType type); + TJsonValue(bool value) noexcept; + TJsonValue(int value) noexcept; + TJsonValue(unsigned int value) noexcept; + TJsonValue(long value) noexcept; + TJsonValue(unsigned long value) noexcept; + TJsonValue(long long value) noexcept; + TJsonValue(unsigned long long value) noexcept; + TJsonValue(double value) noexcept; + TJsonValue(TString value); + TJsonValue(const char* value); + template <class T> + TJsonValue(const T*) = delete; + TJsonValue(TStringBuf value); + + TJsonValue(const std::string& s) + : TJsonValue(TStringBuf(s)) + { + } + + TJsonValue(const TJsonValue& vval); + TJsonValue(TJsonValue&& vval) noexcept; + + TJsonValue& operator=(const TJsonValue& val); + TJsonValue& operator=(TJsonValue&& val) noexcept; + + ~TJsonValue() { + Clear(); + } + + EJsonValueType GetType() const noexcept; + TJsonValue& SetType(EJsonValueType type); + + TJsonValue& SetValue(const TJsonValue& value); + TJsonValue& SetValue(TJsonValue&& value); + + // for Map + TJsonValue& InsertValue(const TString& key, const TJsonValue& value); + TJsonValue& InsertValue(TStringBuf key, const TJsonValue& value); + TJsonValue& InsertValue(const char* key, const TJsonValue& value); + TJsonValue& InsertValue(const TString& key, TJsonValue&& value); + TJsonValue& InsertValue(TStringBuf key, TJsonValue&& value); + TJsonValue& InsertValue(const char* key, TJsonValue&& value); + + // for Array + TJsonValue& AppendValue(const TJsonValue& value); + TJsonValue& AppendValue(TJsonValue&& value); + TJsonValue& Back(); + const TJsonValue& Back() const; + + bool GetValueByPath(TStringBuf path, TJsonValue& result, char delimiter = '.') const; + bool SetValueByPath(TStringBuf path, const TJsonValue& value, char delimiter = '.'); + bool SetValueByPath(TStringBuf path, TJsonValue&& value, char delimiter 
= '.'); + + // returns NULL on failure + const TJsonValue* GetValueByPath(TStringBuf path, char delimiter = '.') const noexcept; + TJsonValue* GetValueByPath(TStringBuf path, char delimiter = '.') noexcept; + + void EraseValue(TStringBuf key); + void EraseValue(size_t index); + + TJsonValue& operator[](size_t idx); + TJsonValue& operator[](const TStringBuf& key); + const TJsonValue& operator[](size_t idx) const noexcept; + const TJsonValue& operator[](const TStringBuf& key) const noexcept; + + bool GetBoolean() const; + long long GetInteger() const; + unsigned long long GetUInteger() const; + double GetDouble() const; + const TString& GetString() const; + const TMapType& GetMap() const; + const TArray& GetArray() const; + + //throwing TJsonException possible + bool GetBooleanSafe() const; + long long GetIntegerSafe() const; + unsigned long long GetUIntegerSafe() const; + double GetDoubleSafe() const; + const TString& GetStringSafe() const; + const TMapType& GetMapSafe() const; + TMapType& GetMapSafe(); + const TArray& GetArraySafe() const; + TArray& GetArraySafe(); + + bool GetBooleanSafe(bool defaultValue) const; + long long GetIntegerSafe(long long defaultValue) const; + unsigned long long GetUIntegerSafe(unsigned long long defaultValue) const; + double GetDoubleSafe(double defaultValue) const; + TString GetStringSafe(const TString& defaultValue) const; + + bool GetBooleanRobust() const noexcept; + long long GetIntegerRobust() const noexcept; + unsigned long long GetUIntegerRobust() const noexcept; + double GetDoubleRobust() const noexcept; + TString GetStringRobust() const; + + // Exception-free accessors + bool GetBoolean(bool* value) const noexcept; + bool GetInteger(long long* value) const noexcept; + bool GetUInteger(unsigned long long* value) const noexcept; + bool GetDouble(double* value) const noexcept; + bool GetMapPointer(const TMapType** value) const noexcept; + bool GetArrayPointer(const TArray** value) const noexcept; + + bool GetString(TString* 
value) const; + bool GetMap(TMapType* value) const; + bool GetArray(TArray* value) const; + bool GetValue(size_t index, TJsonValue* value) const; + bool GetValue(TStringBuf key, TJsonValue* value) const; + bool GetValuePointer(size_t index, const TJsonValue** value) const noexcept; + bool GetValuePointer(TStringBuf key, const TJsonValue** value) const noexcept; + bool GetValuePointer(TStringBuf key, TJsonValue** value) noexcept; + + // Checking for defined non-null value + bool IsDefined() const noexcept { + return Type != JSON_UNDEFINED && Type != JSON_NULL; + } + + bool IsNull() const noexcept; + bool IsBoolean() const noexcept; + bool IsDouble() const noexcept; + bool IsString() const noexcept; + bool IsMap() const noexcept; + bool IsArray() const noexcept; + + /// @return true if JSON_INTEGER or (JSON_UINTEGER and Value <= Max<long long>) + bool IsInteger() const noexcept; + + /// @return true if JSON_UINTEGER or (JSON_INTEGER and Value >= 0) + bool IsUInteger() const noexcept; + + bool Has(const TStringBuf& key) const noexcept; + bool Has(size_t key) const noexcept; + + void Scan(IScanCallback& callback); + + /// Non-robust comparison. 
+ bool operator==(const TJsonValue& rhs) const; + + bool operator!=(const TJsonValue& rhs) const { + return !(*this == rhs); + } + + void Swap(TJsonValue& rhs) noexcept; + + // save using util/ysaveload.h serialization (not to JSON stream) + void Save(IOutputStream* s) const; + + // load using util/ysaveload.h serialization (not as JSON stream) + void Load(IInputStream* s); + + static const TJsonValue UNDEFINED; + + private: + EJsonValueType Type = JSON_UNDEFINED; + union TValueUnion { + bool Boolean; + long long Integer; + unsigned long long UInteger; + double Double; + TString String; + TMapType* Map; + TArray* Array; + + TValueUnion() noexcept { + Zero(*this); + } + ~TValueUnion() noexcept { + } + }; + TValueUnion Value; + void DoScan(const TString& path, TJsonValue* parent, IScanCallback& callback); + void SwapWithUndefined(TJsonValue& output) noexcept; + + /** + @throw yexception if Back shouldn't be called on the object. + */ + void BackChecks() const; + }; + + inline bool GetBoolean(const TJsonValue& jv, size_t index, bool* value) noexcept { + return jv[index].GetBoolean(value); + } + + inline bool GetInteger(const TJsonValue& jv, size_t index, long long* value) noexcept { + return jv[index].GetInteger(value); + } + + inline bool GetUInteger(const TJsonValue& jv, size_t index, unsigned long long* value) noexcept { + return jv[index].GetUInteger(value); + } + + inline bool GetDouble(const TJsonValue& jv, size_t index, double* value) noexcept { + return jv[index].GetDouble(value); + } + + inline bool GetString(const TJsonValue& jv, size_t index, TString* value) { + return jv[index].GetString(value); + } + + bool GetMapPointer(const TJsonValue& jv, size_t index, const TJsonValue::TMapType** value); + bool GetArrayPointer(const TJsonValue& jv, size_t index, const TJsonValue::TArray** value); + + inline bool GetBoolean(const TJsonValue& jv, TStringBuf key, bool* value) noexcept { + return jv[key].GetBoolean(value); + } + + inline bool GetInteger(const TJsonValue& 
jv, TStringBuf key, long long* value) noexcept { + return jv[key].GetInteger(value); + } + + inline bool GetUInteger(const TJsonValue& jv, TStringBuf key, unsigned long long* value) noexcept { + return jv[key].GetUInteger(value); + } + + inline bool GetDouble(const TJsonValue& jv, TStringBuf key, double* value) noexcept { + return jv[key].GetDouble(value); + } + + inline bool GetString(const TJsonValue& jv, TStringBuf key, TString* value) { + return jv[key].GetString(value); + } + + bool GetMapPointer(const TJsonValue& jv, const TStringBuf key, const TJsonValue::TMapType** value); + bool GetArrayPointer(const TJsonValue& jv, const TStringBuf key, const TJsonValue::TArray** value); + + class TJsonMap: public TJsonValue { + public: + TJsonMap() + : TJsonValue(NJson::JSON_MAP) + {} + + TJsonMap(const std::initializer_list<std::pair<TString, TJsonValue>>& list) + : TJsonValue(NJson::JSON_MAP) + { + GetMapSafe() = THashMap<TString, TJsonValue>(list); + } + }; + + class TJsonArray: public TJsonValue { + public: + TJsonArray() + : TJsonValue(NJson::JSON_ARRAY) + {} + + TJsonArray(const std::initializer_list<TJsonValue>& list) + : TJsonValue(NJson::JSON_ARRAY) + { + GetArraySafe() = TJsonValue::TArray(list); + } + }; +} diff --git a/library/cpp/json/writer/json_value_ut.cpp b/library/cpp/json/writer/json_value_ut.cpp new file mode 100644 index 0000000000..dc7f6affdf --- /dev/null +++ b/library/cpp/json/writer/json_value_ut.cpp @@ -0,0 +1,650 @@ +#include "json_value.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/input.h> + +using namespace NJson; + +Y_UNIT_TEST_SUITE(TJsonValueTest) { + Y_UNIT_TEST(UndefTest) { + TJsonValue undef; + TJsonValue null(JSON_NULL); + TJsonValue _false(false); + TJsonValue zeroInt(0); + TJsonValue zeroDouble(0.0); + TJsonValue emptyStr(""); + TJsonValue emptyArray(JSON_ARRAY); + TJsonValue emptyMap(JSON_MAP); + + UNIT_ASSERT(!undef.IsDefined()); + UNIT_ASSERT(!null.IsDefined()); // json NULL is undefined too! 
+ UNIT_ASSERT(_false.IsDefined()); + UNIT_ASSERT(zeroInt.IsDefined()); + UNIT_ASSERT(zeroDouble.IsDefined()); + UNIT_ASSERT(emptyStr.IsDefined()); + UNIT_ASSERT(emptyArray.IsDefined()); + UNIT_ASSERT(emptyMap.IsDefined()); + + UNIT_ASSERT(undef == TJsonValue()); + UNIT_ASSERT(undef != null); + UNIT_ASSERT(undef != _false); + UNIT_ASSERT(undef != zeroInt); + UNIT_ASSERT(undef != zeroDouble); + UNIT_ASSERT(undef != emptyStr); + UNIT_ASSERT(undef != emptyArray); + UNIT_ASSERT(undef != emptyMap); + } + + Y_UNIT_TEST(DefaultCompareTest) { + { + TJsonValue lhs; + TJsonValue rhs; + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs; + TJsonValue rhs(JSON_NULL); + UNIT_ASSERT(lhs != rhs); + UNIT_ASSERT(rhs != lhs); + } + } + + Y_UNIT_TEST(NullCompareTest) { + TJsonValue lhs(JSON_NULL); + TJsonValue rhs(JSON_NULL); + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + Y_UNIT_TEST(StringCompareTest) { + { + TJsonValue lhs(JSON_STRING); + TJsonValue rhs(JSON_STRING); + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs(""); + TJsonValue rhs(""); + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs("abc"); + TJsonValue rhs("abc"); + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs("1"); + TJsonValue rhs(1); + UNIT_ASSERT(lhs != rhs); + UNIT_ASSERT(rhs != lhs); + } + } + + Y_UNIT_TEST(ArrayCompareTest) { + { + TJsonValue lhs(JSON_ARRAY); + TJsonValue rhs(JSON_ARRAY); + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs; + TJsonValue rhs; + + lhs.AppendValue(TJsonValue()); + + UNIT_ASSERT(lhs != rhs); + UNIT_ASSERT(rhs != lhs); + } + + { + TJsonValue lhs; + TJsonValue rhs; + + lhs.AppendValue(1); + lhs.AppendValue("2"); + lhs.AppendValue(3.0); + lhs.AppendValue(TJsonValue()); + lhs.AppendValue(TJsonValue(JSON_NULL)); + + rhs.AppendValue(1); + rhs.AppendValue("2"); + rhs.AppendValue(3.0); + rhs.AppendValue(TJsonValue()); + 
rhs.AppendValue(TJsonValue(JSON_NULL)); + + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs; + TJsonValue rhs; + + lhs.AppendValue(1); + rhs.AppendValue("1"); + + UNIT_ASSERT(lhs != rhs); + UNIT_ASSERT(rhs != lhs); + } + } + + Y_UNIT_TEST(CompareTest) { + { + TJsonValue lhs; + lhs.InsertValue("null value", TJsonValue(JSON_NULL)); + lhs.InsertValue("int key", TJsonValue(10)); + lhs.InsertValue("double key", TJsonValue(11.11)); + lhs.InsertValue("string key", TJsonValue("string")); + + TJsonValue array; + array.AppendValue(1); + array.AppendValue(2); + array.AppendValue(3); + array.AppendValue("string"); + lhs.InsertValue("array", array); + + lhs.InsertValue("bool key", TJsonValue(true)); + + TJsonValue rhs; + rhs = lhs; + + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + // Insert keys in different orders + const int NUM_KEYS = 1000; + + TJsonValue lhs; + for (int i = 0; i < NUM_KEYS; ++i) + lhs.InsertValue(ToString(i), i); + + TJsonValue rhs; + for (int i = 0; i < NUM_KEYS; i += 2) + rhs.InsertValue(ToString(i), i); + for (int i = 1; i < NUM_KEYS; i += 2) + rhs.InsertValue(ToString(i), i); + + UNIT_ASSERT(lhs == rhs); + UNIT_ASSERT(rhs == lhs); + } + + { + TJsonValue lhs; + lhs.InsertValue("null value", TJsonValue(JSON_NULL)); + lhs.InsertValue("int key", TJsonValue(10)); + lhs.InsertValue("double key", TJsonValue(11.11)); + lhs.InsertValue("string key", TJsonValue("string")); + + TJsonValue array; + array.AppendValue(1); + array.AppendValue(2); + array.AppendValue(3); + array.AppendValue("string"); + lhs.InsertValue("array", array); + + lhs.InsertValue("bool key", TJsonValue(true)); + + TJsonValue rhs; + rhs.InsertValue("null value", TJsonValue(JSON_NULL)); + rhs.InsertValue("int key", TJsonValue(10)); + rhs.InsertValue("double key", TJsonValue(11.11)); + rhs.InsertValue("string key", TJsonValue("string")); + rhs.InsertValue("bool key", TJsonValue(true)); + + UNIT_ASSERT(lhs != rhs); + UNIT_ASSERT(rhs != lhs); + } + } 
+ + Y_UNIT_TEST(SwapTest) { + { + TJsonValue lhs; + lhs.InsertValue("a", "b"); + TJsonValue lhsCopy = lhs; + + TJsonValue rhs(JSON_NULL); + TJsonValue rhsCopy = rhs; + + UNIT_ASSERT(lhs == lhsCopy); + UNIT_ASSERT(rhs == rhsCopy); + + lhs.Swap(rhs); + + UNIT_ASSERT(rhs == lhsCopy); + UNIT_ASSERT(lhs == rhsCopy); + + lhs.Swap(rhs); + + UNIT_ASSERT(lhs == lhsCopy); + UNIT_ASSERT(rhs == rhsCopy); + } + } + + Y_UNIT_TEST(GetValueByPathTest) { + { + TJsonValue lhs; + TJsonValue first; + TJsonValue second; + TJsonValue last; + first.InsertValue("e", "f"); + second.InsertValue("c", first); + last.InsertValue("a", second); + lhs.InsertValue("l", last); + + TJsonValue result; + UNIT_ASSERT(lhs.GetValueByPath("l/a/c/e", result, '/')); + UNIT_ASSERT(result.GetStringRobust() == "f"); + UNIT_ASSERT(!lhs.GetValueByPath("l/a/c/se", result, '/')); + UNIT_ASSERT(lhs.GetValueByPath("l/a/c", result, '/')); + UNIT_ASSERT(result.GetStringRobust() == "{\"e\":\"f\"}"); + + // faster TStringBuf version + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l", '/'), last); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l/a", '/'), second); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l/a/c", '/'), first); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l.a.c.e", '.'), "f"); + UNIT_ASSERT_EQUAL(lhs.GetValueByPath("l/a/c/e/x", '/'), NULL); + UNIT_ASSERT_EQUAL(lhs.GetValueByPath("a/c/e/x", '/'), NULL); + UNIT_ASSERT_EQUAL(lhs.GetValueByPath("nokey", '/'), NULL); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("", '/'), lhs); // itself + + TJsonValue array; + TJsonValue third; + array[0] = first; + array[1] = second; + third["t"] = array; + + UNIT_ASSERT(array.GetValueByPath("[0].e", result)); + UNIT_ASSERT(result.GetStringRobust() == "f"); + UNIT_ASSERT(third.GetValueByPath("t.[0].e", result)); + UNIT_ASSERT(result.GetStringRobust() == "f"); + UNIT_ASSERT(third.GetValueByPath("t.[1].c.e", result)); + UNIT_ASSERT(result.GetStringRobust() == "f"); + UNIT_ASSERT(!third.GetValueByPath("t.[2]", result)); + + 
UNIT_ASSERT(third.SetValueByPath("t.[2]", "g")); + UNIT_ASSERT(third.GetValueByPath("t.[2]", result)); + UNIT_ASSERT(result.GetStringRobust() == "g"); + + UNIT_ASSERT(lhs.SetValueByPath("l/a/c/se", "h", '/')); + UNIT_ASSERT(lhs.GetValueByPath("l/a/c/se", result, '/')); + UNIT_ASSERT(result.GetStringRobust() == "h"); + } + } + + Y_UNIT_TEST(GetValueByPathConstTest) { + TJsonValue lhs; + TJsonValue first; + TJsonValue second; + TJsonValue last; + first.InsertValue("e", "f"); + second.InsertValue("c", first); + last.InsertValue("a", second); + lhs.InsertValue("l", last); + + { + const TJsonValue* result = lhs.GetValueByPath("l", '/'); + UNIT_ASSERT_EQUAL(*result, last); + } + { + const TJsonValue* result = lhs.GetValueByPath("l/a", '/'); + UNIT_ASSERT_EQUAL(*result, second); + } + { + const TJsonValue* result = lhs.GetValueByPath("l/a/c", '/'); + UNIT_ASSERT_EQUAL(*result, first); + } + { + const TJsonValue* result = lhs.GetValueByPath("l.a.c.e", '.'); + UNIT_ASSERT_EQUAL(*result, "f"); + } + { + const TJsonValue* result = lhs.GetValueByPath("l/a/c/e/x", '/'); + UNIT_ASSERT_EQUAL(result, nullptr); + } + { + const TJsonValue* result = lhs.GetValueByPath("a/c/e/x", '/'); + UNIT_ASSERT_EQUAL(result, nullptr); + } + { + const TJsonValue* result = lhs.GetValueByPath("nokey", '/'); + UNIT_ASSERT_EQUAL(result, nullptr); + } + { + const TJsonValue* result = lhs.GetValueByPath("", '/'); + UNIT_ASSERT_EQUAL(*result, lhs); // itself + } + + TJsonValue array; + TJsonValue third; + array[0] = first; + array[1] = second; + third["t"] = array; + + UNIT_ASSERT(array.GetValueByPath("[0].e", '.')->GetStringRobust() == "f"); + UNIT_ASSERT(third.GetValueByPath("t.[0].e", '.')->GetStringRobust() == "f"); + UNIT_ASSERT(third.GetValueByPath("t.[1].c.e", '.')->GetStringRobust() == "f"); + } + + Y_UNIT_TEST(EraseValueFromArray) { + { + TJsonValue vec; + vec.AppendValue(TJsonValue(0)); + vec.AppendValue(TJsonValue(1)); + vec.AppendValue(TJsonValue("2")); + vec.AppendValue(TJsonValue("3.14")); 
+ + TJsonValue vec1; + vec1.AppendValue(TJsonValue(0)); + vec1.AppendValue(TJsonValue("2")); + vec1.AppendValue(TJsonValue("3.14")); + + TJsonValue vec2; + vec2.AppendValue(TJsonValue(0)); + vec2.AppendValue(TJsonValue("2")); + + TJsonValue vec3; + vec3.AppendValue(TJsonValue("2")); + + TJsonValue vec4(JSON_ARRAY); + + UNIT_ASSERT(vec.IsArray()); + UNIT_ASSERT(vec.GetArray().size() == 4); + vec.EraseValue(1); + UNIT_ASSERT(vec.GetArray().size() == 3); + UNIT_ASSERT(vec == vec1); + vec.EraseValue(2); + UNIT_ASSERT(vec.GetArray().size() == 2); + UNIT_ASSERT(vec == vec2); + vec.EraseValue(0); + UNIT_ASSERT(vec.GetArray().size() == 1); + UNIT_ASSERT(vec == vec3); + vec.EraseValue(0); + UNIT_ASSERT(vec.GetArray().size() == 0); + UNIT_ASSERT(vec == vec4); + } + } + + Y_UNIT_TEST(NonConstMethodsTest) { + { + TJsonValue src; + TJsonValue value1; + value1.AppendValue(1); + value1.AppendValue(2); + src.InsertValue("key", value1); + src.InsertValue("key1", "HI!"); + + TJsonValue dst; + TJsonValue value2; + value2.AppendValue(1); + value2.AppendValue(2); + value2.AppendValue(3); + dst.InsertValue("key", value2); + + src.GetValueByPath("key", '.')->AppendValue(3); + src.EraseValue("key1"); + UNIT_ASSERT(src == dst); + + dst.GetValueByPath("key", '.')->EraseValue(0); + UNIT_ASSERT(src != dst); + src.GetValueByPath("key", '.')->EraseValue(0); + UNIT_ASSERT(src == dst); + } + + { + TJsonValue src; + TJsonValue value1; + TJsonValue arr1; + value1.InsertValue("key", "value"); + arr1.AppendValue(value1); + arr1.AppendValue(value1); + arr1.AppendValue(value1); + src.InsertValue("arr", arr1); + + TJsonValue dst; + TJsonValue value2; + TJsonValue arr2; + value2.InsertValue("key", "value"); + value2.InsertValue("yek", "eulav"); + arr2.AppendValue(value2); + arr2.AppendValue(value2); + arr2.AppendValue(value2); + arr2.AppendValue(value2); + dst.InsertValue("arr", arr2); + + src["arr"].AppendValue(value1); + for (auto& node : src["arr"].GetArraySafe()) { + node.InsertValue("yek", "eulav"); 
+ } + UNIT_ASSERT(src == dst); + } + + { + TJsonValue src; + TJsonValue value1; + TJsonValue arr1; + value1.InsertValue("key", "value"); + arr1.AppendValue(value1); + arr1.AppendValue(value1); + arr1.AppendValue(value1); + src.InsertValue("arr", arr1); + + TJsonValue dst; + TJsonValue value2; + TJsonValue arr2; + value2.InsertValue("key", "value"); + value2.InsertValue("yek", "eulav"); + arr2.AppendValue(value2); + arr2.AppendValue(value2); + arr2.AppendValue(value2); + arr2.AppendValue(value2); + dst.InsertValue("arr", arr2); + + src["arr"].AppendValue(value1); + for (auto& node : src.GetValueByPath("arr", '.')->GetArraySafe()) { + node.InsertValue("yek", "eulav"); + } + UNIT_ASSERT(src == dst); + } + + { + TJsonValue json; + json.InsertValue("key", "value"); + try { + json.GetArraySafe(); + UNIT_ASSERT(false); + } catch (const TJsonException&) { + } + + const TJsonValue constJson(json); + try { + constJson.GetArray(); + } catch (...) { + UNIT_ASSERT(false); + } + } + + { + // Check non-const GetArraySafe() + TJsonValue json{JSON_ARRAY}; + json.GetArraySafe().push_back(TJsonValue{"foo"}); + + TJsonValue expectedJson; + expectedJson.AppendValue(TJsonValue{"foo"}); + UNIT_ASSERT(json == expectedJson); + + TJsonValue::TArray jsonArray = std::move(json.GetArraySafe()); + TJsonValue::TArray expectedArray = {TJsonValue{"foo"}}; + UNIT_ASSERT(jsonArray == expectedArray); + } + + { + // Check non-const GetMap() + TJsonValue json{JSON_MAP}; + json.GetMapSafe()["foo"] = "bar"; + + TJsonValue expectedJson; + expectedJson.InsertValue("foo", "bar"); + UNIT_ASSERT(json == expectedJson); + + TJsonValue::TMapType jsonMap = std::move(json.GetMapSafe()); + TJsonValue::TMapType expectedMap = {{"foo", TJsonValue{"bar"}}}; + UNIT_ASSERT(jsonMap == expectedMap); + } + } + + Y_UNIT_TEST(NonexistentFieldAccessTest) { + { + TJsonValue json; + json.InsertValue("some", "key"); + + UNIT_ASSERT(!json["some"]["weird"]["access"]["sequence"].Has("value")); + 
UNIT_ASSERT(!json["some"]["weird"]["access"]["sequence"].IsDefined()); + + UNIT_ASSERT(json["some"].GetType() == JSON_MAP); + } + } + + Y_UNIT_TEST(DefaultValuesTest) { + { + TJsonValue json; + json.InsertValue("some", "key"); + json.InsertValue("existing", 1.2); + + UNIT_ASSERT_VALUES_EQUAL(json["existing"].GetDoubleSafe(), 1.2); + UNIT_ASSERT_VALUES_EQUAL(json["existing"].GetDoubleSafe(15), 1.2); + + UNIT_ASSERT_EXCEPTION(json["some"].GetUIntegerSafe(), yexception); + UNIT_ASSERT_EXCEPTION(json["some"].GetUIntegerSafe(12), yexception); + + UNIT_ASSERT_EXCEPTION(json["nonexistent"].GetUIntegerSafe(), yexception); + UNIT_ASSERT_VALUES_EQUAL(json["nonexistent"].GetUIntegerSafe(12), 12); + UNIT_ASSERT_VALUES_EQUAL(json["nonexistent"]["more_nonexistent"].GetUIntegerSafe(12), 12); + + json.InsertValue("map", TJsonValue(JSON_MAP)); + + UNIT_ASSERT_VALUES_EQUAL(json["map"]["nonexistent"].GetUIntegerSafe(12), 12); + } + } + + Y_UNIT_TEST(GetArrayPointerInArrayTest) { + TJsonValue outer; + { + TJsonValue json; + json.AppendValue(1); + json.AppendValue(2); + json.AppendValue(3); + + outer.AppendValue(json); + } + const TJsonValue::TArray* array = nullptr; + GetArrayPointer(outer, 0, &array); + UNIT_ASSERT_VALUES_EQUAL((*array)[1], 2); + } + + Y_UNIT_TEST(GetArrayPointerInMapTest) { + TJsonValue outer; + { + TJsonValue json; + json.AppendValue(1); + json.AppendValue(2); + json.AppendValue(3); + + outer.InsertValue("x", json); + } + const TJsonValue::TArray* array = nullptr; + GetArrayPointer(outer, "x", &array); + UNIT_ASSERT_VALUES_EQUAL((*array)[1], 2); + } + + Y_UNIT_TEST(GetMapPointerInArrayTest) { + TJsonValue outer; + { + TJsonValue json; + json.InsertValue("a", 1); + json.InsertValue("b", 2); + json.InsertValue("c", 3); + + outer.AppendValue(json); + } + const TJsonValue::TMapType* map = nullptr; + GetMapPointer(outer, 0, &map); + UNIT_ASSERT_VALUES_EQUAL((*map).at("b"), 2); + } + + Y_UNIT_TEST(GetMapPointerInMapTest) { + TJsonValue outer; + { + TJsonValue json; + 
json.InsertValue("a", 1); + json.InsertValue("b", 2); + json.InsertValue("c", 3); + + outer.InsertValue("x", json); + } + const TJsonValue::TMapType* map = nullptr; + GetMapPointer(outer, "x", &map); + UNIT_ASSERT_VALUES_EQUAL((*map).at("b"), 2); + } + + Y_UNIT_TEST(GetIntegerRobustBignumStringTest) { + TString value = "1626862681464633683"; + TJsonValue json(value); + UNIT_ASSERT_VALUES_EQUAL(json.GetUIntegerRobust(), FromString<ui64>(value)); + UNIT_ASSERT_VALUES_EQUAL(json.GetIntegerRobust(), FromString<i64>(value)); + } + + Y_UNIT_TEST(MoveSubpartToSelf) { + TJsonValue json; + json[0] = "testing 0"; + json[1] = "testing 1"; + json[2] = "testing 2"; + json = std::move(json[1]); + UNIT_ASSERT_VALUES_EQUAL(json.GetString(), "testing 1"); + + const char* longTestString = + "Testing TJsonValue& operator=(TJsonValue&&) subpart self moving " + "after TJsonValue was constrcuted from TString&&."; + + json["hello"] = TString{longTestString}; + json = std::move(json["hello"]); + UNIT_ASSERT_VALUES_EQUAL(json.GetString(), longTestString); + } + + Y_UNIT_TEST(TJsonArrayMapConstructor) { + TJsonMap emptyMap; + UNIT_ASSERT_VALUES_EQUAL(emptyMap.GetType(), JSON_MAP); + UNIT_ASSERT_VALUES_EQUAL(emptyMap.GetMapSafe().size(), 0); + + TJsonArray emptyArray; + UNIT_ASSERT_VALUES_EQUAL(emptyArray.GetType(), JSON_ARRAY); + UNIT_ASSERT_VALUES_EQUAL(emptyArray.GetArraySafe().size(), 0); + + TJsonMap filled = { + {"1", 1}, + {"2", "2"}, + {"3", TJsonArray{3}}, + {"4", TJsonMap{{"5", 5}}}, + }; + UNIT_ASSERT_VALUES_EQUAL(filled.GetType(), JSON_MAP); + UNIT_ASSERT_VALUES_EQUAL(filled["1"], TJsonValue{1}); + UNIT_ASSERT_VALUES_EQUAL(filled["2"], TJsonValue{"2"}); + UNIT_ASSERT_VALUES_EQUAL(filled["3"].GetArraySafe().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(filled["3"][0], TJsonValue{3}); + UNIT_ASSERT_VALUES_EQUAL(filled["4"].GetMapSafe().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(filled["4"]["5"], TJsonValue{5}); + } +} // TJsonValueTest diff --git a/library/cpp/json/writer/ut/ya.make 
b/library/cpp/json/writer/ut/ya.make new file mode 100644 index 0000000000..1e39dae6a1 --- /dev/null +++ b/library/cpp/json/writer/ut/ya.make @@ -0,0 +1,16 @@ +UNITTEST() + +OWNER(myltsev) + +PEERDIR( + ADDINCL library/cpp/json/writer +) + +SRCDIR(library/cpp/json/writer) + +SRCS( + json_ut.cpp + json_value_ut.cpp +) + +END() diff --git a/library/cpp/json/writer/ya.make b/library/cpp/json/writer/ya.make new file mode 100644 index 0000000000..3989ff3504 --- /dev/null +++ b/library/cpp/json/writer/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +OWNER( + mvel + myltsev + pg +) + +PEERDIR( + library/cpp/json/common +) + +SRCS( + json_value.cpp + json.cpp +) + +GENERATE_ENUM_SERIALIZATION(json_value.h) + +END() diff --git a/library/cpp/json/ya.make b/library/cpp/json/ya.make new file mode 100644 index 0000000000..d58eead8ec --- /dev/null +++ b/library/cpp/json/ya.make @@ -0,0 +1,23 @@ +LIBRARY() + +OWNER( + pg + velavokr +) + +SRCS( + json_writer.cpp + json_reader.cpp + json_prettifier.cpp + rapidjson_helpers.cpp +) + +PEERDIR( + contrib/libs/rapidjson + library/cpp/json/common + library/cpp/json/fast_sax + library/cpp/json/writer + library/cpp/string_utils/relaxed_escaper +) + +END() diff --git a/library/cpp/json/yson/json2yson.cpp b/library/cpp/json/yson/json2yson.cpp new file mode 100644 index 0000000000..f72cb7a9ef --- /dev/null +++ b/library/cpp/json/yson/json2yson.cpp @@ -0,0 +1,108 @@ +#include "json2yson.h" + +#include <library/cpp/yson/parser.h> +#include <library/cpp/yson/json/json_writer.h> +#include <library/cpp/yson/json/yson2json_adapter.h> + +namespace NJson2Yson { + static void WriteJsonValue(const NJson::TJsonValue& jsonValue, NYT::TYson2JsonCallbacksAdapter* adapter) { + switch (jsonValue.GetType()) { + default: + case NJson::JSON_NULL: + adapter->OnNull(); + break; + case NJson::JSON_BOOLEAN: + adapter->OnBoolean(jsonValue.GetBoolean()); + break; + case NJson::JSON_DOUBLE: + adapter->OnDouble(jsonValue.GetDouble()); + break; + case NJson::JSON_INTEGER: + 
adapter->OnInteger(jsonValue.GetInteger()); + break; + case NJson::JSON_UINTEGER: + adapter->OnUInteger(jsonValue.GetUInteger()); + break; + case NJson::JSON_STRING: + adapter->OnString(jsonValue.GetString()); + break; + case NJson::JSON_ARRAY: { + adapter->OnOpenArray(); + const NJson::TJsonValue::TArray& arr = jsonValue.GetArray(); + for (const auto& it : arr) + WriteJsonValue(it, adapter); + adapter->OnCloseArray(); + break; + } + case NJson::JSON_MAP: { + adapter->OnOpenMap(); + const NJson::TJsonValue::TMapType& map = jsonValue.GetMap(); + for (const auto& it : map) { + adapter->OnMapKey(it.first); + WriteJsonValue(it.second, adapter); + } + adapter->OnCloseMap(); + break; + } + } + } + + void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, NYson::TYsonWriter* ysonWriter) { + NYT::TYson2JsonCallbacksAdapter adapter(ysonWriter); + WriteJsonValue(inputValue, &adapter); + } + + void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, IOutputStream* outputStream) { + NYson::TYsonWriter ysonWriter(outputStream, NYson::EYsonFormat::Binary, ::NYson::EYsonType::Node, false); + SerializeJsonValueAsYson(inputValue, &ysonWriter); + } + + void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, TString& result) { + TStringOutput resultStream(result); + SerializeJsonValueAsYson(inputValue, &resultStream); + } + + TString SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue) { + TString result; + SerializeJsonValueAsYson(inputValue, result); + return result; + } + + bool DeserializeYsonAsJsonValue(IInputStream* inputStream, NJson::TJsonValue* outputValue, bool throwOnError) { + NJson::TParserCallbacks parser(*outputValue); + NJson2Yson::TJsonBuilder consumer(&parser); + NYson::TYsonParser ysonParser(&consumer, inputStream, ::NYson::EYsonType::Node); + try { + ysonParser.Parse(); + } catch (...) 
{ + if (throwOnError) { + throw; + } + return false; + } + return true; + } + + bool DeserializeYsonAsJsonValue(TStringBuf str, NJson::TJsonValue* outputValue, bool throwOnError) { + TMemoryInput inputStream(str); + return DeserializeYsonAsJsonValue(&inputStream, outputValue, throwOnError); + } + + void ConvertYson2Json(IInputStream* inputStream, IOutputStream* outputStream) { + NYT::TJsonWriter writer(outputStream, ::NYson::EYsonType::Node, NYT::JF_TEXT, NYT::JAM_ON_DEMAND, NYT::SBF_BOOLEAN); + NYson::TYsonParser ysonParser(&writer, inputStream, ::NYson::EYsonType::Node); + ysonParser.Parse(); + } + + void ConvertYson2Json(TStringBuf yson, IOutputStream* outputStream) { + TMemoryInput inputStream(yson); + ConvertYson2Json(&inputStream, outputStream); + } + + TString ConvertYson2Json(TStringBuf yson) { + TString json; + TStringOutput outputStream(json); + ConvertYson2Json(yson, &outputStream); + return json; + } +} diff --git a/library/cpp/json/yson/json2yson.h b/library/cpp/json/yson/json2yson.h new file mode 100644 index 0000000000..758eb6d0cf --- /dev/null +++ b/library/cpp/json/yson/json2yson.h @@ -0,0 +1,179 @@ +#pragma once + +#include <library/cpp/json/json_reader.h> +#include <library/cpp/json/json_value.h> +#include <library/cpp/yson/writer.h> + +namespace NJson2Yson { + class TJsonBuilderImpl: public NYson::TYsonConsumerBase { + public: + TJsonBuilderImpl(NJson::TJsonCallbacks* parserCallbacks) + : ParserCallbacks_(parserCallbacks) + { + } + + void OnStringScalar(TStringBuf value) override { + ParserCallbacks_->OnString(value); + } + + void OnInt64Scalar(i64 value) override { + ParserCallbacks_->OnInteger(value); + } + + void OnUint64Scalar(ui64 value) override { + ParserCallbacks_->OnUInteger(value); + } + + void OnDoubleScalar(double value) override { + ParserCallbacks_->OnDouble(value); + } + + void OnBooleanScalar(bool value) override { + ParserCallbacks_->OnBoolean(value); + } + + void OnEntity() override { + ParserCallbacks_->OnNull(); + } + + void 
OnBeginList() override { + ParserCallbacks_->OnOpenArray(); + } + + void OnListItem() override { + } + + void OnEndList() override { + ParserCallbacks_->OnCloseArray(); + } + + void OnBeginMap() override { + ParserCallbacks_->OnOpenMap(); + } + + void OnKeyedItem(TStringBuf key) override { + ParserCallbacks_->OnMapKey(key); + } + + void OnEndMap() override { + ParserCallbacks_->OnCloseMap(); + } + + void OnBeginAttributes() override { + } + + void OnEndAttributes() override { + } + + private: + NJson::TJsonCallbacks* ParserCallbacks_; + }; + + template <typename TBase> + class TSkipAttributesProxy: public TBase { + public: + template <typename... TArgs> + TSkipAttributesProxy<TBase>(TArgs&&... args) + : TBase(std::forward<TArgs>(args)...) + { + } + + void OnStringScalar(TStringBuf value) override { + if (AttributesDepth == 0) { + TBase::OnStringScalar(value); + } + } + + void OnInt64Scalar(i64 value) override { + if (AttributesDepth == 0) { + TBase::OnInt64Scalar(value); + } + } + + void OnUint64Scalar(ui64 value) override { + if (AttributesDepth == 0) { + TBase::OnUint64Scalar(value); + } + } + + void OnDoubleScalar(double value) override { + if (AttributesDepth == 0) { + TBase::OnDoubleScalar(value); + } + } + + void OnBooleanScalar(bool value) override { + if (AttributesDepth == 0) { + TBase::OnBooleanScalar(value); + } + } + + void OnEntity() override { + if (AttributesDepth == 0) { + TBase::OnEntity(); + } + } + + void OnBeginList() override { + if (AttributesDepth == 0) { + TBase::OnBeginList(); + } + } + + void OnListItem() override { + if (AttributesDepth == 0) { + TBase::OnListItem(); + } + } + + void OnEndList() override { + if (AttributesDepth == 0) { + TBase::OnEndList(); + } + } + + void OnBeginMap() override { + if (AttributesDepth == 0) { + TBase::OnBeginMap(); + } + } + + void OnKeyedItem(TStringBuf key) override { + if (AttributesDepth == 0) { + TBase::OnKeyedItem(key); + } + } + + void OnEndMap() override { + if (AttributesDepth == 0) { + 
TBase::OnEndMap(); + } + } + + void OnBeginAttributes() override { + ++AttributesDepth; + } + + void OnEndAttributes() override { + --AttributesDepth; + Y_ASSERT(AttributesDepth >= 0); + } + + private: + int AttributesDepth = 0; + }; + + using TJsonBuilder = TSkipAttributesProxy<TJsonBuilderImpl>; + + void ConvertYson2Json(IInputStream* inputStream, IOutputStream* outputStream); + void ConvertYson2Json(TStringBuf yson, IOutputStream* outputStream); + TString ConvertYson2Json(TStringBuf yson); + + bool DeserializeYsonAsJsonValue(IInputStream* inputStream, NJson::TJsonValue* outputValue, bool throwOnError = false); + bool DeserializeYsonAsJsonValue(TStringBuf str, NJson::TJsonValue* outputValue, bool throwOnError = false); + + void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, NYson::TYsonWriter* ysonWriter); + void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, IOutputStream* outputStream); + void SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue, TString& result); + TString SerializeJsonValueAsYson(const NJson::TJsonValue& inputValue); +} diff --git a/library/cpp/json/yson/json2yson_ut.cpp b/library/cpp/json/yson/json2yson_ut.cpp new file mode 100644 index 0000000000..9eb23354cf --- /dev/null +++ b/library/cpp/json/yson/json2yson_ut.cpp @@ -0,0 +1,107 @@ +#include "library/cpp/json/yson/json2yson.h" + +#include <library/cpp/blockcodecs/codecs.h> +#include <library/cpp/histogram/simple/histogram.h> +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/datetime/cputimer.h> +#include <util/stream/file.h> + +template <typename TCallBack> +ui64 Run(TCallBack&& callBack) { + TSimpleTimer timer; + callBack(); + return timer.Get().MicroSeconds(); +} + +static TString GetRequestsWithDecoding(const TString& inputPath, const NBlockCodecs::ICodec* codec) { + TIFStream inputFileStream(inputPath); + TString encodedRequests = inputFileStream.ReadAll(); + TString 
requests; + codec->Decode(encodedRequests, requests); + return requests; +} + +Y_UNIT_TEST_SUITE(Json2Yson) { + Y_UNIT_TEST(NOAPACHE_REQUESTS) { + const ui32 warmUpRetries = 5; + const TVector<double> percentiles = {0.25, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.97, 0.99, 1.0}; + + NSimpleHistogram::TMultiHistogramCalcer<ui64> calcer; + + TString requests = GetRequestsWithDecoding(GetWorkPath() + "/noapache_requests_sample_lz4", NBlockCodecs::Codec("lz4")); + TStringInput inputStream(requests); + + for (TString jsonRequest, jsonString, ysonString; inputStream.ReadLine(jsonRequest);) { + TStringInput jsonInput(jsonRequest); + NJson::TJsonValue readedJson; + NJson::ReadJsonTree(&jsonInput, &readedJson, true); + jsonRequest.clear(); + + ui64 writeTime = Max<ui64>(); + ui64 readTime = Max<ui64>(); + + for (ui32 i = 0; i < warmUpRetries; ++i) { + NJson::TJsonValue Json2Json; + TStringOutput jsonWriteOutput(jsonString); + NJsonWriter::TBuf jsonBuf(NJsonWriter::HEM_UNSAFE, &jsonWriteOutput); + + writeTime = Min(writeTime, Run([&]() { + jsonBuf.WriteJsonValue(&readedJson); + })); + + TStringInput jsonInput(jsonString); + NJson::TJsonReaderConfig config; + config.DontValidateUtf8 = true; + readTime = Min(readTime, Run([&]() { + NJson::ReadJsonTree(&jsonInput, &config, &Json2Json, true); + })); + + UNIT_ASSERT_VALUES_EQUAL( + NJsonWriter::TBuf().WriteJsonValue(&readedJson, true).Str(), + NJsonWriter::TBuf().WriteJsonValue(&Json2Json, true).Str()); + + jsonString.clear(); + } + + calcer.RecordValue("read_json", readTime); + calcer.RecordValue("write_json", writeTime); + calcer.RecordValue("read_and_write_json", readTime + writeTime); + + writeTime = Max<ui64>(); + readTime = Max<ui64>(); + + for (ui32 i = 0; i < warmUpRetries; ++i) { + NJson::TJsonValue convertedJson; + TStringOutput ysonOutput(ysonString); + + writeTime = Min(writeTime, Run([&]() { + NJson2Yson::SerializeJsonValueAsYson(readedJson, &ysonOutput); + })); + + TStringInput ysonInput(ysonString); + readTime = 
Min(readTime, Run([&]() { + NJson2Yson::DeserializeYsonAsJsonValue(&ysonInput, &convertedJson); + })); + + UNIT_ASSERT_VALUES_EQUAL( + NJsonWriter::TBuf().WriteJsonValue(&convertedJson, true).Str(), + NJsonWriter::TBuf().WriteJsonValue(&readedJson, true).Str()); + + ysonString.clear(); + } + + calcer.RecordValue("read_yson", readTime); + calcer.RecordValue("write_yson", writeTime); + calcer.RecordValue("read_and_write_yson", readTime + writeTime); + } + + NJson::TJsonValue histogramJson = NSimpleHistogram::ToJson(calcer.Calc(), percentiles); + for (const auto& it : histogramJson.GetMap()) { + for (const auto& percentileValue : it.second.GetMap()) { + UNIT_ADD_METRIC(it.first + "_" + percentileValue.first, percentileValue.second.GetUInteger() / 1000.0); + } + } + } +} diff --git a/library/cpp/json/yson/ut/ya.make b/library/cpp/json/yson/ut/ya.make new file mode 100644 index 0000000000..4ceb65b279 --- /dev/null +++ b/library/cpp/json/yson/ut/ya.make @@ -0,0 +1,28 @@ +OWNER( + avitella + elshiko +) + +UNITTEST_FOR(library/cpp/json/yson) + +ALLOCATOR(LF) + +DATA(sbr://363537653) + +PEERDIR( + library/cpp/blockcodecs + library/cpp/histogram/simple + library/cpp/testing/unittest +) + +SIZE(LARGE) + +TAG(ya:fat) + +TIMEOUT(600) + +SRCS( + json2yson_ut.cpp +) + +END() diff --git a/library/cpp/json/yson/ya.make b/library/cpp/json/yson/ya.make new file mode 100644 index 0000000000..9b289d674f --- /dev/null +++ b/library/cpp/json/yson/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +OWNER( + avitella + elshiko +) + +PEERDIR( + library/cpp/json + library/cpp/yson + library/cpp/yson/json +) + +SRCS( + json2yson.cpp +) + +END() |