diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json/easy_parse | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json/easy_parse')
-rw-r--r-- | library/cpp/json/easy_parse/json_easy_parser.cpp | 236 | ||||
-rw-r--r-- | library/cpp/json/easy_parse/json_easy_parser.h | 46 | ||||
-rw-r--r-- | library/cpp/json/easy_parse/json_easy_parser_impl.h | 40 | ||||
-rw-r--r-- | library/cpp/json/easy_parse/ya.make | 13 |
4 files changed, 335 insertions, 0 deletions
diff --git a/library/cpp/json/easy_parse/json_easy_parser.cpp b/library/cpp/json/easy_parse/json_easy_parser.cpp new file mode 100644 index 0000000000..3c781f544b --- /dev/null +++ b/library/cpp/json/easy_parse/json_easy_parser.cpp @@ -0,0 +1,236 @@ +#include "json_easy_parser.h" +#include <library/cpp/json/json_reader.h> +#include <util/string/cast.h> +#include <util/string/split.h> +#include <util/string/strip.h> + +namespace NJson { + static TString MAP_IDENTIFIER = "{}"; + static TString ARRAY_IDENTIFIER = "[]"; + static TString ANY_IDENTIFIER = "*"; + + static void ParsePath(TString path, TVector<TPathElem>* res) { + TVector<const char*> parts; + Split(path.begin(), '/', &parts); + for (size_t n = 0; n < parts.size(); ++n) { + TString part = Strip(parts[n]); + if (!part.empty()) { + if (part[0] != '[') { + res->push_back(TPathElem(NImpl::MAP)); + res->push_back(TPathElem(part)); + } else { + int arrayCounter; + try { + arrayCounter = FromString<int>(part.substr(1, part.length() - 2)); + } catch (yexception&) { + arrayCounter = -1; + } + res->push_back(TPathElem(arrayCounter)); + } + } + } + } + + void TJsonParser::AddField(const TString& path, bool nonEmpty) { + Fields.emplace_back(); + Fields.back().NonEmpty = nonEmpty; + ParsePath(path, &Fields.back().Path); + } + + TString TJsonParser::ConvertToTabDelimited(const TString& json) const { + TStringInput in(json); + TStringStream out; + ConvertToTabDelimited(in, out); + return out.Str(); + } + + class TRewriteJsonImpl: public NJson::TJsonCallbacks { + const TJsonParser& Parent; + TVector<TString> FieldValues; + TVector<TPathElem> Stack; + bool ShouldUpdateOnArrayChange; + int CurrentFieldIdx; + bool HasFormatError; + + private: + static bool PathElementMatch(const TPathElem& templ, const TPathElem& real) { + if (templ.Type != real.Type) + return false; + if (templ.Type == NImpl::ARRAY) + return templ.ArrayCounter == -1 || templ.ArrayCounter == real.ArrayCounter; + if (templ.Type == NImpl::MAP_KEY) + return templ.Key == ANY_IDENTIFIER || templ.Key == real.Key; + return true; + } + + bool CheckFilter(const TVector<TPathElem>& path) const { + if (Stack.size() < path.size()) + return false; + for (size_t n = 0; n < path.size(); ++n) { + if (!PathElementMatch(path[n], Stack[n])) + return false; + } + return true; + } + + void UpdateRule() { + for (size_t n = 0; n < Parent.Fields.size(); ++n) { + if (FieldValues[n].empty() && CheckFilter(Parent.Fields[n].Path)) { + CurrentFieldIdx = n; + return; + } + } + CurrentFieldIdx = -1; + } + + void Pop() { + Stack.pop_back(); + } + + void IncreaseArrayCounter() { + if (!Stack.empty() && Stack.back().Type == NImpl::ARRAY) { + ++Stack.back().ArrayCounter; + if (ShouldUpdateOnArrayChange) + UpdateRule(); + } + } + + template <class T> + bool OnValue(const T& val) { + IncreaseArrayCounter(); + if (CurrentFieldIdx >= 0) { + FieldValues[CurrentFieldIdx] = ToString(val); + UpdateRule(); + } + return true; + } + + public: + TRewriteJsonImpl(const TJsonParser& parent) + : Parent(parent) + , FieldValues(parent.Fields.size()) + , ShouldUpdateOnArrayChange(false) + , CurrentFieldIdx(-1) + , HasFormatError(false) + { + for (size_t n = 0; n < Parent.Fields.size(); ++n) { + if (!Parent.Fields[n].Path.empty() && Parent.Fields[n].Path.back().Type == NImpl::ARRAY) + ShouldUpdateOnArrayChange = true; + } + } + + bool OnOpenMap() override { + IncreaseArrayCounter(); + Stack.push_back(TPathElem(NImpl::MAP)); + if (CurrentFieldIdx >= 0) + HasFormatError = true; + else + UpdateRule(); + return true; + } + + bool OnOpenArray() override { + IncreaseArrayCounter(); + Stack.push_back(TPathElem(-1)); + if (CurrentFieldIdx >= 0) + HasFormatError = true; + else + UpdateRule(); + return true; + } + + bool OnCloseMap() override { + while (!Stack.empty() && Stack.back().Type != NImpl::MAP) + Pop(); + if (!Stack.empty()) + Pop(); + UpdateRule(); + return true; + } + + bool OnCloseArray() override { + if (!Stack.empty()) + Pop(); + UpdateRule(); + return true; + } + + bool OnMapKey(const TStringBuf& key) override { + if (!Stack.empty() && Stack.back().Type == NImpl::MAP_KEY) { + Pop(); + UpdateRule(); + } + Stack.push_back(TPathElem(TString{key})); + if (CurrentFieldIdx >= 0) + HasFormatError = true; + else + UpdateRule(); + return true; + } + + bool OnBoolean(bool b) override { + return OnValue(b); + } + + bool OnInteger(long long i) override { + return OnValue(i); + } + + bool OnDouble(double f) override { + return OnValue(f); + } + + bool OnString(const TStringBuf& str) override { + return OnValue(str); + } + + bool IsOK() const { + if (HasFormatError) + return false; + for (size_t n = 0; n < FieldValues.size(); ++n) + if (Parent.Fields[n].NonEmpty && FieldValues[n].empty()) + return false; + return true; + } + + void WriteTo(IOutputStream& out) const { + for (size_t n = 0; n < FieldValues.size(); ++n) + out << "\t" << FieldValues[n]; + } + + void WriteTo(TVector<TString>* res) const { + *res = FieldValues; + } + }; + + void TJsonParser::ConvertToTabDelimited(IInputStream& in, IOutputStream& out) const { + TRewriteJsonImpl impl(*this); + ReadJson(&in, &impl); + if (impl.IsOK()) { + out << Prefix; + impl.WriteTo(out); + out.Flush(); + } + } + + bool TJsonParser::Parse(const TString& json, TVector<TString>* res) const { + TRewriteJsonImpl impl(*this); + TStringInput in(json); + ReadJson(&in, &impl); + if (impl.IsOK()) { + impl.WriteTo(res); + return true; + } else + return false; + } + + //struct TTestMe { + // TTestMe() { + // TJsonParser worker; + // worker.AddField("/x/y/z", true); + // TString ret1 = worker.ConvertToTabDelimited("{ \"x\" : { \"y\" : { \"w\" : 1, \"z\" : 2 } } }"); + // TString ret2 = worker.ConvertToTabDelimited(" [1, 2, 3, 4, 5] "); + // } + //} testMe; + +} diff --git a/library/cpp/json/easy_parse/json_easy_parser.h b/library/cpp/json/easy_parse/json_easy_parser.h new file mode 100644 index 0000000000..59d7791ab1 --- /dev/null +++ b/library/cpp/json/easy_parse/json_easy_parser.h @@ -0,0 +1,46 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/input.h> +#include <util/stream/output.h> +#include "json_easy_parser_impl.h" + +namespace NJson { + /* This class filters out nodes from a source JSON by a xpath-style description. It represent these nodes as a tab-delimited string (or a vector). + * It is useful if you need to parse a data which comes into JSON in a known and fixed format. + * Fields are set as a list of keys separated by slash, for example: + * Field x/y/z in JSON { "x" : { "y" : { "w" : 1, "z" : 2 } } contains number 2. + * In a path to a field you can also provide a special array identifier "[]", identifier of a particular field in an array (for example "[4]") or wildcard "*". + * + * The parser of the class supports parsing of several fields. Each of them could be marked as mandatory or as optional. + * If a mandatory field is not found in JSON, then Parse() returns false and ConvertToTabDelimited() returns an empty string. + * If an optional field is not found in JSON, then it's value in Parse()/ConvertToTabDelimited() is an empty string. + * In particular ConvertToTabDelimited() always returns either an empty string, or a string of the same number of tab-delimited fields starting from the same Prefix. + * + * NB! Library can not extract values of not a simple type (namely it doesn't support the case when a result is a vocabulary or an array) from JSON. + * If you expect such a case, please check json_value.h. + */ + + class TJsonParser { + TString Prefix; + + struct TField { + TVector<TPathElem> Path; + bool NonEmpty; + }; + TVector<TField> Fields; + + friend class TRewriteJsonImpl; + + void ConvertToTabDelimited(IInputStream& in, IOutputStream& out) const; + + public: + void SetPrefix(const TString& prefix) { + Prefix = prefix; + } + void AddField(const TString& path, bool mustExist); + TString ConvertToTabDelimited(const TString& json) const; + bool Parse(const TString& json, TVector<TString>* res) const; + }; +} diff --git a/library/cpp/json/easy_parse/json_easy_parser_impl.h b/library/cpp/json/easy_parse/json_easy_parser_impl.h new file mode 100644 index 0000000000..ec55d838b3 --- /dev/null +++ b/library/cpp/json/easy_parse/json_easy_parser_impl.h @@ -0,0 +1,40 @@ +#pragma once + +#include <util/generic/string.h> + +namespace NJson { + namespace NImpl { + enum EType { + ARRAY, + MAP, + MAP_KEY + }; + } + template <class TStringType> + struct TPathElemImpl { + NImpl::EType Type; + TStringType Key; + int ArrayCounter; + + TPathElemImpl(NImpl::EType type) + : Type(type) + , ArrayCounter() + { + } + + TPathElemImpl(const TStringType& key) + : Type(NImpl::MAP_KEY) + , Key(key) + , ArrayCounter() + { + } + + TPathElemImpl(int arrayCounter) + : Type(NImpl::ARRAY) + , ArrayCounter(arrayCounter) + { + } + }; + + typedef TPathElemImpl<TString> TPathElem; +} diff --git a/library/cpp/json/easy_parse/ya.make b/library/cpp/json/easy_parse/ya.make new file mode 100644 index 0000000000..2304c542f2 --- /dev/null +++ b/library/cpp/json/easy_parse/ya.make @@ -0,0 +1,13 @@ +OWNER(finder) + +LIBRARY() + +SRCS( + json_easy_parser.cpp +) + +PEERDIR( + library/cpp/json +) + +END() |