aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/json/easy_parse
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json/easy_parse
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json/easy_parse')
-rw-r--r--library/cpp/json/easy_parse/json_easy_parser.cpp236
-rw-r--r--library/cpp/json/easy_parse/json_easy_parser.h46
-rw-r--r--library/cpp/json/easy_parse/json_easy_parser_impl.h40
-rw-r--r--library/cpp/json/easy_parse/ya.make13
4 files changed, 335 insertions, 0 deletions
diff --git a/library/cpp/json/easy_parse/json_easy_parser.cpp b/library/cpp/json/easy_parse/json_easy_parser.cpp
new file mode 100644
index 0000000000..3c781f544b
--- /dev/null
+++ b/library/cpp/json/easy_parse/json_easy_parser.cpp
@@ -0,0 +1,236 @@
+#include "json_easy_parser.h"
+#include <library/cpp/json/json_reader.h>
+#include <util/string/cast.h>
+#include <util/string/split.h>
+#include <util/string/strip.h>
+
+namespace NJson {
+ // Special tokens of the field-path syntax.
+ // NOTE(review): only ANY_IDENTIFIER is referenced by the code visible in this file;
+ // MAP_IDENTIFIER and ARRAY_IDENTIFIER look unused here -- confirm before removing.
+ static TString MAP_IDENTIFIER = "{}";
+ static TString ARRAY_IDENTIFIER = "[]";
+ // Wildcard: a template MAP_KEY element with this key matches any real key.
+ static TString ANY_IDENTIFIER = "*";
+
+    // Translates a slash-separated field path (e.g. "/x/[2]/y") into a sequence of
+    // path elements appended to *res.
+    //
+    //  * Empty parts (after stripping whitespace) are skipped.
+    //  * A part that does not start with '[' is a map key: it contributes a MAP
+    //    element followed by a MAP_KEY element holding the key text.
+    //  * A part that starts with '[' is an array selector: the text between the
+    //    first and the last character is parsed as an int index. If it is not a
+    //    valid int (for example "[]"), the index is -1, which matches any position.
+    //
+    // NOTE(review): path is taken by value because Split() is given the string's own
+    // mutable buffer -- presumably it tokenizes in place; confirm in util/string/split.h.
+    static void ParsePath(TString path, TVector<TPathElem>* res) {
+        TVector<const char*> parts;
+        Split(path.begin(), '/', &parts);
+        for (const char* rawPart : parts) {
+            const TString part = Strip(rawPart);
+            if (part.empty()) {
+                continue;
+            }
+            if (part[0] != '[') {
+                res->push_back(TPathElem(NImpl::MAP));
+                res->push_back(TPathElem(part));
+                continue;
+            }
+            // Non-throwing conversion: an unparsable index is an expected input
+            // ("[]"), so it should not go through exception handling.
+            int arrayCounter = -1;
+            if (!TryFromString<int>(part.substr(1, part.length() - 2), arrayCounter)) {
+                arrayCounter = -1; // do not rely on the output value after a failed parse
+            }
+            res->push_back(TPathElem(arrayCounter));
+        }
+    }
+
+    // Registers one more field to extract. The field is appended to Fields first and
+    // then filled in place: its NonEmpty flag from the argument, its Path from the
+    // parsed textual path.
+    void TJsonParser::AddField(const TString& path, bool nonEmpty) {
+        Fields.emplace_back();
+        TField& added = Fields.back();
+        added.NonEmpty = nonEmpty;
+        ParsePath(path, &added.Path);
+    }
+
+    // String-to-string convenience wrapper: feeds json through the stream-based
+    // overload and returns everything that overload wrote.
+    TString TJsonParser::ConvertToTabDelimited(const TString& json) const {
+        TStringInput source(json);
+        TStringStream sink;
+        ConvertToTabDelimited(source, sink);
+        return sink.Str();
+    }
+
+ // JSON reader callbacks that collect the values of the fields registered in a
+ // TJsonParser. While the document is read, Stack mirrors the current position in
+ // the document as a sequence of path elements; whenever that position matches the
+ // path of a field that has no value yet, the next scalar value is stored for it.
+ class TRewriteJsonImpl: public NJson::TJsonCallbacks {
+ // Parser whose Fields describe what to extract.
+ const TJsonParser& Parent;
+ // Collected values, indexed like Parent.Fields; an empty string means "no value yet".
+ TVector<TString> FieldValues;
+ // Current position in the document: MAP / MAP_KEY / ARRAY elements from the root down.
+ TVector<TPathElem> Stack;
+ // True when at least one field path ends with an ARRAY element; then the current
+ // rule is re-evaluated every time an array counter advances.
+ bool ShouldUpdateOnArrayChange;
+ // Index of the field that will receive the next scalar value, or -1 if none.
+ int CurrentFieldIdx;
+ // Set when a map or array opens, or a new key arrives, while a field is selected.
+ bool HasFormatError;
+
+ private:
+ // Returns true if a template element (from a field path) matches a real element
+ // (from Stack). Types must be equal; a template ARRAY counter of -1 matches any
+ // position, and a template key equal to ANY_IDENTIFIER matches any key.
+ static bool PathElementMatch(const TPathElem& templ, const TPathElem& real) {
+ if (templ.Type != real.Type)
+ return false;
+ if (templ.Type == NImpl::ARRAY)
+ return templ.ArrayCounter == -1 || templ.ArrayCounter == real.ArrayCounter;
+ if (templ.Type == NImpl::MAP_KEY)
+ return templ.Key == ANY_IDENTIFIER || templ.Key == real.Key;
+ return true;
+ }
+
+ // Returns true if path matches the leading elements of Stack. This is a prefix
+ // match: Stack may be deeper than path, but not shallower.
+ bool CheckFilter(const TVector<TPathElem>& path) const {
+ if (Stack.size() < path.size())
+ return false;
+ for (size_t n = 0; n < path.size(); ++n) {
+ if (!PathElementMatch(path[n], Stack[n]))
+ return false;
+ }
+ return true;
+ }
+
+ // Selects the first field that still has an empty value and whose path matches
+ // the current Stack; sets CurrentFieldIdx to -1 when there is no such field.
+ void UpdateRule() {
+ for (size_t n = 0; n < Parent.Fields.size(); ++n) {
+ if (FieldValues[n].empty() && CheckFilter(Parent.Fields[n].Path)) {
+ CurrentFieldIdx = n;
+ return;
+ }
+ }
+ CurrentFieldIdx = -1;
+ }
+
+ // Drops the innermost element of Stack. Callers check that Stack is not empty.
+ void Pop() {
+ Stack.pop_back();
+ }
+
+ // Called at the start of every value, map or array. If the innermost element is
+ // an ARRAY, advances its position counter (pushed as -1, so the first item gets 0)
+ // and, when some field path ends with an array element, re-evaluates the rule.
+ void IncreaseArrayCounter() {
+ if (!Stack.empty() && Stack.back().Type == NImpl::ARRAY) {
+ ++Stack.back().ArrayCounter;
+ if (ShouldUpdateOnArrayChange)
+ UpdateRule();
+ }
+ }
+
+ // Common handler for all scalar values: if a field is currently selected, stores
+ // the value converted with ToString() and looks for the next matching field.
+ // Always returns true.
+ template <class T>
+ bool OnValue(const T& val) {
+ IncreaseArrayCounter();
+ if (CurrentFieldIdx >= 0) {
+ FieldValues[CurrentFieldIdx] = ToString(val);
+ UpdateRule();
+ }
+ return true;
+ }
+
+ public:
+ // Prepares one empty value slot per field of parent and precomputes
+ // ShouldUpdateOnArrayChange from the last element of every field path.
+ TRewriteJsonImpl(const TJsonParser& parent)
+ : Parent(parent)
+ , FieldValues(parent.Fields.size())
+ , ShouldUpdateOnArrayChange(false)
+ , CurrentFieldIdx(-1)
+ , HasFormatError(false)
+ {
+ for (size_t n = 0; n < Parent.Fields.size(); ++n) {
+ if (!Parent.Fields[n].Path.empty() && Parent.Fields[n].Path.back().Type == NImpl::ARRAY)
+ ShouldUpdateOnArrayChange = true;
+ }
+ }
+
+ // A map starts: push a MAP element. If a field was selected, a map opened where
+ // a scalar was expected, which is recorded as a format error.
+ bool OnOpenMap() override {
+ IncreaseArrayCounter();
+ Stack.push_back(TPathElem(NImpl::MAP));
+ if (CurrentFieldIdx >= 0)
+ HasFormatError = true;
+ else
+ UpdateRule();
+ return true;
+ }
+
+ // An array starts: push an ARRAY element with counter -1 (no item seen yet).
+ // If a field was selected, this is recorded as a format error.
+ bool OnOpenArray() override {
+ IncreaseArrayCounter();
+ Stack.push_back(TPathElem(-1));
+ if (CurrentFieldIdx >= 0)
+ HasFormatError = true;
+ else
+ UpdateRule();
+ return true;
+ }
+
+ // A map ends: pop everything above the innermost MAP element (e.g. the last
+ // MAP_KEY), then the MAP element itself, and re-evaluate the rule.
+ bool OnCloseMap() override {
+ while (!Stack.empty() && Stack.back().Type != NImpl::MAP)
+ Pop();
+ if (!Stack.empty())
+ Pop();
+ UpdateRule();
+ return true;
+ }
+
+ // An array ends: pop the innermost element and re-evaluate the rule.
+ bool OnCloseArray() override {
+ if (!Stack.empty())
+ Pop();
+ UpdateRule();
+ return true;
+ }
+
+ // A key arrives: replace the previous key of the same map (if it is still on top
+ // of Stack) with the new one. If a field is still selected after that, it is
+ // recorded as a format error; otherwise the rule is re-evaluated for the new key.
+ bool OnMapKey(const TStringBuf& key) override {
+ if (!Stack.empty() && Stack.back().Type == NImpl::MAP_KEY) {
+ Pop();
+ UpdateRule();
+ }
+ Stack.push_back(TPathElem(TString{key}));
+ if (CurrentFieldIdx >= 0)
+ HasFormatError = true;
+ else
+ UpdateRule();
+ return true;
+ }
+
+ // Scalar callbacks: all of them delegate to OnValue().
+ bool OnBoolean(bool b) override {
+ return OnValue(b);
+ }
+
+ bool OnInteger(long long i) override {
+ return OnValue(i);
+ }
+
+ bool OnDouble(double f) override {
+ return OnValue(f);
+ }
+
+ bool OnString(const TStringBuf& str) override {
+ return OnValue(str);
+ }
+
+ // Returns false if a format error was recorded or if any field marked NonEmpty
+ // still has an empty value; true otherwise.
+ bool IsOK() const {
+ if (HasFormatError)
+ return false;
+ for (size_t n = 0; n < FieldValues.size(); ++n)
+ if (Parent.Fields[n].NonEmpty && FieldValues[n].empty())
+ return false;
+ return true;
+ }
+
+ // Writes every field value to out, each one preceded by a tab character.
+ void WriteTo(IOutputStream& out) const {
+ for (size_t n = 0; n < FieldValues.size(); ++n)
+ out << "\t" << FieldValues[n];
+ }
+
+ // Copies all field values into *res, replacing its previous contents.
+ void WriteTo(TVector<TString>* res) const {
+ *res = FieldValues;
+ }
+ };
+
+    // Reads JSON from in and, if the collected fields pass IsOK(), writes Prefix
+    // followed by the tab-prefixed field values to out and flushes it. Nothing is
+    // written otherwise. The result of ReadJson() itself is not examined; only the
+    // collector state decides.
+    void TJsonParser::ConvertToTabDelimited(IInputStream& in, IOutputStream& out) const {
+        TRewriteJsonImpl collector(*this);
+        ReadJson(&in, &collector);
+        if (!collector.IsOK()) {
+            return;
+        }
+        out << Prefix;
+        collector.WriteTo(out);
+        out.Flush();
+    }
+
+    // Extracts the registered fields from json. On success stores the values in *res
+    // (one per field, in registration order) and returns true; on failure returns
+    // false and leaves *res untouched. The result of ReadJson() itself is not
+    // examined; only the collector state decides.
+    bool TJsonParser::Parse(const TString& json, TVector<TString>* res) const {
+        TRewriteJsonImpl collector(*this);
+        TStringInput source(json);
+        ReadJson(&source, &collector);
+        const bool ok = collector.IsOK();
+        if (ok) {
+            collector.WriteTo(res);
+        }
+        return ok;
+    }
+
+ //struct TTestMe {
+ // TTestMe() {
+ // TJsonParser worker;
+ // worker.AddField("/x/y/z", true);
+ // TString ret1 = worker.ConvertToTabDelimited("{ \"x\" : { \"y\" : { \"w\" : 1, \"z\" : 2 } } }");
+ // TString ret2 = worker.ConvertToTabDelimited(" [1, 2, 3, 4, 5] ");
+ // }
+ //} testMe;
+
+}
diff --git a/library/cpp/json/easy_parse/json_easy_parser.h b/library/cpp/json/easy_parse/json_easy_parser.h
new file mode 100644
index 0000000000..59d7791ab1
--- /dev/null
+++ b/library/cpp/json/easy_parse/json_easy_parser.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include "json_easy_parser_impl.h"
+
+namespace NJson {
+ /* This class extracts nodes from a source JSON document using an XPath-style description. It represents these nodes as a tab-delimited string (or a vector).
+ * It is useful when you need to parse data that arrives as JSON in a known, fixed format.
+ * Fields are specified as a list of keys separated by slashes, for example:
+ * Field x/y/z in JSON { "x" : { "y" : { "w" : 1, "z" : 2 } } } contains the number 2.
+ * In a path to a field you can also use the special array identifier "[]" (any element), the index of a particular element in an array (for example "[4]"), or the wildcard "*" (any key).
+ *
+ * The parser supports extracting several fields at once. Each of them can be marked as mandatory or optional.
+ * If a mandatory field is not found in JSON, then Parse() returns false and ConvertToTabDelimited() returns an empty string.
+ * If an optional field is not found in JSON, then its value in Parse()/ConvertToTabDelimited() is an empty string.
+ * In particular, ConvertToTabDelimited() always returns either an empty string or a string with the same number of tab-delimited fields, starting with the same Prefix.
+ *
+ * NB! The library cannot extract values of non-scalar types from JSON (namely, it does not support the case when the result is a map or an array).
+ * If you need such a case, see json_value.h.
+ */
+
+ class TJsonParser {
+ // Text written before the field values by ConvertToTabDelimited().
+ TString Prefix;
+
+ // One field to extract.
+ struct TField {
+ // Parsed path of the field.
+ TVector<TPathElem> Path;
+ // When true, an empty (or missing) value makes the whole conversion fail.
+ bool NonEmpty;
+ };
+ // Registered fields, in the order of AddField() calls.
+ TVector<TField> Fields;
+
+ // The callbacks implementation reads Fields directly.
+ friend class TRewriteJsonImpl;
+
+ // Stream-based implementation used by the public string overload.
+ void ConvertToTabDelimited(IInputStream& in, IOutputStream& out) const;
+
+ public:
+ // Sets the text that precedes the field values in ConvertToTabDelimited() output.
+ void SetPrefix(const TString& prefix) {
+ Prefix = prefix;
+ }
+ // Registers a field by its slash-separated path. If mustExist is true, the field
+ // is mandatory: the conversion fails when its value is empty or not found.
+ void AddField(const TString& path, bool mustExist);
+ // Returns Prefix followed by the tab-prefixed values of all fields, or an empty
+ // string if the conversion fails.
+ TString ConvertToTabDelimited(const TString& json) const;
+ // Stores the values of all fields in *res and returns true, or returns false if
+ // the conversion fails.
+ bool Parse(const TString& json, TVector<TString>* res) const;
+ };
+}
diff --git a/library/cpp/json/easy_parse/json_easy_parser_impl.h b/library/cpp/json/easy_parse/json_easy_parser_impl.h
new file mode 100644
index 0000000000..ec55d838b3
--- /dev/null
+++ b/library/cpp/json/easy_parse/json_easy_parser_impl.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+namespace NJson {
+ namespace NImpl {
+ // Kind of a single path element.
+ enum EType {
+ ARRAY, // an array; the element carries a position counter
+ MAP, // a map (JSON object)
+ MAP_KEY // a key inside a map; the element carries the key text
+ };
+ }
+    // One element of a JSON path: a map, a key inside a map, or an array position.
+    // Key is meaningful for MAP_KEY elements and ArrayCounter for ARRAY elements;
+    // the member that a constructor does not set keeps its default (empty key, 0).
+    template <class TStringType>
+    struct TPathElemImpl {
+        NImpl::EType Type;
+        TStringType Key;
+        int ArrayCounter = 0;
+
+        // Element of the given kind with an empty key and a zero counter.
+        TPathElemImpl(NImpl::EType type)
+            : Type(type)
+        {
+        }
+
+        // MAP_KEY element holding key.
+        TPathElemImpl(const TStringType& key)
+            : Type(NImpl::MAP_KEY)
+            , Key(key)
+        {
+        }
+
+        // ARRAY element positioned at arrayCounter.
+        TPathElemImpl(int arrayCounter)
+            : Type(NImpl::ARRAY)
+            , ArrayCounter(arrayCounter)
+        {
+        }
+    };
+
+    // Path element with TString keys.
+    using TPathElem = TPathElemImpl<TString>;
+}
diff --git a/library/cpp/json/easy_parse/ya.make b/library/cpp/json/easy_parse/ya.make
new file mode 100644
index 0000000000..2304c542f2
--- /dev/null
+++ b/library/cpp/json/easy_parse/ya.make
@@ -0,0 +1,13 @@
+# Build description of the easy_parse library.
+OWNER(finder)
+
+LIBRARY()
+
+# Only the .cpp file is listed; json_easy_parser.h and json_easy_parser_impl.h are headers.
+SRCS(
+ json_easy_parser.cpp
+)
+
+# json_easy_parser.cpp includes <library/cpp/json/json_reader.h>.
+PEERDIR(
+ library/cpp/json
+)
+
+END()