aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/json/fast_sax
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json/fast_sax
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json/fast_sax')
-rw-r--r--library/cpp/json/fast_sax/parser.h13
-rw-r--r--library/cpp/json/fast_sax/parser.rl6314
-rw-r--r--library/cpp/json/fast_sax/unescape.cpp7
-rw-r--r--library/cpp/json/fast_sax/unescape.h5
-rw-r--r--library/cpp/json/fast_sax/ya.make17
5 files changed, 356 insertions, 0 deletions
diff --git a/library/cpp/json/fast_sax/parser.h b/library/cpp/json/fast_sax/parser.h
new file mode 100644
index 00000000000..b5f031dd9eb
--- /dev/null
+++ b/library/cpp/json/fast_sax/parser.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <library/cpp/json/common/defs.h>
+
+namespace NJson {
+ bool ReadJsonFast(TStringBuf in, TJsonCallbacks* callbacks);
+
+ inline bool ValidateJsonFast(TStringBuf in, bool throwOnError = false) {
+ Y_ASSERT(false); // this method is broken, see details in IGNIETFERRO-1243. Use NJson::ValidateJson instead, or fix this one before using
+ TJsonCallbacks c(throwOnError);
+ return ReadJsonFast(in, &c);
+ }
+}
diff --git a/library/cpp/json/fast_sax/parser.rl6 b/library/cpp/json/fast_sax/parser.rl6
new file mode 100644
index 00000000000..edb4e9ee1b3
--- /dev/null
+++ b/library/cpp/json/fast_sax/parser.rl6
@@ -0,0 +1,314 @@
+#include <library/cpp/json/fast_sax/unescape.h>
+#include <library/cpp/json/fast_sax/parser.h>
+
+#include <util/string/cast.h>
+#include <util/generic/buffer.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/ymath.h>
+
+namespace NJson {
+
+enum EStoredStr {
+ SS_NONE = 0, SS_NOCOPY, SS_MUSTCOPY
+};
+
+struct TParserCtx {
+ TJsonCallbacks& Hndl;
+
+ TBuffer Buffer;
+ TStringBuf String;
+ EStoredStr Stored = SS_NONE;
+ bool ExpectValue = true;
+
+ const char* p0 = nullptr;
+ const char* p = nullptr;
+ const char* pe = nullptr;
+ const char* eof = nullptr;
+ const char* ts = nullptr;
+ const char* te = nullptr;
+ int cs = 0;
+ int act = 0;
+
+ TParserCtx(TJsonCallbacks& h, TStringBuf data)
+ : Hndl(h)
+ , p0(data.data())
+ , p(data.data())
+ , pe(data.end())
+ , eof(data.end())
+ {}
+
+ static inline bool GoodPtrs(const char* b, const char* e) {
+ return b && e && b <= e;
+ }
+
+ bool OnError(TStringBuf reason = TStringBuf(""), bool end = false) const {
+ size_t off = 0;
+ TStringBuf token;
+
+ if (GoodPtrs(p0, ts)) {
+ off = ts - p0;
+ } else if (end && GoodPtrs(p0, pe)) {
+ off = pe - p0;
+ }
+
+ if (GoodPtrs(ts, te)) {
+ token = TStringBuf(ts, te);
+ }
+
+ if (!token) {
+ Hndl.OnError(off, reason);
+ } else {
+ Hndl.OnError(off, TString::Join(reason, " at token: '", token, "'"));
+ }
+
+ return false;
+ }
+
+ bool OnVal() {
+ if (Y_UNLIKELY(!ExpectValue)) {
+ return false;
+ }
+ ExpectValue = false;
+ return true;
+ }
+
+ bool OnNull() {
+ return Y_LIKELY(OnVal())
+ && Hndl.OnNull();
+ }
+
+ bool OnTrue() {
+ return Y_LIKELY(OnVal())
+ && Hndl.OnBoolean(true);
+ }
+
+ bool OnFalse() {
+ return Y_LIKELY(OnVal())
+ && Hndl.OnBoolean(false);
+ }
+
+ bool OnPInt() {
+ unsigned long long res = 0;
+ return Y_LIKELY(OnVal())
+ && TryFromString<unsigned long long>(TStringBuf(ts, te), res)
+ && Hndl.OnUInteger(res);
+ }
+
+ bool OnNInt() {
+ long long res = 0;
+ return Y_LIKELY(OnVal())
+ && TryFromString<long long>(TStringBuf(ts, te), res)
+ && Hndl.OnInteger(res);
+ }
+
+ bool OnFlt() {
+ double res = 0;
+ return Y_LIKELY(OnVal())
+ && TryFromString<double>(TStringBuf(ts, te), res)
+ && IsFinite(res)
+ && Hndl.OnDouble(res);
+ }
+
+ bool OnMapOpen() {
+ bool res = Y_LIKELY(OnVal())
+ && Hndl.OnOpenMap();
+ ExpectValue = true;
+ return res;
+ }
+
+ bool OnArrOpen() {
+ bool res = Y_LIKELY(OnVal())
+ && Hndl.OnOpenArray();
+ ExpectValue = true;
+ return res;
+ }
+
+ bool OnString(TStringBuf s, EStoredStr t) {
+ if (Y_LIKELY(OnVal())) {
+ String = s;
+ Stored = t;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bool OnStrU() {
+ return OnString(TStringBuf(ts, te), SS_NOCOPY);
+ }
+
+ bool OnStrQ() {
+ return OnString(TStringBuf(ts + 1, te - 1), SS_NOCOPY);
+ }
+
+ bool OnStrE() {
+ Buffer.Clear();
+ Buffer.Reserve(2 * (te - ts));
+
+ return OnString(UnescapeJsonUnicode(TStringBuf(ts + 1, te - ts - 2), Buffer.data()), SS_MUSTCOPY);
+ }
+
+ bool OnMapClose() {
+ ExpectValue = false;
+ return Y_LIKELY(OnAfterVal())
+ && Hndl.OnCloseMap();
+ }
+
+ bool OnArrClose() {
+ ExpectValue = false;
+ return Y_LIKELY(OnAfterVal())
+ && Hndl.OnCloseArray();
+ }
+
+ bool OnColon() {
+ if (ExpectValue) {
+ return false;
+ }
+
+ ExpectValue = true;
+ const auto stored = Stored;
+ Stored = SS_NONE;
+
+ switch (stored) {
+ default:
+ return false;
+ case SS_NOCOPY:
+ return Hndl.OnMapKeyNoCopy(String);
+ case SS_MUSTCOPY:
+ return Hndl.OnMapKey(String);
+ }
+ }
+
+ bool OnAfterVal() {
+ const auto stored = Stored;
+ Stored = SS_NONE;
+
+ switch (stored) {
+ default:
+ return true;
+ case SS_NOCOPY:
+ return Hndl.OnStringNoCopy(String);
+ case SS_MUSTCOPY:
+ return Hndl.OnString(String);
+ }
+ }
+
+ bool OnComma() {
+ if (Y_UNLIKELY(ExpectValue)) {
+ return false;
+ }
+ ExpectValue = true;
+ return OnAfterVal();
+ }
+
+ bool Parse();
+};
+
+#if 0
+%%{
+machine fastjson;
+
+alphtype char;
+
+action OnNull { if (Y_UNLIKELY(!OnNull())) goto TOKEN_ERROR; }
+action OnTrue { if (Y_UNLIKELY(!OnTrue())) goto TOKEN_ERROR; }
+action OnFalse { if (Y_UNLIKELY(!OnFalse())) goto TOKEN_ERROR; }
+action OnPInt { if (Y_UNLIKELY(!OnPInt())) goto TOKEN_ERROR; }
+action OnNInt { if (Y_UNLIKELY(!OnNInt())) goto TOKEN_ERROR; }
+action OnFlt { if (Y_UNLIKELY(!OnFlt())) goto TOKEN_ERROR; }
+action OnStrU { if (Y_UNLIKELY(!OnStrU())) goto TOKEN_ERROR; }
+action OnStrQ { if (Y_UNLIKELY(!OnStrQ())) goto TOKEN_ERROR; }
+action OnStrE { if (Y_UNLIKELY(!OnStrE())) goto TOKEN_ERROR; }
+action OnDictO { if (Y_UNLIKELY(!OnMapOpen())) goto TOKEN_ERROR; }
+action OnDictC { if (Y_UNLIKELY(!OnMapClose())) goto TOKEN_ERROR; }
+action OnArrO { if (Y_UNLIKELY(!OnArrOpen())) goto TOKEN_ERROR; }
+action OnArrC { if (Y_UNLIKELY(!OnArrClose())) goto TOKEN_ERROR; }
+action OnComma { if (Y_UNLIKELY(!OnComma())) goto TOKEN_ERROR; }
+action OnColon { if (Y_UNLIKELY(!OnColon())) goto TOKEN_ERROR; }
+action OnError { goto TOKEN_ERROR; }
+
+comment1 = "/*" (any* -- "*/") "*/";
+
+pint = [0-9]+;
+nint = '-'[0-9]+;
+flt = '-'?[0-9.][0-9.eE+\-]+;
+
+uchar0 = [a-zA-Z_@$] | (0x80 .. 0xFF);
+uchar = uchar0 | digit | [.\-];
+
+qchar = [^'\\]; #';
+dchar = [^"\\]; #";
+
+echar = "\\" any;
+
+qechar = qchar | echar;
+dechar = dchar | echar;
+
+strq = "'" qchar* "'";
+strd = '"' dchar* '"';
+
+strqe = "'" qechar* "'";
+strde = '"' dechar* '"';
+
+strU = uchar0 uchar*;
+strQ = strq | strd;
+strE = strqe | strde;
+
+ws = (0x00 .. 0x20) | 0x7F;
+sp = ws+;
+
+main := |*
+ 'null' => OnNull;
+ 'true' => OnTrue;
+ 'false' => OnFalse;
+
+ pint => OnPInt;
+ nint => OnNInt;
+ flt => OnFlt;
+
+ strU => OnStrU;
+ strQ => OnStrQ;
+ strE => OnStrE;
+
+ ',' => OnComma;
+ ':' => OnColon;
+
+ '{' => OnDictO;
+ '}' => OnDictC;
+ '[' => OnArrO;
+ ']' => OnArrC;
+
+ sp;
+ comment1;
+
+ (flt | pint | nint) (any - (ws | ',' | ':' | '{' | '}' | '[' | ']')) => OnError;
+
+ any => OnError;
+ *|;
+}%%
+#endif
+
+bool TParserCtx::Parse() {
+ try {
+ %%{
+ write data noerror nofinal;
+ write init;
+ write exec;
+ }%%
+ ;
+ Y_UNUSED(fastjson_en_main);
+ } catch (const TFromStringException& e) {
+ return OnError(e.what());
+ }
+
+ return OnAfterVal() && Hndl.OnEnd() || OnError("invalid or truncated", true);
+
+ TOKEN_ERROR:
+ return OnError("invalid syntax");
+}
+
+bool ReadJsonFast(TStringBuf data, TJsonCallbacks* h) {
+ return TParserCtx(*h, data).Parse();
+}
+
+}
diff --git a/library/cpp/json/fast_sax/unescape.cpp b/library/cpp/json/fast_sax/unescape.cpp
new file mode 100644
index 00000000000..72109b0b5e6
--- /dev/null
+++ b/library/cpp/json/fast_sax/unescape.cpp
@@ -0,0 +1,7 @@
+#include "unescape.h"
+
+#include <util/string/escape.h>
+
+TStringBuf UnescapeJsonUnicode(TStringBuf data, char* scratch) {
+ return TStringBuf(scratch, UnescapeC(data.data(), data.size(), scratch));
+}
diff --git a/library/cpp/json/fast_sax/unescape.h b/library/cpp/json/fast_sax/unescape.h
new file mode 100644
index 00000000000..5e40e1e8660
--- /dev/null
+++ b/library/cpp/json/fast_sax/unescape.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+TStringBuf UnescapeJsonUnicode(TStringBuf data, char* scratch);
diff --git a/library/cpp/json/fast_sax/ya.make b/library/cpp/json/fast_sax/ya.make
new file mode 100644
index 00000000000..c6447ab6acc
--- /dev/null
+++ b/library/cpp/json/fast_sax/ya.make
@@ -0,0 +1,17 @@
+LIBRARY()
+
+OWNER(
+ pg
+ velavokr
+)
+
+PEERDIR(
+ library/cpp/json/common
+)
+
+SRCS(
+ parser.rl6
+ unescape.cpp
+)
+
+END()