diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/json/fast_sax | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/json/fast_sax')
-rw-r--r-- | library/cpp/json/fast_sax/parser.h | 13 | ||||
-rw-r--r-- | library/cpp/json/fast_sax/parser.rl6 | 314 | ||||
-rw-r--r-- | library/cpp/json/fast_sax/unescape.cpp | 7 | ||||
-rw-r--r-- | library/cpp/json/fast_sax/unescape.h | 5 | ||||
-rw-r--r-- | library/cpp/json/fast_sax/ya.make | 17 |
5 files changed, 356 insertions, 0 deletions
diff --git a/library/cpp/json/fast_sax/parser.h b/library/cpp/json/fast_sax/parser.h new file mode 100644 index 00000000000..b5f031dd9eb --- /dev/null +++ b/library/cpp/json/fast_sax/parser.h @@ -0,0 +1,13 @@ +#pragma once + +#include <library/cpp/json/common/defs.h> + +namespace NJson { + bool ReadJsonFast(TStringBuf in, TJsonCallbacks* callbacks); + + inline bool ValidateJsonFast(TStringBuf in, bool throwOnError = false) { + Y_ASSERT(false); // this method is broken, see details in IGNIETFERRO-1243. Use NJson::ValidateJson instead, or fix this one before using + TJsonCallbacks c(throwOnError); + return ReadJsonFast(in, &c); + } +} diff --git a/library/cpp/json/fast_sax/parser.rl6 b/library/cpp/json/fast_sax/parser.rl6 new file mode 100644 index 00000000000..edb4e9ee1b3 --- /dev/null +++ b/library/cpp/json/fast_sax/parser.rl6 @@ -0,0 +1,314 @@ +#include <library/cpp/json/fast_sax/unescape.h> +#include <library/cpp/json/fast_sax/parser.h> + +#include <util/string/cast.h> +#include <util/generic/buffer.h> +#include <util/generic/strbuf.h> +#include <util/generic/ymath.h> + +namespace NJson { + +enum EStoredStr { + SS_NONE = 0, SS_NOCOPY, SS_MUSTCOPY +}; + +struct TParserCtx { + TJsonCallbacks& Hndl; + + TBuffer Buffer; + TStringBuf String; + EStoredStr Stored = SS_NONE; + bool ExpectValue = true; + + const char* p0 = nullptr; + const char* p = nullptr; + const char* pe = nullptr; + const char* eof = nullptr; + const char* ts = nullptr; + const char* te = nullptr; + int cs = 0; + int act = 0; + + TParserCtx(TJsonCallbacks& h, TStringBuf data) + : Hndl(h) + , p0(data.data()) + , p(data.data()) + , pe(data.end()) + , eof(data.end()) + {} + + static inline bool GoodPtrs(const char* b, const char* e) { + return b && e && b <= e; + } + + bool OnError(TStringBuf reason = TStringBuf(""), bool end = false) const { + size_t off = 0; + TStringBuf token; + + if (GoodPtrs(p0, ts)) { + off = ts - p0; + } else if (end && GoodPtrs(p0, pe)) { + off = pe - p0; + } + + if (GoodPtrs(ts, te)) { + token = TStringBuf(ts, te); + } + + if (!token) { + Hndl.OnError(off, reason); + } else { + Hndl.OnError(off, TString::Join(reason, " at token: '", token, "'")); + } + + return false; + } + + bool OnVal() { + if (Y_UNLIKELY(!ExpectValue)) { + return false; + } + ExpectValue = false; + return true; + } + + bool OnNull() { + return Y_LIKELY(OnVal()) + && Hndl.OnNull(); + } + + bool OnTrue() { + return Y_LIKELY(OnVal()) + && Hndl.OnBoolean(true); + } + + bool OnFalse() { + return Y_LIKELY(OnVal()) + && Hndl.OnBoolean(false); + } + + bool OnPInt() { + unsigned long long res = 0; + return Y_LIKELY(OnVal()) + && TryFromString<unsigned long long>(TStringBuf(ts, te), res) + && Hndl.OnUInteger(res); + } + + bool OnNInt() { + long long res = 0; + return Y_LIKELY(OnVal()) + && TryFromString<long long>(TStringBuf(ts, te), res) + && Hndl.OnInteger(res); + } + + bool OnFlt() { + double res = 0; + return Y_LIKELY(OnVal()) + && TryFromString<double>(TStringBuf(ts, te), res) + && IsFinite(res) + && Hndl.OnDouble(res); + } + + bool OnMapOpen() { + bool res = Y_LIKELY(OnVal()) + && Hndl.OnOpenMap(); + ExpectValue = true; + return res; + } + + bool OnArrOpen() { + bool res = Y_LIKELY(OnVal()) + && Hndl.OnOpenArray(); + ExpectValue = true; + return res; + } + + bool OnString(TStringBuf s, EStoredStr t) { + if (Y_LIKELY(OnVal())) { + String = s; + Stored = t; + return true; + } else { + return false; + } + } + + bool OnStrU() { + return OnString(TStringBuf(ts, te), SS_NOCOPY); + } + + bool OnStrQ() { + return OnString(TStringBuf(ts + 1, te - 1), SS_NOCOPY); + } + + bool OnStrE() { + Buffer.Clear(); + Buffer.Reserve(2 * (te - ts)); + + return OnString(UnescapeJsonUnicode(TStringBuf(ts + 1, te - ts - 2), Buffer.data()), SS_MUSTCOPY); + } + + bool OnMapClose() { + ExpectValue = false; + return Y_LIKELY(OnAfterVal()) + && Hndl.OnCloseMap(); + } + + bool OnArrClose() { + ExpectValue = false; + return Y_LIKELY(OnAfterVal()) + && Hndl.OnCloseArray(); + } + + bool OnColon() { + if (ExpectValue) { + return false; + } + + ExpectValue = true; + const auto stored = Stored; + Stored = SS_NONE; + + switch (stored) { + default: + return false; + case SS_NOCOPY: + return Hndl.OnMapKeyNoCopy(String); + case SS_MUSTCOPY: + return Hndl.OnMapKey(String); + } + } + + bool OnAfterVal() { + const auto stored = Stored; + Stored = SS_NONE; + + switch (stored) { + default: + return true; + case SS_NOCOPY: + return Hndl.OnStringNoCopy(String); + case SS_MUSTCOPY: + return Hndl.OnString(String); + } + } + + bool OnComma() { + if (Y_UNLIKELY(ExpectValue)) { + return false; + } + ExpectValue = true; + return OnAfterVal(); + } + + bool Parse(); +}; + +#if 0 +%%{ +machine fastjson; + +alphtype char; + +action OnNull { if (Y_UNLIKELY(!OnNull())) goto TOKEN_ERROR; } +action OnTrue { if (Y_UNLIKELY(!OnTrue())) goto TOKEN_ERROR; } +action OnFalse { if (Y_UNLIKELY(!OnFalse())) goto TOKEN_ERROR; } +action OnPInt { if (Y_UNLIKELY(!OnPInt())) goto TOKEN_ERROR; } +action OnNInt { if (Y_UNLIKELY(!OnNInt())) goto TOKEN_ERROR; } +action OnFlt { if (Y_UNLIKELY(!OnFlt())) goto TOKEN_ERROR; } +action OnStrU { if (Y_UNLIKELY(!OnStrU())) goto TOKEN_ERROR; } +action OnStrQ { if (Y_UNLIKELY(!OnStrQ())) goto TOKEN_ERROR; } +action OnStrE { if (Y_UNLIKELY(!OnStrE())) goto TOKEN_ERROR; } +action OnDictO { if (Y_UNLIKELY(!OnMapOpen())) goto TOKEN_ERROR; } +action OnDictC { if (Y_UNLIKELY(!OnMapClose())) goto TOKEN_ERROR; } +action OnArrO { if (Y_UNLIKELY(!OnArrOpen())) goto TOKEN_ERROR; } +action OnArrC { if (Y_UNLIKELY(!OnArrClose())) goto TOKEN_ERROR; } +action OnComma { if (Y_UNLIKELY(!OnComma())) goto TOKEN_ERROR; } +action OnColon { if (Y_UNLIKELY(!OnColon())) goto TOKEN_ERROR; } +action OnError { goto TOKEN_ERROR; } + +comment1 = "/*" (any* -- "*/") "*/"; + +pint = [0-9]+; +nint = '-'[0-9]+; +flt = '-'?[0-9.][0-9.eE+\-]+; + +uchar0 = [a-zA-Z_@$] | (0x80 .. 0xFF); +uchar = uchar0 | digit | [.\-]; + +qchar = [^'\\]; #'; +dchar = [^"\\]; #"; + +echar = "\\" any; + +qechar = qchar | echar; +dechar = dchar | echar; + +strq = "'" qchar* "'"; +strd = '"' dchar* '"'; + +strqe = "'" qechar* "'"; +strde = '"' dechar* '"'; + +strU = uchar0 uchar*; +strQ = strq | strd; +strE = strqe | strde; + +ws = (0x00 .. 0x20) | 0x7F; +sp = ws+; + +main := |* + 'null' => OnNull; + 'true' => OnTrue; + 'false' => OnFalse; + + pint => OnPInt; + nint => OnNInt; + flt => OnFlt; + + strU => OnStrU; + strQ => OnStrQ; + strE => OnStrE; + + ',' => OnComma; + ':' => OnColon; + + '{' => OnDictO; + '}' => OnDictC; + '[' => OnArrO; + ']' => OnArrC; + + sp; + comment1; + + (flt | pint | nint) (any - (ws | ',' | ':' | '{' | '}' | '[' | ']')) => OnError; + + any => OnError; + *|; +}%% +#endif + +bool TParserCtx::Parse() { + try { + %%{ + write data noerror nofinal; + write init; + write exec; + }%% + ; + Y_UNUSED(fastjson_en_main); + } catch (const TFromStringException& e) { + return OnError(e.what()); + } + + return OnAfterVal() && Hndl.OnEnd() || OnError("invalid or truncated", true); + + TOKEN_ERROR: + return OnError("invalid syntax"); +} + +bool ReadJsonFast(TStringBuf data, TJsonCallbacks* h) { + return TParserCtx(*h, data).Parse(); +} + +} diff --git a/library/cpp/json/fast_sax/unescape.cpp b/library/cpp/json/fast_sax/unescape.cpp new file mode 100644 index 00000000000..72109b0b5e6 --- /dev/null +++ b/library/cpp/json/fast_sax/unescape.cpp @@ -0,0 +1,7 @@ +#include "unescape.h" + +#include <util/string/escape.h> + +TStringBuf UnescapeJsonUnicode(TStringBuf data, char* scratch) { + return TStringBuf(scratch, UnescapeC(data.data(), data.size(), scratch)); +} diff --git a/library/cpp/json/fast_sax/unescape.h b/library/cpp/json/fast_sax/unescape.h new file mode 100644 index 00000000000..5e40e1e8660 --- /dev/null +++ b/library/cpp/json/fast_sax/unescape.h @@ -0,0 +1,5 @@ +#pragma once + +#include <util/generic/strbuf.h> + +TStringBuf UnescapeJsonUnicode(TStringBuf data, char* scratch); diff --git a/library/cpp/json/fast_sax/ya.make b/library/cpp/json/fast_sax/ya.make new file mode 100644 index 00000000000..c6447ab6acc --- /dev/null +++ b/library/cpp/json/fast_sax/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +OWNER( + pg + velavokr +) + +PEERDIR( + library/cpp/json/common +) + +SRCS( + parser.rl6 + unescape.cpp +) + +END() |