aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/json/loads.cpp
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@ydb.tech>2023-11-30 13:26:22 +0300
committervitalyisaev <vitalyisaev@ydb.tech>2023-11-30 15:44:45 +0300
commit0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch)
tree291d72dbd7e9865399f668c84d11ed86fb190bbf /library/python/json/loads.cpp
parentcb2c8d75065e5b3c47094067cb4aa407d4813298 (diff)
downloadydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz
YQ Connector:Use docker-compose in integrational tests
Diffstat (limited to 'library/python/json/loads.cpp')
-rw-r--r--library/python/json/loads.cpp246
1 files changed, 246 insertions, 0 deletions
diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp
new file mode 100644
index 0000000000..19cdb096ae
--- /dev/null
+++ b/library/python/json/loads.cpp
@@ -0,0 +1,246 @@
+#include "loads.h"
+
+#include <Python.h>
+
+#include <library/cpp/json/fast_sax/parser.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/stack.h>
+#include <util/generic/vector.h>
+#include <util/generic/ylimits.h>
+#include <util/string/ascii.h>
+
+using namespace NJson;
+
+namespace {
+ enum EKind {
+ Undefined,
+ Array,
+ Dict,
+ Value,
+ Key,
+ };
+
+ static inline TStringBuf ToStr(EKind kind) noexcept {
+ switch (kind) {
+ case Undefined:
+ return TStringBuf("Undefined");
+
+ case Array:
+ return TStringBuf("Array");
+
+ case Dict:
+ return TStringBuf("Dict");
+
+ case Value:
+ return TStringBuf("Value");
+
+ case Key:
+ return TStringBuf("Key");
+ }
+
+ Y_UNREACHABLE();
+ }
+
+ struct TUnref {
+ static inline void Destroy(PyObject* o) noexcept {
+ Py_XDECREF(o);
+ }
+ };
+
+ using TObjectPtr = TAutoPtr<PyObject, TUnref>;
+
+ static inline TObjectPtr BuildBool(bool val) noexcept {
+ if (val) {
+ Py_RETURN_TRUE;
+ }
+
+ Py_RETURN_FALSE;
+ }
+
+ // Translate python exceptions from object-creating functions into c++ exceptions
+ // Such errors are reported by returning nullptr
+ // When a python error is set and C++ exception is caught by Cython wrapper,
+ // Python exception is propagated, while C++ exception is discarded.
+ PyObject* CheckNewObject(PyObject* obj) {
+ Y_ENSURE(obj != nullptr, "got python exception");
+ return obj;
+ }
+
+ void CheckRetcode(int retcode) {
+ Y_ENSURE(retcode == 0, "got python exception");
+ }
+
+ static inline TObjectPtr BuildSmall(long val) {
+#if PY_VERSION_HEX >= 0x03000000
+ return CheckNewObject(PyLong_FromLong(val));
+#else
+ return CheckNewObject(PyInt_FromLong(val));
+#endif
+ }
+
+ PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) {
+#if PY_VERSION_HEX >= 0x03000000
+ Y_UNUSED(mayUnicode);
+ PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size());
+ if (intern) {
+ PyUnicode_InternInPlace(&pyStr);
+ }
+#else
+ const bool needUnicode = mayUnicode && !AllOf(str, IsAscii);
+ PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size())
+ : PyString_FromStringAndSize(str.data(), str.size());
+ if (intern && !needUnicode) {
+ PyString_InternInPlace(&pyStr);
+ }
+#endif
+ return pyStr;
+ }
+
+ struct TVal {
+ EKind Kind = Undefined;
+ TObjectPtr Val;
+
+ inline TVal() noexcept
+ : Kind(Undefined)
+ {
+ }
+
+ inline TVal(EKind kind, TObjectPtr val) noexcept
+ : Kind(kind)
+ , Val(val)
+ {
+ }
+ };
+
+ static inline TObjectPtr NoneRef() noexcept {
+ Py_RETURN_NONE;
+ }
+
+ struct TContext: public TJsonCallbacks {
+ const bool InternKeys;
+ const bool InternVals;
+ const bool MayUnicode;
+ TStack<TVal, TVector<TVal>> S;
+
+ inline TContext(bool internKeys, bool internVals, bool mayUnicode)
+ : TJsonCallbacks(true)
+ , InternKeys(internKeys)
+ , InternVals(internVals)
+ , MayUnicode(mayUnicode)
+ {
+ S.emplace();
+ }
+
+ inline bool Consume(TObjectPtr o) {
+ auto& t = S.top();
+
+ if (t.Kind == Array) {
+ CheckRetcode(PyList_Append(t.Val.Get(), o.Get()));
+ } else if (t.Kind == Key) {
+ auto key = S.top().Val;
+
+ S.pop();
+
+ CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get()));
+ } else {
+ t = TVal(Value, o);
+ }
+
+ return true;
+ }
+
+ inline TObjectPtr Pop(EKind expect) {
+ auto res = S.top();
+
+ S.pop();
+
+ if (res.Kind != expect) {
+ ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")";
+ }
+
+ return res.Val;
+ }
+
+ inline void Push(EKind kind, TObjectPtr object) {
+ S.push(TVal(kind, object));
+ }
+
+ virtual bool OnNull() {
+ return Consume(NoneRef());
+ }
+
+ virtual bool OnBoolean(bool v) {
+ return Consume(BuildBool(v));
+ }
+
+ virtual bool OnInteger(long long v) {
+ if (v >= (long long)Min<long>()) {
+ return Consume(BuildSmall((long)v));
+ }
+
+ return Consume(CheckNewObject(PyLong_FromLongLong(v)));
+ }
+
+ virtual bool OnUInteger(unsigned long long v) {
+ if (v <= (unsigned long long)Max<long>()) {
+ return Consume(BuildSmall((long)v));
+ }
+
+ return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v)));
+ }
+
+ virtual bool OnDouble(double v) {
+ return Consume(CheckNewObject(PyFloat_FromDouble(v)));
+ }
+
+ virtual bool OnString(const TStringBuf& v) {
+ return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode)));
+ }
+
+ virtual bool OnOpenMap() {
+ Push(Dict, CheckNewObject(PyDict_New()));
+
+ return true;
+ }
+
+ virtual bool OnCloseMap() {
+ return Consume(Pop(Dict));
+ }
+
+ virtual bool OnMapKey(const TStringBuf& k) {
+ Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode)));
+ return true;
+ }
+
+ virtual bool OnOpenArray() {
+ Push(Array, CheckNewObject(PyList_New(0)));
+
+ return true;
+ }
+
+ virtual bool OnCloseArray() {
+ return Consume(Pop(Array));
+ }
+ };
+}
+
+PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) {
+ TContext ctx(internKeys, internVals, mayUnicode);
+
+ if (!len) {
+ ythrow yexception() << "parse error: zero length input string";
+ }
+
+ if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) {
+ ythrow yexception() << "parse error";
+ }
+
+ auto& s = ctx.S;
+
+ if (!s || s.top().Kind != Value) {
+ ythrow yexception() << "shit happen";
+ }
+
+ return s.top().Val.Release();
+}