diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300 |
commit | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch) | |
tree | 291d72dbd7e9865399f668c84d11ed86fb190bbf /library/python/json/loads.cpp | |
parent | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff) | |
download | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz |
YQ Connector:Use docker-compose in integrational tests
Diffstat (limited to 'library/python/json/loads.cpp')
-rw-r--r-- | library/python/json/loads.cpp | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp new file mode 100644 index 0000000000..19cdb096ae --- /dev/null +++ b/library/python/json/loads.cpp @@ -0,0 +1,246 @@ +#include "loads.h" + +#include <Python.h> + +#include <library/cpp/json/fast_sax/parser.h> + +#include <util/generic/algorithm.h> +#include <util/generic/stack.h> +#include <util/generic/vector.h> +#include <util/generic/ylimits.h> +#include <util/string/ascii.h> + +using namespace NJson; + +namespace { + enum EKind { + Undefined, + Array, + Dict, + Value, + Key, + }; + + static inline TStringBuf ToStr(EKind kind) noexcept { + switch (kind) { + case Undefined: + return TStringBuf("Undefined"); + + case Array: + return TStringBuf("Array"); + + case Dict: + return TStringBuf("Dict"); + + case Value: + return TStringBuf("Value"); + + case Key: + return TStringBuf("Key"); + } + + Y_UNREACHABLE(); + } + + struct TUnref { + static inline void Destroy(PyObject* o) noexcept { + Py_XDECREF(o); + } + }; + + using TObjectPtr = TAutoPtr<PyObject, TUnref>; + + static inline TObjectPtr BuildBool(bool val) noexcept { + if (val) { + Py_RETURN_TRUE; + } + + Py_RETURN_FALSE; + } + + // Translate python exceptions from object-creating functions into c++ exceptions + // Such errors are reported by returning nullptr + // When a python error is set and C++ exception is caught by Cython wrapper, + // Python exception is propagated, while C++ exception is discarded. + PyObject* CheckNewObject(PyObject* obj) { + Y_ENSURE(obj != nullptr, "got python exception"); + return obj; + } + + void CheckRetcode(int retcode) { + Y_ENSURE(retcode == 0, "got python exception"); + } + + static inline TObjectPtr BuildSmall(long val) { +#if PY_VERSION_HEX >= 0x03000000 + return CheckNewObject(PyLong_FromLong(val)); +#else + return CheckNewObject(PyInt_FromLong(val)); +#endif + } + + PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) { +#if PY_VERSION_HEX >= 0x03000000 + Y_UNUSED(mayUnicode); + PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size()); + if (intern) { + PyUnicode_InternInPlace(&pyStr); + } +#else + const bool needUnicode = mayUnicode && !AllOf(str, IsAscii); + PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size()) + : PyString_FromStringAndSize(str.data(), str.size()); + if (intern && !needUnicode) { + PyString_InternInPlace(&pyStr); + } +#endif + return pyStr; + } + + struct TVal { + EKind Kind = Undefined; + TObjectPtr Val; + + inline TVal() noexcept + : Kind(Undefined) + { + } + + inline TVal(EKind kind, TObjectPtr val) noexcept + : Kind(kind) + , Val(val) + { + } + }; + + static inline TObjectPtr NoneRef() noexcept { + Py_RETURN_NONE; + } + + struct TContext: public TJsonCallbacks { + const bool InternKeys; + const bool InternVals; + const bool MayUnicode; + TStack<TVal, TVector<TVal>> S; + + inline TContext(bool internKeys, bool internVals, bool mayUnicode) + : TJsonCallbacks(true) + , InternKeys(internKeys) + , InternVals(internVals) + , MayUnicode(mayUnicode) + { + S.emplace(); + } + + inline bool Consume(TObjectPtr o) { + auto& t = S.top(); + + if (t.Kind == Array) { + CheckRetcode(PyList_Append(t.Val.Get(), o.Get())); + } else if (t.Kind == Key) { + auto key = S.top().Val; + + S.pop(); + + CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get())); + } else { + t = TVal(Value, o); + } + + return true; + } + + inline TObjectPtr Pop(EKind expect) { + auto res = S.top(); + + S.pop(); + + if (res.Kind != expect) { + ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")"; + } + + return res.Val; + } + + inline void Push(EKind kind, TObjectPtr object) { + S.push(TVal(kind, object)); + } + + virtual bool OnNull() { + return Consume(NoneRef()); + } + + virtual bool OnBoolean(bool v) { + return Consume(BuildBool(v)); + } + + virtual bool OnInteger(long long v) { + if (v >= (long long)Min<long>()) { + return Consume(BuildSmall((long)v)); + } + + return Consume(CheckNewObject(PyLong_FromLongLong(v))); + } + + virtual bool OnUInteger(unsigned long long v) { + if (v <= (unsigned long long)Max<long>()) { + return Consume(BuildSmall((long)v)); + } + + return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v))); + } + + virtual bool OnDouble(double v) { + return Consume(CheckNewObject(PyFloat_FromDouble(v))); + } + + virtual bool OnString(const TStringBuf& v) { + return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode))); + } + + virtual bool OnOpenMap() { + Push(Dict, CheckNewObject(PyDict_New())); + + return true; + } + + virtual bool OnCloseMap() { + return Consume(Pop(Dict)); + } + + virtual bool OnMapKey(const TStringBuf& k) { + Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode))); + return true; + } + + virtual bool OnOpenArray() { + Push(Array, CheckNewObject(PyList_New(0))); + + return true; + } + + virtual bool OnCloseArray() { + return Consume(Pop(Array)); + } + }; +} + +PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) { + TContext ctx(internKeys, internVals, mayUnicode); + + if (!len) { + ythrow yexception() << "parse error: zero length input string"; + } + + if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) { + ythrow yexception() << "parse error"; + } + + auto& s = ctx.S; + + if (!s || s.top().Kind != Value) { + ythrow yexception() << "shit happen"; + } + + return s.top().Val.Release(); +} |