diff options
author | prettyboy <prettyboy@yandex-team.com> | 2023-09-08 00:22:12 +0300 |
---|---|---|
committer | prettyboy <prettyboy@yandex-team.com> | 2023-09-08 00:46:04 +0300 |
commit | 3a6cd865171eed9b89bf536cd242285f8b583a91 (patch) | |
tree | 25e2756c125f7484fb118e0d5724212199662389 /library/python/json | |
parent | 67f3f216950849664a29035458cfaa5d12a62846 (diff) | |
download | ydb-3a6cd865171eed9b89bf536cd242285f8b583a91.tar.gz |
[build/plugins/ytest] Allow prebuilt linters for opensource
Без этого ydb либо не сможет запускать flake8 с помощью ya make,
либо к ним поедет сборка flake8.
Возможно, последнее и не так плохо, но сейчас предлагается пока так.
Diffstat (limited to 'library/python/json')
-rw-r--r-- | library/python/json/__init__.py | 44 | ||||
-rw-r--r-- | library/python/json/loads.cpp | 246 | ||||
-rw-r--r-- | library/python/json/loads.h | 5 | ||||
-rw-r--r-- | library/python/json/loads.pyx | 14 | ||||
-rw-r--r-- | library/python/json/ya.make | 17 |
5 files changed, 326 insertions, 0 deletions
diff --git a/library/python/json/__init__.py b/library/python/json/__init__.py new file mode 100644 index 0000000000..c6420d5e6d --- /dev/null +++ b/library/python/json/__init__.py @@ -0,0 +1,44 @@ +from library.python.json.loads import loads as _loads +from simplejson import loads as _sj_loads + + +def loads(*args, **kwargs): + try: + return _loads(*args, **kwargs) + except Exception as e: + if 'invalid syntax at token' in str(e): + kwargs.pop('intern_keys', None) + kwargs.pop('intern_vals', None) + kwargs.pop('may_unicode', None) + return _sj_loads(*args, **kwargs) + + raise + + +from simplejson import load, dump, dumps # noqa + + +def read_file(file_name, **kwargs): + """ + Read file and return its parsed json contents. + + All kwargs will be proxied to `json.load` method as is. + + :param file_name: file with json contents + :return: parsed json contents + """ + with open(file_name) as f: + return load(f, **kwargs) + + +def write_file(file_name, contents, **kwargs): + """ + Dump json data to file. + + All kwargs will be proxied to `json.dump` method as is. 
+ + :param file_name: file to dump to + :param contents: JSON-serializable object + """ + with open(file_name, "w") as f: + dump(contents, f, **kwargs) diff --git a/library/python/json/loads.cpp b/library/python/json/loads.cpp new file mode 100644 index 0000000000..19cdb096ae --- /dev/null +++ b/library/python/json/loads.cpp @@ -0,0 +1,246 @@ +#include "loads.h" + +#include <Python.h> + +#include <library/cpp/json/fast_sax/parser.h> + +#include <util/generic/algorithm.h> +#include <util/generic/stack.h> +#include <util/generic/vector.h> +#include <util/generic/ylimits.h> +#include <util/string/ascii.h> + +using namespace NJson; + +namespace { + enum EKind { + Undefined, + Array, + Dict, + Value, + Key, + }; + + static inline TStringBuf ToStr(EKind kind) noexcept { + switch (kind) { + case Undefined: + return TStringBuf("Undefined"); + + case Array: + return TStringBuf("Array"); + + case Dict: + return TStringBuf("Dict"); + + case Value: + return TStringBuf("Value"); + + case Key: + return TStringBuf("Key"); + } + + Y_UNREACHABLE(); + } + + struct TUnref { + static inline void Destroy(PyObject* o) noexcept { + Py_XDECREF(o); + } + }; + + using TObjectPtr = TAutoPtr<PyObject, TUnref>; + + static inline TObjectPtr BuildBool(bool val) noexcept { + if (val) { + Py_RETURN_TRUE; + } + + Py_RETURN_FALSE; + } + + // Translate python exceptions from object-creating functions into c++ exceptions + // Such errors are reported by returning nullptr + // When a python error is set and C++ exception is caught by Cython wrapper, + // Python exception is propagated, while C++ exception is discarded. 
+ PyObject* CheckNewObject(PyObject* obj) { + Y_ENSURE(obj != nullptr, "got python exception"); + return obj; + } + + void CheckRetcode(int retcode) { + Y_ENSURE(retcode == 0, "got python exception"); + } + + static inline TObjectPtr BuildSmall(long val) { +#if PY_VERSION_HEX >= 0x03000000 + return CheckNewObject(PyLong_FromLong(val)); +#else + return CheckNewObject(PyInt_FromLong(val)); +#endif + } + + PyObject* CreatePyString(TStringBuf str, bool intern, bool mayUnicode) { +#if PY_VERSION_HEX >= 0x03000000 + Y_UNUSED(mayUnicode); + PyObject* pyStr = PyUnicode_FromStringAndSize(str.data(), str.size()); + if (intern) { + PyUnicode_InternInPlace(&pyStr); + } +#else + const bool needUnicode = mayUnicode && !AllOf(str, IsAscii); + PyObject* pyStr = needUnicode ? PyUnicode_FromStringAndSize(str.data(), str.size()) + : PyString_FromStringAndSize(str.data(), str.size()); + if (intern && !needUnicode) { + PyString_InternInPlace(&pyStr); + } +#endif + return pyStr; + } + + struct TVal { + EKind Kind = Undefined; + TObjectPtr Val; + + inline TVal() noexcept + : Kind(Undefined) + { + } + + inline TVal(EKind kind, TObjectPtr val) noexcept + : Kind(kind) + , Val(val) + { + } + }; + + static inline TObjectPtr NoneRef() noexcept { + Py_RETURN_NONE; + } + + struct TContext: public TJsonCallbacks { + const bool InternKeys; + const bool InternVals; + const bool MayUnicode; + TStack<TVal, TVector<TVal>> S; + + inline TContext(bool internKeys, bool internVals, bool mayUnicode) + : TJsonCallbacks(true) + , InternKeys(internKeys) + , InternVals(internVals) + , MayUnicode(mayUnicode) + { + S.emplace(); + } + + inline bool Consume(TObjectPtr o) { + auto& t = S.top(); + + if (t.Kind == Array) { + CheckRetcode(PyList_Append(t.Val.Get(), o.Get())); + } else if (t.Kind == Key) { + auto key = S.top().Val; + + S.pop(); + + CheckRetcode(PyDict_SetItem(S.top().Val.Get(), key.Get(), o.Get())); + } else { + t = TVal(Value, o); + } + + return true; + } + + inline TObjectPtr Pop(EKind expect) { + 
auto res = S.top(); + + S.pop(); + + if (res.Kind != expect) { + ythrow yexception() << "unexpected kind(expect " << ToStr(expect) << ", got " << ToStr(res.Kind) << ")"; + } + + return res.Val; + } + + inline void Push(EKind kind, TObjectPtr object) { + S.push(TVal(kind, object)); + } + + virtual bool OnNull() { + return Consume(NoneRef()); + } + + virtual bool OnBoolean(bool v) { + return Consume(BuildBool(v)); + } + + virtual bool OnInteger(long long v) { + if (v >= (long long)Min<long>()) { + return Consume(BuildSmall((long)v)); + } + + return Consume(CheckNewObject(PyLong_FromLongLong(v))); + } + + virtual bool OnUInteger(unsigned long long v) { + if (v <= (unsigned long long)Max<long>()) { + return Consume(BuildSmall((long)v)); + } + + return Consume(CheckNewObject(PyLong_FromUnsignedLongLong(v))); + } + + virtual bool OnDouble(double v) { + return Consume(CheckNewObject(PyFloat_FromDouble(v))); + } + + virtual bool OnString(const TStringBuf& v) { + return Consume(CheckNewObject(CreatePyString(v, InternVals, MayUnicode))); + } + + virtual bool OnOpenMap() { + Push(Dict, CheckNewObject(PyDict_New())); + + return true; + } + + virtual bool OnCloseMap() { + return Consume(Pop(Dict)); + } + + virtual bool OnMapKey(const TStringBuf& k) { + Push(Key, CheckNewObject(CreatePyString(k, InternKeys, MayUnicode))); + return true; + } + + virtual bool OnOpenArray() { + Push(Array, CheckNewObject(PyList_New(0))); + + return true; + } + + virtual bool OnCloseArray() { + return Consume(Pop(Array)); + } + }; +} + +PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys, bool internVals, bool mayUnicode) { + TContext ctx(internKeys, internVals, mayUnicode); + + if (!len) { + ythrow yexception() << "parse error: zero length input string"; + } + + if (!NJson::ReadJsonFast(TStringBuf(data, len), &ctx)) { + ythrow yexception() << "parse error"; + } + + auto& s = ctx.S; + + if (!s || s.top().Kind != Value) { + ythrow yexception() << "shit happen"; + } + + return 
s.top().Val.Release(); +} diff --git a/library/python/json/loads.h b/library/python/json/loads.h new file mode 100644 index 0000000000..62dcdf6f21 --- /dev/null +++ b/library/python/json/loads.h @@ -0,0 +1,5 @@ +#pragma once + +#include <Python.h> + +PyObject* LoadJsonFromString(const char* data, size_t len, bool internKeys = false, bool internVals = false, bool mayUnicode = false); diff --git a/library/python/json/loads.pyx b/library/python/json/loads.pyx new file mode 100644 index 0000000000..82e5c6dce7 --- /dev/null +++ b/library/python/json/loads.pyx @@ -0,0 +1,14 @@ +from libcpp cimport bool + +cdef extern from "library/python/json/loads.h": + object LoadJsonFromString(const char*, size_t, bool internKeys, bool internVals, bool mayUnicode) except + + + +def loads(s, intern_keys = False, intern_vals = False, may_unicode = False): + if isinstance(s, unicode): + s = s.encode('utf-8') + + try: + return LoadJsonFromString(s, len(s), intern_keys, intern_vals, may_unicode) + except Exception as e: + raise ValueError(str(e)) diff --git a/library/python/json/ya.make b/library/python/json/ya.make new file mode 100644 index 0000000000..74a82de9d8 --- /dev/null +++ b/library/python/json/ya.make @@ -0,0 +1,17 @@ +PY23_LIBRARY() + +PEERDIR( + contrib/python/simplejson + library/cpp/json/fast_sax +) + +PY_SRCS( + __init__.py + loads.pyx +) + +SRCS( + loads.cpp +) + +END() |