diff options
author | imunkin <imunkin@yandex-team.com> | 2024-11-08 10:00:23 +0300 |
---|---|---|
committer | imunkin <imunkin@yandex-team.com> | 2024-11-08 10:12:13 +0300 |
commit | a784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch) | |
tree | 05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs/common/python | |
parent | d70137a7b530ccaa52834274913bbb5a3d1ca06e (diff) | |
download | ydb-a784a2f943d6e15caa6241e2e96d80aac6dbf375.tar.gz |
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories:
* clickhouse/client
* datetime
* knn
* roaring
commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs/common/python')
98 files changed, 11693 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.c b/yql/essentials/udfs/common/python/bindings/py27_backports.c new file mode 100644 index 0000000000..cf21a97cef --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.c @@ -0,0 +1,91 @@ +#include "py27_backports.h" + + +// Provide implementations from python 2.7.15 as backports + +int +_PySlice_Unpack(PyObject *_r, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) +{ + PySliceObject *r = (PySliceObject *)_r; + /* this is harder to get right than you might think */ + + assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX); + + if (r->step == Py_None) { + *step = 1; + } + else { + if (!_PyEval_SliceIndex(r->step, step)) return -1; + if (*step == 0) { + PyErr_SetString(PyExc_ValueError, + "slice step cannot be zero"); + return -1; + } + /* Here *step might be -PY_SSIZE_T_MAX-1; in this case we replace it + * with -PY_SSIZE_T_MAX. This doesn't affect the semantics, and it + * guards against later undefined behaviour resulting from code that + * does "step = -step" as part of a slice reversal. + */ + if (*step < -PY_SSIZE_T_MAX) + *step = -PY_SSIZE_T_MAX; + } + + if (r->start == Py_None) { + *start = *step < 0 ? PY_SSIZE_T_MAX : 0; + } + else { + if (!_PyEval_SliceIndex(r->start, start)) return -1; + } + + if (r->stop == Py_None) { + *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX; + } + else { + if (!_PyEval_SliceIndex(r->stop, stop)) return -1; + } + + return 0; +} + +Py_ssize_t +_PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step) +{ + /* this is harder to get right than you might think */ + + assert(step != 0); + assert(step >= -PY_SSIZE_T_MAX); + + if (*start < 0) { + *start += length; + if (*start < 0) { + *start = (step < 0) ? -1 : 0; + } + } + else if (*start >= length) { + *start = (step < 0) ? length - 1 : length; + } + + if (*stop < 0) { + *stop += length; + if (*stop < 0) { + *stop = (step < 0) ? 
-1 : 0; + } + } + else if (*stop >= length) { + *stop = (step < 0) ? length - 1 : length; + } + + if (step < 0) { + if (*stop < *start) { + return (*start - *stop - 1) / (-step) + 1; + } + } + else { + if (*start < *stop) { + return (*stop - *start - 1) / step + 1; + } + } + return 0; +} diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.h b/yql/essentials/udfs/common/python/bindings/py27_backports.h new file mode 100644 index 0000000000..766af6a76f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.h @@ -0,0 +1,26 @@ +#pragma once + +#include "Python.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Declare functions which are to be backported +// (see details about need for backports in ya.make) + +int _PySlice_Unpack(PyObject *slice, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); + +Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t step); + +// Declare py23 compatible names + +#define PySlice_Unpack _PySlice_Unpack +#define PySlice_AdjustIndices _PySlice_AdjustIndices + +#ifdef __cplusplus +} +#endif diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.cpp b/yql/essentials/udfs/common/python/bindings/py_callable.cpp new file mode 100644 index 0000000000..c60403bdca --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_callable.cpp @@ -0,0 +1,423 @@ +#include "py_callable.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_stream.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { +namespace { + 
+////////////////////////////////////////////////////////////////////////////// +// TPyCallableObject +////////////////////////////////////////////////////////////////////////////// +struct TPyCallableObject +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* Type; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + NUdf::TCallableTypeInspector Inspector; + + TPyCallableObject(const TPyCastContext::TPtr& castCtx, const NUdf::TType* type) + : CastCtx(castCtx) + , Type(type) + , Inspector(*castCtx->PyCtx->TypeInfoHelper, type) + {} +}; + +inline TPyCallableObject* CastToCallable(PyObject* o) +{ + return reinterpret_cast<TPyCallableObject*>(o); +} + +void CallableDealloc(PyObject* self) +{ + delete CastToCallable(self); +} + +PyObject* CallableRepr(PyObject*) +{ + // TODO: print callable signature + return PyRepr("<yql.TCallable>").Release(); +} + +PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) +{ + Y_UNUSED(kwargs); + + PY_TRY { + TPyCallableObject* callable = CastToCallable(self); + auto callableType = callable->Type; + auto valueBuilder = callable->CastCtx->ValueBuilder; + const auto& inspector = callable->Inspector; + + TSmallVec<NUdf::TUnboxedValue> cArgs; + cArgs.resize(inspector.GetArgsCount()); + FromPyArgs(callable->CastCtx, callableType, args, cArgs.data(), inspector); + + NUdf::TUnboxedValue result; + { + TPyGilUnlocker unlock; + result = NUdf::TBoxedValueAccessor::Run(*callable->Value.Get(), valueBuilder, cArgs.data()); + } + + return ToPyObject(callable->CastCtx, inspector.GetReturnType(), result).Release(); + } PY_CATCH(nullptr) +} + +} + +PyTypeObject PyCallableType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TCallable"), + INIT_MEMBER(tp_basicsize , sizeof(TPyCallableObject)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , CallableDealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + 
INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , CallableRepr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , CallableCall), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , 0), + INIT_MEMBER(tp_doc , "yql.TCallable object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , nullptr), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyCallable 
+////////////////////////////////////////////////////////////////////////////// +class TPyCallable: public NUdf::TBoxedValue +{ +public: + TPyCallable( + PyObject* function, + const NUdf::TType* functionType, + const TPyCastContext::TPtr& castCtx) + : Function_(function, TPyObjectPtr::ADD_REF) + , FunctionType_(functionType) + , CastCtx_(castCtx) + , Inspector_(*castCtx->PyCtx->TypeInfoHelper, functionType) + { + // keep ownership of function closure if any + if (PyFunction_Check(function)) { + PyObject* closure = PyFunction_GetClosure(function); + if (closure) { + Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF); + } + } + } + + ~TPyCallable() { + TPyGilLocker lock; + Closure_.Reset(); + Function_.Reset(); + CastCtx_.Reset(); + } + +private: + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder*, + const NUdf::TUnboxedValuePod* args) const final + { + TPyGilLocker lock; + try { + TPyObjectPtr pyArgs = ToPyArgs(CastCtx_, FunctionType_, args, Inspector_); + TPyObjectPtr resultObj = + PyObject_CallObject(Function_.Get(), pyArgs.Get()); + if (!resultObj) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + + auto returnType = Inspector_.GetReturnType(); + if (CastCtx_->PyCtx->TypeInfoHelper->GetTypeKind(returnType) == NUdf::ETypeKind::Stream) { + return FromPyStream(CastCtx_, returnType, resultObj, Function_, Closure_, pyArgs); + } + + return FromPyObject(CastCtx_, returnType, resultObj.Get()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" << e.what()).data()); + } + } + + TPyObjectPtr Function_; + TPyObjectPtr Closure_; + const NUdf::TType* FunctionType_; + TPyCastContext::TPtr CastCtx_; + NUdf::TCallableTypeInspector Inspector_; +}; + + +TPyObjectPtr ToPyCallable( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + TPyCallableObject* callable 
= new TPyCallableObject(castCtx, type); + PyObject_INIT(callable, &PyCallableType); + + callable->Value.Set(castCtx->PyCtx, value.AsBoxed()); + + return reinterpret_cast<PyObject*>(callable); +} + +NUdf::TUnboxedValue FromPyCallable( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) +{ + return NUdf::TUnboxedValuePod(new TPyCallable(value, type, castCtx)); +} + +TMaybe<TPyObjectPtr> GetOptionalAttribute(PyObject* value, const char* attrName) { + if (TPyObjectPtr attr = PyObject_GetAttrString(value, attrName)) { + return attr; + } else { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + return Nothing(); + } else { + throw yexception() << "Cannot get attribute '" << attrName << "', error: " << GetLastErrorAsString(); + } + } +} + + +struct TPySecureParam +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + + TPySecureParam(const TPyCastContext::TPtr& castCtx) : CastCtx(castCtx) {} +}; + +inline TPySecureParam* CastToSecureParam(PyObject* o) +{ + return reinterpret_cast<TPySecureParam*>(o); +} + +void SecureParamDealloc(PyObject* self) +{ + delete CastToSecureParam(self); +} + +PyObject* SecureParamRepr(PyObject*) +{ + return PyRepr("<yql.TSecureParam>").Release(); +} + +PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) +{ + Y_UNUSED(kwargs); + + struct PyBufDeleter { + void operator() (Py_buffer* view) { PyBuffer_Release(view); } + }; + Py_buffer input; + if (!PyArg_ParseTuple(args, "s*", &input)) { + return nullptr; + } + std::unique_ptr<Py_buffer, PyBufDeleter> bufPtr(&input); + auto valueBuilder = CastToSecureParam(self)->CastCtx->ValueBuilder; + NUdf::TStringRef key(static_cast<const char*>(input.buf), input.len); + PY_TRY { + if (!valueBuilder->GetSecureParam(key, key)) { + throw yexception() << "Cannot get secure parameter for key: " << key; + } + return PyRepr(TStringBuf(key.Data(), key.Size())).Release(); + } PY_CATCH(nullptr) +} + +static PyTypeObject PySecureParamType = { 
+ PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TSecureParam"), + INIT_MEMBER(tp_basicsize , sizeof(TPySecureParam)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , SecureParamDealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , SecureParamRepr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , SecureParamCall), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , 0), + INIT_MEMBER(tp_doc , "yql.TSecureParam object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , nullptr), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), 
+#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +TPyObjectPtr ToPySecureParam(const TPyCastContext::TPtr& castCtx) +{ + TPySecureParam* ret = new TPySecureParam(castCtx); + PyObject_INIT(ret, &PySecureParamType); + return reinterpret_cast<PyObject*>(ret); +} + + +void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value) { + if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) try { + castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get()); + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what(); + } + + if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) try { + Py_ssize_t itemsCount = PyTuple_GET_SIZE(convertYson->Get()); + if (itemsCount != 2) { + throw yexception() << "Expected tuple of 2 callables"; + } + + castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 0)); + castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 1)); + if (!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) { + throw yexception() << "Expected tuple of 2 callables"; + } + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what(); + } + + if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) try { + PyObject* bytesValue = nullptr; + if (PyBytes_Check(bytesDecodeMode->Get())) { + bytesValue = PyObject_Bytes(bytesDecodeMode->Get()); + } else if (PyUnicode_Check(bytesDecodeMode->Get())) { + bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get()); + } else { + throw yexception() << "Expected bytes or unicode"; + } + if (!bytesValue) { + PyErr_Clear(); + throw yexception() << "Failed to convert to bytes"; + } 
+ + TStringBuf view(PyBytes_AS_STRING(bytesValue)); + if (view == "never") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Never; + } else if (view == "strict") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Strict; + } else { + Py_DECREF(bytesValue); + throw yexception() << "Expected values 'never' or 'strict'"; + } + Py_DECREF(bytesValue); + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what(); + } + + if (PyObject_SetAttrString(value, "_yql_secure_param", ToPySecureParam(castCtx).Get()) != 0) { + throw yexception() << "Cannot set attribute '_yql_secure_param'"; + } +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.h b/yql/essentials/udfs/common/python/bindings/py_callable.h new file mode 100644 index 0000000000..4ce79e1d7f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_callable.h @@ -0,0 +1,22 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyCallableType; + +TPyObjectPtr ToPyCallable( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyCallable( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp new file mode 100644 index 0000000000..1c58d7b371 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp @@ -0,0 +1,87 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyCallableTest) { + struct TTestCallable: public NUdf::TBoxedValue { + NUdf::TUnboxedValue 
Run( + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override + { + Y_UNUSED(valueBuilder); + return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); + } + }; + + Y_UNIT_TEST(FromPyFunction) { + TPythonTestEngine engine; + const NUdf::IValueBuilder* vb = &engine.GetValueBuilder(); + + engine.ToMiniKQL<char* (*)(char*, ui32)>( + "def Test():\n" + " def test(str, count):\n" + " return str * count\n" + " return test", + [vb](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue args[2]; + args[0] = vb->NewString("j"); + args[1] = NUdf::TUnboxedValuePod((ui32) 5); + auto result = value.Run(vb, args); + + UNIT_ASSERT(result); + UNIT_ASSERT(5 == result.AsStringRef().Size()); + UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj"); + }); + } + + Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value):\n" + " assert type(value).__name__ == 'TCallable'\n" + " assert value.__call__ != None\n" + " assert value(-2) == 40\n" + " assert value(-1) == 41\n" + " assert value(0) == 42\n" + " assert value(1) == 43\n" + " assert value(2) == 44\n"); + } + + Y_UNIT_TEST(ToPythonAndBack) { + struct TTestCallable: public NUdf::TBoxedValue { + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override + { + Y_UNUSED(valueBuilder); + return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); + } + }; + + TPythonTestEngine engine; + engine.ToPythonAndBack<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + 
UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32) 5); + const auto result = value.Run(nullptr, &arg); + + UNIT_ASSERT(result); + UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>()); + }); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp new file mode 100644 index 0000000000..3aa5537b21 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp @@ -0,0 +1,955 @@ +#include "py_cast.h" +#include "py_ptr.h" +#include "py_errors.h" +#include "py_callable.h" +#include "py_dict.h" +#include "py_list.h" +#include "py_gil.h" +#include "py_utils.h" +#include "py_void.h" +#include "py_resource.h" +#include "py_stream.h" +#include "py_struct.h" +#include "py_tuple.h" +#include "py_variant.h" +#include "py_decimal.h" + +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_type_printer.h> +#include <yql/essentials/public/udf/udf_terminator.h> +#include <yql/essentials/utils/utf8.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/string/join.h> +#include <util/string/builder.h> + +#ifdef HAVE_LONG_LONG +# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask +# define YQL_PyLong_Asi64 PyLong_AsLongLong +# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong +#else +# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask +# define YQL_PyLong_Asi64 PyLong_AsLong +# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong +#endif + +#define TO_PYTHON(Format, Type) \ + template <> \ + ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \ + return Py_BuildValue(Format, value); \ + } + +#define TO_PYTHON_BYTES(Type) \ + template <> \ + ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ + const Py_ssize_t size = 
static_cast<Py_ssize_t>(value.size()); \ + return PyBytes_FromStringAndSize(value.data(), size); \ + } + +#define TO_PYTHON_UNICODE(Type) \ + template <> \ + ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ + Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \ + return PyUnicode_FromStringAndSize(value.data(), size); \ + } + +#define PY_ENSURE_TYPE(Type, Value, Message) \ + do { \ + if (!Py##Type##_Check(Value)) { \ + throw yexception() << Message << " " #Type "; Object repr: " \ + << PyObjectRepr(Value); \ + } \ + } while (0) + +#define FROM_PYTHON_FLOAT(Type) \ + template <> \ + Type PyCast<Type>(PyObject* value) { \ + double result = PyFloat_AsDouble(value); \ + if (result == -1.0 && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Float"); \ + } \ + return static_cast<Type>(result); \ + } + +#define FROM_PYTHON_LONG(Type, BigType) \ + template <> \ + Type PyCast<Type>(PyObject* value) { \ + if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } \ + ThrowCastTypeException(value, "Long"); \ + } + +#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \ + template <> \ + Type PyCast<Type>(PyObject* value) { \ + if (PyInt_Check(value)) { \ + long result = PyInt_AsLong(value); \ + if (result == -1L && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if ( \ + static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>()) || \ + static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > 
static_cast<long>(Max<Type>()) \ + ) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } else if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } \ + ThrowCastTypeException(value, "Long"); \ + } + +#define FROM_PYTHON_BYTES_OR_UTF(Type) \ + template <> \ + Type PyCast<Type>(PyObject* value) { \ + if (PyUnicode_Check(value)) { \ + Py_ssize_t size = 0U; \ + const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ + if (!str || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } else if (PyBytes_Check(value)) { \ + Py_ssize_t size = 0U; \ + char *str = nullptr; \ + const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } \ + ThrowCastTypeException(value, "String"); \ + } + +#define FROM_PYTHON_BYTES(Type) \ + template <> \ + Type PyCast<Type>(PyObject* value) { \ + PY_ENSURE_TYPE(Bytes, value, "Expected"); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } + +#define TRY_FROM_PYTHON_FLOAT(Type) \ + template <> \ + bool TryPyCast<Type>(PyObject* value, Type& result) { \ + double v = PyFloat_AsDouble(value); \ + if (v == -1.0 && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + result = static_cast<Type>(v); \ + return true; 
\ + } + +#define TRY_FROM_PYTHON_LONG(Type, BigType) \ + template <> \ + bool TryPyCast<Type>(PyObject* value, Type& res) { \ + if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + return false; \ + } \ + res = static_cast<Type>(result); \ + return true; \ + } \ + return false; \ + } + +#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \ + template <> \ + bool TryPyCast<Type>(PyObject* value, Type& res) { \ + if (PyInt_Check(value)) { \ + long result = PyInt_AsLong(value); \ + if (result == -1L && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + res = static_cast<Type>(result); \ + if (result < static_cast<long>(Min<Type>()) || (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()))) { \ + return false; \ + } \ + return true; \ + } else if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + return false; \ + } \ + res = static_cast<Type>(result); \ + return true; \ + } \ + return false; \ + } + +#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \ + template <> \ + bool TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + Py_ssize_t size = 0U; \ + const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ + if (!str || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + Py_ssize_t size = 0U; \ + char *str = nullptr; \ + const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } \ + 
return false; \ + } + +#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \ + template <> \ + bool TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + int rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else { \ + return false; \ + } \ + } + +namespace NPython { + +using namespace NKikimr; + +inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) { + throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType + << "; Object repr: " << PyObjectRepr(value); +} + +inline void ThrowCastException(PyObject* value, TStringBuf toType) { + throw yexception() << "Cast error object " << PyObjectRepr(value) << " to " << toType << ": " + << GetLastErrorAsString(); +} + + +template <> +bool TryPyCast<bool>(PyObject* value, bool& result) +{ + int isTrue = PyObject_IsTrue(value); + if (isTrue == -1) { + return false; + } + result = (isTrue == 1); + return true; +} + +#if PY_MAJOR_VERSION >= 3 +TRY_FROM_PYTHON_LONG(i8, i64) +TRY_FROM_PYTHON_LONG(ui8, ui64) +TRY_FROM_PYTHON_LONG(i16, i64) +TRY_FROM_PYTHON_LONG(ui16, ui64) +TRY_FROM_PYTHON_LONG(i32, i64) +TRY_FROM_PYTHON_LONG(ui32, ui64) +TRY_FROM_PYTHON_LONG(i64, i64) +TRY_FROM_PYTHON_LONG(ui64, ui64) +TRY_FROM_PYTHON_BYTES_OR_UTF(TString) +TRY_FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef) +#else +TRY_FROM_PYTHON_INT_OR_LONG(i8, i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui8, ui64) +TRY_FROM_PYTHON_INT_OR_LONG(i16, i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui16, ui64) +TRY_FROM_PYTHON_INT_OR_LONG(i32, 
i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui32, ui64) +TRY_FROM_PYTHON_INT_OR_LONG(i64, i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui64, ui64) +TRY_FROM_PYTHON_STR_OR_UTF(TString) +TRY_FROM_PYTHON_STR_OR_UTF(NUdf::TStringRef) +#endif + +TRY_FROM_PYTHON_FLOAT(float) +TRY_FROM_PYTHON_FLOAT(double) + +template <> +bool PyCast<bool>(PyObject* value) +{ + int res = PyObject_IsTrue(value); + if (res == -1) { + throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to bool. " + << GetLastErrorAsString(); + } + return res == 1; +} + +#if PY_MAJOR_VERSION >= 3 +FROM_PYTHON_LONG(i8, i64) +FROM_PYTHON_LONG(ui8, ui64) +FROM_PYTHON_LONG(i16, i64) +FROM_PYTHON_LONG(ui16, ui64) +FROM_PYTHON_LONG(i32, i64) +FROM_PYTHON_LONG(ui32, ui64) +FROM_PYTHON_LONG(i64, i64) +FROM_PYTHON_LONG(ui64, ui64) +FROM_PYTHON_BYTES_OR_UTF(TString) +FROM_PYTHON_BYTES_OR_UTF(TStringBuf) +FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef) +#else +FROM_PYTHON_INT_OR_LONG(i8, i64) +FROM_PYTHON_INT_OR_LONG(ui8, ui64) +FROM_PYTHON_INT_OR_LONG(i16, i64) +FROM_PYTHON_INT_OR_LONG(ui16, ui64) +FROM_PYTHON_INT_OR_LONG(i32, i64) +FROM_PYTHON_INT_OR_LONG(ui32, ui64) +FROM_PYTHON_INT_OR_LONG(i64, i64) +FROM_PYTHON_INT_OR_LONG(ui64, ui64) +FROM_PYTHON_BYTES(TString) +FROM_PYTHON_BYTES(TStringBuf) +FROM_PYTHON_BYTES(NUdf::TStringRef) +#endif + +FROM_PYTHON_FLOAT(float) +FROM_PYTHON_FLOAT(double) + +template <> +TPyObjectPtr PyCast<bool>(bool value) +{ + PyObject* res = value ? 
Py_True : Py_False; + return TPyObjectPtr(res, TPyObjectPtr::ADD_REF); +} + +TO_PYTHON("b", i8) +TO_PYTHON("B", ui8) +TO_PYTHON("h", i16) +TO_PYTHON("H", ui16) +TO_PYTHON("i", i32) +TO_PYTHON("I", ui32) +#ifdef HAVE_LONG_LONG +TO_PYTHON("L", i64) +TO_PYTHON("K", ui64) +#else +TO_PYTHON("l", i64) +TO_PYTHON("k", ui64) +#endif + +TO_PYTHON_BYTES(TString) +TO_PYTHON_BYTES(TStringBuf) +TO_PYTHON_BYTES(NUdf::TStringRef) +TO_PYTHON_UNICODE(TString) +TO_PYTHON_UNICODE(TStringBuf) +TO_PYTHON_UNICODE(NUdf::TStringRef) + +template <typename T> +NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) { + PY_ENSURE(PyTuple_Check(value), + "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); + + Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); + PY_ENSURE(tupleSize == 2, + "Expected to get Tuple with 2 elements, but got " + << tupleSize << " elements"); + + PyObject* el0 = PyTuple_GET_ITEM(value, 0); + PyObject* el1 = PyTuple_GET_ITEM(value, 1); + auto num = PyCast<T>(el0); + if (num >= limit) { + throw yexception() << "Python object " << PyObjectRepr(el0) \ + << " is out of range for " << typeName; + } + + auto name = PyCast<NUdf::TStringRef>(el1); + auto ret = NUdf::TUnboxedValuePod(num); + ui32 tzId; + if (!ctx->ValueBuilder->GetDateBuilder().FindTimezoneId(name, tzId)) { + throw yexception() << "Unknown timezone: " << TStringBuf(name); + } + + ret.SetTimezoneId(tzId); + return ret; +} + +TO_PYTHON("f", float) +TO_PYTHON("d", double) + +namespace { + +TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto typeId = inspector.GetTypeId(); + + switch (typeId) { + case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>()); + case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>()); + case NUdf::TDataType<i16>::Id: return 
PyCast<i16>(value.Get<i16>()); + case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>()); + case NUdf::TDataType<i32>::Id: return PyCast<i32>(value.Get<i32>()); + case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>()); + case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>()); + case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>()); + case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case NUdf::TDataType<const char*>::Id: { + if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) { + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + } else { + auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << + "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" << + GetLastErrorAsString()).data() + ); + } + return pyObj; + } + } + case NUdf::TDataType<NUdf::TYson>::Id: { + auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef()); + if (ctx->YsonConverterIn) { + TPyObjectPtr pyArgs(PyTuple_New(1)); + PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release()); + pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + } + + return pyObj; + } + case NUdf::TDataType<NUdf::TUuid>::Id: + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: + return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>()); + case 
NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<NUdf::TTzDate>::Id: { + TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + } + + throw yexception() + << "Unsupported type " << typeId; +} + +NUdf::TUnboxedValue FromPyData( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto typeId = inspector.GetTypeId(); + + switch (typeId) { + case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value)); + case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value)); + case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value)); + case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value)); + case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value)); + case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value)); + case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value)); + case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value)); 
+ case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value)); + case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value)); + case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value)); + case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case NUdf::TDataType<NUdf::TYson>::Id: { + if (ctx->YsonConverterOut) { + TPyObjectPtr input(value, TPyObjectPtr::ADD_REF); + TPyObjectPtr pyArgs(PyTuple_New(1)); + // PyTuple_SET_ITEM steals reference, so pass ownership to it + PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release()); + input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get())); + if (!input) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get())); + } + } +#if PY_MAJOR_VERSION >= 3 + case NUdf::TDataType<const char*>::Id: + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + if (PyUnicode_Check(value)) { + const TPyObjectPtr uif8(PyUnicode_AsUTF8String(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); + } + throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; +#else + case NUdf::TDataType<const char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: { + if (PyUnicode_Check(value)) { + const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); + } + + if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) && + PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), 
static_cast<size_t>(PyBytes_GET_SIZE(value))))) { + throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json"; + } + + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + } +#endif + case NUdf::TDataType<NUdf::TUuid>::Id: { + const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + if (ret.AsStringRef().Size() != 16) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " has invalid value for Uuid"; + } + + return ret; + } + case NUdf::TDataType<NUdf::TDate>::Id: { + auto num = PyCast<ui16>(value); + if (num >= NUdf::MAX_DATE) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Date"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TDatetime>::Id: { + auto num = PyCast<ui32>(value); + if (num >= NUdf::MAX_DATETIME) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Datetime"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TTimestamp>::Id: { + auto num = PyCast<ui64>(value); + if (num >= NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Timestamp"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TInterval>::Id: { + auto num = PyCast<i64>(value); + if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Interval"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TTzDate>::Id: + return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx); + case NUdf::TDataType<NUdf::TTzDatetime>::Id: + return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx); + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, 
TStringBuf("TzTimestamp"), ctx); + } + + throw yexception() + << "Unsupported type " << typeId; +} + +TPyObjectPtr ToPyList( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto itemType = inspector.GetItemType(); + + if (ctx->LazyInputObjects) { + return ToPyLazyList(ctx, itemType, value); + } + + TPyObjectPtr list(PyList_New(0)); + const auto iterator = value.GetListIterator(); + for (NUdf::TUnboxedValue item; iterator.Next(item);) { + auto pyItem = ToPyObject(ctx, itemType, item); + if (PyList_Append(list.Get(), pyItem.Get()) < 0) { + throw yexception() << "Can't append item to list" + << GetLastErrorAsString(); + } + } + + return list; +} + +NUdf::TUnboxedValue FromPyList( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + + if (PyList_Check(value)) { + // eager list to list conversion + auto itemType = inspector.GetItemType(); + Py_ssize_t cnt = PyList_GET_SIZE(value); + NUdf::TUnboxedValue *items = nullptr; + const auto list = ctx->ValueBuilder->NewArray(cnt, items); + for (Py_ssize_t i = 0; i < cnt; ++i) { + PyObject *item = PyList_GET_ITEM(value, i); + *items++ = FromPyObject(ctx, itemType, item); + } + return list; + } + + if (PyTuple_Check(value)) { + // eager tuple to list conversion + auto itemType = inspector.GetItemType(); + Py_ssize_t cnt = PyTuple_GET_SIZE(value); + NUdf::TUnboxedValue *items = nullptr; + const auto list = ctx->ValueBuilder->NewArray(cnt, items); + for (Py_ssize_t i = 0; i < cnt; ++i) { + PyObject *item = PyTuple_GET_ITEM(value, i); + *items++ = FromPyObject(ctx, itemType, item); + } + return list; + } + + if (PyGen_Check(value)) { + TPyObjectPtr valuePtr(PyObject_GetIter(value)); + return FromPyLazyIterator(ctx, type, std::move(valuePtr)); + } + + if (PyIter_Check(value) +#if 
PY_MAJOR_VERSION < 3 + // python 2 iterators must also implement "next" method + && 1 == PyObject_HasAttrString(value, "next") +#endif + ) { + TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF); + return FromPyLazyIterator(ctx, type, std::move(valuePtr)); + } + + // assume that this function will returns generator + if (PyCallable_Check(value)) { + TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF); + return FromPyLazyGenerator(ctx, type, std::move(valuePtr)); + } + + if (PySequence_Check(value) || PyObject_HasAttrString(value, "__iter__")) { + TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF); + return FromPyLazyIterable(ctx, type, std::move(valuePtr)); + } + + throw yexception() << "Expected list, tuple, generator, generator factory, " + "iterator or iterable object, but got: " << PyObjectRepr(value); +} + +TPyObjectPtr ToPyOptional( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + if (!value) { + return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF); + } + + const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + return ToPyObject(ctx, inspector.GetItemType(), value); +} + +NUdf::TUnboxedValue FromPyOptional( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + if (value == Py_None) { + return NUdf::TUnboxedValue(); + } + + const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + return FromPyObject(ctx, inspector.GetItemType(), value).Release().MakeOptional(); +} + +TPyObjectPtr ToPyDict( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto keyType = inspector.GetKeyType(); + const auto valueType = inspector.GetValueType(); + + if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) { + if (ctx->LazyInputObjects) { // TODO + return ToPyLazySet(ctx, 
keyType, value); + } + + const TPyObjectPtr set(PyFrozenSet_New(nullptr)); + const auto iterator = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; iterator.Next(key);) { + auto pyKey = ToPyObject(ctx, keyType, key); + if (PySet_Add(set.Get(), pyKey.Get()) < 0) { + throw yexception() << "Can't add item to set" << GetLastErrorAsString(); + } + } + + return set; + } else { + if (ctx->LazyInputObjects) { + return ToPyLazyDict(ctx, keyType, valueType, value); + } + + const TPyObjectPtr dict(PyDict_New()); + const auto iterator = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, valueObj; iterator.NextPair(key, valueObj);) { + auto pyKey = ToPyObject(ctx, keyType, key); + auto pyValue = ToPyObject(ctx, valueType, valueObj); + if (PyDict_SetItem(dict.Get(), pyKey.Get(), pyValue.Get()) < 0) { + throw yexception() << "Can't add item to dict" << GetLastErrorAsString(); + } + } + + return dict; + } +} + +NUdf::TUnboxedValue FromPyDict( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto keyType = inspector.GetKeyType(); + const auto valueType = inspector.GetValueType(); + + if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType) + && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) { + const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType); + if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) { + return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value); + } + } else if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) { + if (PyAnySet_Check(value)) { + return FromPySet(ctx, keyType, value); + } else if (value->ob_type->tp_as_sequence && value->ob_type->tp_as_sequence->sq_contains) { + return FromPySequence(ctx, keyType, value); + 
} + } else if (PyDict_Check(value)) { + return FromPyDict(ctx, keyType, valueType, value); + } else if (PyMapping_Check(value)) { + return FromPyMapping(ctx, keyType, valueType, value); + } + + throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict."; +} + +} // namespace + +TPyObjectPtr ToPyObject( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { + case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value); + case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: return ToPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value); + case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: return ToPyStream(ctx, type, value); + case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value); + default: { + ::TStringBuilder sb; + sb << "Failed to export: "; + NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out); + throw yexception() << sb; + } + } +} + +NUdf::TUnboxedValue FromPyObject( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { + case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value); + case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: return FromPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, 
value); + case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); + case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value); + default: { + ::TStringBuilder sb; + sb << "Failed to import: "; + NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out); + throw yexception() << sb; + } + } +} + +TPyObjectPtr ToPyArgs( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod* args, + const NUdf::TCallableTypeInspector& inspector) +{ + const auto argsCount = inspector.GetArgsCount(); + TPyObjectPtr tuple(PyTuple_New(argsCount)); + + for (ui32 i = 0; i < argsCount; i++) { + auto arg = ToPyObject(ctx, inspector.GetArgType(i), args[i]); + PyTuple_SET_ITEM(tuple.Get(), i, arg.Release()); + } + + return tuple; +} + +void FromPyArgs( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* pyArgs, + NUdf::TUnboxedValue* cArgs, + const NUdf::TCallableTypeInspector& inspector) +{ + PY_ENSURE_TYPE(Tuple, pyArgs, "Expected"); + + const auto argsCount = inspector.GetArgsCount(); + const auto optArgsCount = inspector.GetOptionalArgsCount(); + + ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs)); + PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount, + "arguments count missmatch: " + "min " << (argsCount - optArgsCount) << ", max " << argsCount + << ", got " << pyArgsCount); + + for (ui32 i = 0; i < pyArgsCount; i++) { + PyObject* item = PyTuple_GET_ITEM(pyArgs, i); + cArgs[i] = FromPyObject(ctx, inspector.GetArgType(i), item); + } + + for (ui32 i = pyArgsCount; i < argsCount; i++) { + cArgs[i] = NUdf::TUnboxedValuePod(); + } +} + +class TDummyMemoryLock : public IMemoryLock { 
+public: + void Acquire() override {} + void Release() override {} +}; + +TPyCastContext::TPyCastContext( + const NKikimr::NUdf::IValueBuilder* builder, + TPyContext::TPtr pyCtx, + THolder<IMemoryLock> memoryLock) + : ValueBuilder(builder) + , PyCtx(std::move(pyCtx)) + , MemoryLock(std::move(memoryLock)) +{ + if (!MemoryLock) { + MemoryLock = MakeHolder<TDummyMemoryLock>(); + } +} + +TPyCastContext::~TPyCastContext() { + TPyGilLocker locker; + StructTypes.clear(); + YsonConverterIn.Reset(); + YsonConverterOut.Reset(); + TimezoneNames.clear(); +} + +const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) { + auto& x = TimezoneNames[id]; + if (!x) { + NKikimr::NUdf::TStringRef ref; + if (!ValueBuilder->GetDateBuilder().FindTimezoneName(id, ref)) { + throw yexception() << "Unknown timezone id: " << id; + } + + x = PyRepr(ref); + } + + return x; +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.h b/yql/essentials/udfs/common/python/bindings/py_cast.h new file mode 100644 index 0000000000..e6850c7404 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_cast.h @@ -0,0 +1,45 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +#include <util/generic/typetraits.h> + +namespace NPython { + +template <typename T> +TPyObjectPtr PyCast(typename TTypeTraits<T>::TFuncParam value); + +template <typename T> +T PyCast(PyObject* value); + +template <typename T> +bool TryPyCast(PyObject* value, T& result); + +template <typename T> +TPyObjectPtr ToPyUnicode(const T& value); + +TPyObjectPtr ToPyObject( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyObject( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +TPyObjectPtr ToPyArgs( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod* args, + const 
NKikimr::NUdf::TCallableTypeInspector& inspector); + +void FromPyArgs( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* pyArgs, + NKikimr::NUdf::TUnboxedValue* cArgs, + const NKikimr::NUdf::TCallableTypeInspector& inspector); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp new file mode 100644 index 0000000000..47f65ab6fa --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp @@ -0,0 +1,90 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyCastTest) { + Y_UNIT_TEST(FromPyStrToInt) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<i32>( + "def Test():\n" + " return '123a'", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "str"); + } + + Y_UNIT_TEST(FromPyTupleToLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return 1, 1", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "tuple"); + } + + Y_UNIT_TEST(FromPyFuncToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def f():\n" + " return 42\n" + "def Test():\n" + " return f", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "function"); + } + + Y_UNIT_TEST(FromPyNoneToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def Test():\n" + " return None", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "None"); + } + + Y_UNIT_TEST(BadFromPythonFloat) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<float>( + "def Test():\n" + " return '3 <dot> 1415926'", + [](const NUdf::TUnboxedValuePod& value) { + 
Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object '3 <dot> 1415926' to Float"); + } + +#if PY_MAJOR_VERSION >= 3 +# define RETVAL "-1" +#else +# define RETVAL "-18446744073709551616L" +#endif + + Y_UNIT_TEST(BadFromPythonLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return " RETVAL, + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object " RETVAL " to Long"); + } + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_ctx.h b/yql/essentials/udfs/common/python/bindings/py_ctx.h new file mode 100644 index 0000000000..9e86042908 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_ctx.h @@ -0,0 +1,120 @@ +#pragma once + +#include "py_ptr.h" + +#include <yql/essentials/public/udf/udf_types.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_string.h> + +#include <util/generic/ptr.h> +#include <util/generic/intrlist.h> + +#include <unordered_map> + +namespace NPython { + +enum class EBytesDecodeMode { + Never, + Strict, +}; + +class IMemoryLock { +public: + virtual ~IMemoryLock() = default; + virtual void Acquire() = 0; + virtual void Release() = 0; +}; + +struct TPyCleanupListItemBase: public TIntrusiveListItem<TPyCleanupListItemBase> { + virtual ~TPyCleanupListItemBase() = default; + virtual void Cleanup() = 0; +}; + +template <typename TValueType> +class TPyCleanupListItem: public TPyCleanupListItemBase { +public: + TPyCleanupListItem() = default; + virtual ~TPyCleanupListItem() { + Unlink(); + } + + void Cleanup() override { + Value = {}; + } + + template <typename TCtx> + void Set(const TIntrusivePtr<TCtx>& ctx, TValueType val) { + Value = std::move(val); + ctx->CleanupList.PushBack(this); + } + + bool IsSet() const { + 
return !!Value; + } + + const TValueType& Get() const { + if (!Value) { + throw yexception() << "Trying to use python wrap object with destroyed yql value"; + } + return Value; + } + +private: + TValueType Value; +}; + +struct TPyContext: public TSimpleRefCount<TPyContext> { + const NKikimr::NUdf::ITypeInfoHelper::TPtr TypeInfoHelper; + const NKikimr::NUdf::TStringRef ResourceTag; + const NKikimr::NUdf::TSourcePosition Pos; + TIntrusiveList<TPyCleanupListItemBase> CleanupList; + + TPyContext(NKikimr::NUdf::ITypeInfoHelper::TPtr helper, const NKikimr::NUdf::TStringRef& tag, const NKikimr::NUdf::TSourcePosition& pos) + : TypeInfoHelper(std::move(helper)) + , ResourceTag(tag) + , Pos(pos) + { + } + + void Cleanup() { + for (auto& o: CleanupList) { + o.Cleanup(); + } + CleanupList.Clear(); + } + + ~TPyContext() = default; + + using TPtr = TIntrusivePtr<TPyContext>; +}; + +struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { + const NKikimr::NUdf::IValueBuilder *const ValueBuilder; + const TPyContext::TPtr PyCtx; + std::unordered_map<const NKikimr::NUdf::TType*, TPyObjectPtr> StructTypes; + bool LazyInputObjects = true; + TPyObjectPtr YsonConverterIn; + TPyObjectPtr YsonConverterOut; + EBytesDecodeMode BytesDecodeMode = EBytesDecodeMode::Never; + TPyObjectPtr Decimal; + std::unordered_map<ui32, TPyObjectPtr> TimezoneNames; + THolder<IMemoryLock> MemoryLock; + + TPyCastContext( + const NKikimr::NUdf::IValueBuilder* builder, + TPyContext::TPtr pyCtx, + THolder<IMemoryLock> memoryLock = {}); + + ~TPyCastContext(); + + const TPyObjectPtr& GetTimezoneName(ui32 id); + const TPyObjectPtr& GetDecimal(); + + using TPtr = TIntrusivePtr<TPyCastContext>; +}; + +using TPyCastContextPtr = TPyCastContext::TPtr; + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp new file mode 100644 index 0000000000..0070e3420f --- /dev/null +++ 
b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp @@ -0,0 +1,59 @@ +#include "py_decimal.h" +#include "py_errors.h" +#include "py_utils.h" +#include "py_cast.h" + +#include <util/stream/str.h> + +#include <yql/essentials/public/udf/udf_value.h> + +using namespace NKikimr; + +namespace NPython { + +TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale) +{ + const auto str = NYql::NDecimal::ToString(value.GetInt128(), precision, scale); + PY_ENSURE(str, "Bad decimal value."); + + const TPyObjectPtr pyStr(PyRepr(str)); + + const TPyObjectPtr args(PyTuple_Pack(1, pyStr.Get())); + PY_ENSURE(args, "Can't pack args."); + + const TPyObjectPtr dec(PyObject_CallObject(ctx->GetDecimal().Get(), args.Get())); + PY_ENSURE(dec, "Can't create Decimal."); + return dec; +} + +NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& ctx, PyObject* value, ui8 precision, ui8 scale) +{ + const TPyObjectPtr print(PyObject_Str(value)); + PY_ENSURE(print, "Can't print decimal."); + + TString str; + PY_ENSURE(TryPyCast<TString>(print.Get(), str), "Can't get decimal string."); + + if (str.EndsWith("Infinity")) { + str.resize(str.size() - 5U); + } + + const auto dec = NYql::NDecimal::FromStringEx(str.c_str(), precision, scale); + PY_ENSURE(!NYql::NDecimal::IsError(dec), "Can't make Decimal from string."); + + return NKikimr::NUdf::TUnboxedValuePod(dec); +} + +const TPyObjectPtr& TPyCastContext::GetDecimal() { + if (!Decimal) { + const TPyObjectPtr module(PyImport_ImportModule("decimal")); + PY_ENSURE(module, "Can't import decimal."); + + Decimal.ResetSteal(PyObject_GetAttrString(module.Get(), "Decimal")); + PY_ENSURE(Decimal, "Can't get Decimal."); + } + + return Decimal; +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.h b/yql/essentials/udfs/common/python/bindings/py_decimal.h new file mode 100644 index 0000000000..5764fe4fa8 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/bindings/py_decimal.h @@ -0,0 +1,12 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& castCtx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale); + +NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& castCtx, PyObject* value, ui8 precision, ui8 scale); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp new file mode 100644 index 0000000000..8388c110f3 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp @@ -0,0 +1,122 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyDecimalTest) { + Y_UNIT_TEST(FromPyZero) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<12,5>>( + R"( +from decimal import Decimal +def Test(): return Decimal() + )", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(!value.GetInt128()); + }); + } + + Y_UNIT_TEST(FromPyPi) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<28,18>>( + R"( +from decimal import Decimal +def Test(): return Decimal('3.141592653589793238') + )", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL); + }); + } + + Y_UNIT_TEST(FromPyTini) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35,35>>( + R"( +from decimal import Decimal +def Test(): return Decimal('-.00000000000000000000000000000000001') + )", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == -1); + }); + } + + Y_UNIT_TEST(FromPyNan) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>( + R"( +from decimal import Decimal +def 
Test(): return Decimal('NaN') + )", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan()); + }); + } + + Y_UNIT_TEST(FromPyInf) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>( + R"( +from decimal import Decimal +def Test(): return Decimal('-inf') + )", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf()); + }); + } + + Y_UNIT_TEST(ToPyZero) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<7,7>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod::Zero(); + }, + "def Test(value): assert value.is_zero()" + ); + } + + Y_UNIT_TEST(ToPyPi) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<20,18>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL)); + }, + "def Test(value): assert str(value) == '3.141592653589793238'" + ); + } + + Y_UNIT_TEST(ToPyTini) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<35,35>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1)); + }, + "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'" + ); + } + + Y_UNIT_TEST(ToPyNan) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<2,2>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan()); + }, + "def Test(value): assert value.is_nan()" + ); + } + + Y_UNIT_TEST(ToPyInf) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<30,0>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf()); + }, + "def Test(value): assert value.is_infinite() and value.is_signed()" + ); + } +} diff --git 
a/yql/essentials/udfs/common/python/bindings/py_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_dict.cpp new file mode 100644 index 0000000000..f2bd0669ed --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_dict.cpp @@ -0,0 +1,683 @@ +#include "py_dict.h" +#include "py_iterator.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> + + +using namespace NKikimr; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyDict interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazyDict +{ + using TPtr = NUdf::TRefCountedPtr<TPyLazyDict, TPyPtrOps<TPyLazyDict>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* KeyType; + const NUdf::TType* PayloadType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + + inline static TPyLazyDict* Cast(PyObject* o) { + return reinterpret_cast<TPyLazyDict*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static PyObject* Subscript(PyObject* self, PyObject* key); + static int Contains(PyObject* self, PyObject* key); + static PyObject* Get(PyObject* self, PyObject* args); + + static PyObject* Iter(PyObject* self) { return Keys(self, nullptr); } + static PyObject* Keys(PyObject* self, PyObject* /* args */); + static PyObject* Items(PyObject* self, PyObject* /* args */); + static PyObject* Values(PyObject* self, PyObject* /* args */); +}; + +PyMappingMethods LazyDictMapping = { + 
INIT_MEMBER(mp_length, TPyLazyDict::Len), + INIT_MEMBER(mp_subscript, TPyLazyDict::Subscript), + INIT_MEMBER(mp_ass_subscript, nullptr), +}; + +PySequenceMethods LazyDictSequence = { + INIT_MEMBER(sq_length , TPyLazyDict::Len), + INIT_MEMBER(sq_concat , nullptr), + INIT_MEMBER(sq_repeat , nullptr), + INIT_MEMBER(sq_item , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(was_sq_slice , nullptr), +#else + INIT_MEMBER(sq_slice , nullptr), +#endif + INIT_MEMBER(sq_ass_item , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(was_sq_ass_slice , nullptr), +#else + INIT_MEMBER(sq_ass_slice , nullptr), +#endif + INIT_MEMBER(sq_contains , TPyLazyDict::Contains), + INIT_MEMBER(sq_inplace_concat , nullptr), + INIT_MEMBER(sq_inplace_repeat , nullptr), +}; + +PyNumberMethods LazyDictNumbering = { + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyLazyDict::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), +#endif + + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + 
INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), +#endif +}; + + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#define Py_TPFLAGS_HAVE_SEQUENCE_IN 0 +#endif + +PyDoc_STRVAR(get__doc__, + "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."); +PyDoc_STRVAR(keys__doc__, + "D.keys() -> an iterator over the keys of D"); +PyDoc_STRVAR(values__doc__, + "D.values() -> an iterator over the values of D"); +PyDoc_STRVAR(items__doc__, + "D.items() -> an iterator over the (key, value) items of D"); +#if PY_MAJOR_VERSION < 3 +PyDoc_STRVAR(iterkeys__doc__, + "D.iterkeys() -> an iterator over the keys of D"); +PyDoc_STRVAR(itervalues__doc__, + "D.itervalues() -> an iterator over the values of D"); +PyDoc_STRVAR(iteritems__doc__, + "D.iteritems() -> an iterator over the (key, value) items of D"); +#endif + +static PyMethodDef LazyDictMethods[] = { + { "get", TPyLazyDict::Get, METH_VARARGS, get__doc__ }, + { "keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__ }, + { "items", TPyLazyDict::Items, METH_NOARGS, items__doc__ }, + { "values", TPyLazyDict::Values, METH_NOARGS, values__doc__ }, +#if PY_MAJOR_VERSION < 3 + { "iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__ }, + { "iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__ }, + { "itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__ }, +#endif + { nullptr, nullptr, 0, nullptr } /* sentinel */ +}; + 
+PyTypeObject PyLazyDictType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TDict"), + INIT_MEMBER(tp_basicsize , sizeof(TPyLazyDict)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazyDict::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazyDict::Repr), + INIT_MEMBER(tp_as_number , &LazyDictNumbering), + INIT_MEMBER(tp_as_sequence , &LazyDictSequence), + INIT_MEMBER(tp_as_mapping , &LazyDictMapping), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc , "yql.TDict object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , &TPyLazyDict::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , LazyDictMethods), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + 
INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazySet interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazySet +{ + using TPtr = NUdf::TRefCountedPtr<TPyLazySet, TPyPtrOps<TPyLazySet>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + + inline static TPyLazySet* Cast(PyObject* o) { + return reinterpret_cast<TPyLazySet*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static int Contains(PyObject* self, PyObject* key); + static PyObject* Get(PyObject* self, PyObject* args); + + static PyObject* Iter(PyObject* self); +}; + +PySequenceMethods LazySetSequence = { + INIT_MEMBER(sq_length , TPyLazySet::Len), + INIT_MEMBER(sq_concat , nullptr), + INIT_MEMBER(sq_repeat , nullptr), + INIT_MEMBER(sq_item , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(was_sq_slice , nullptr), +#else + INIT_MEMBER(sq_slice , nullptr), +#endif + INIT_MEMBER(sq_ass_item , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(was_sq_ass_slice , nullptr), +#else + INIT_MEMBER(sq_ass_slice , nullptr), +#endif + INIT_MEMBER(sq_contains , TPyLazySet::Contains), + INIT_MEMBER(sq_inplace_concat , nullptr), + INIT_MEMBER(sq_inplace_repeat , nullptr), +}; + +PyNumberMethods LazySetNumbering = { + INIT_MEMBER(nb_add, 
nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyLazySet::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyLazySet::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), +#endif + + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), +#endif +}; + +PyTypeObject PyLazySetType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TSet"), + INIT_MEMBER(tp_basicsize , sizeof(TPyLazySet)), + 
INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazySet::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazySet::Repr), + INIT_MEMBER(tp_as_number , &LazySetNumbering), + INIT_MEMBER(tp_as_sequence , &LazySetSequence), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc , "yql.TSet object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , &TPyLazySet::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + 
INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyDict implementation +////////////////////////////////////////////////////////////////////////////// +int TPyLazyDict::Bool(PyObject* self) +{ + PY_TRY { + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazyDict::Repr(PyObject*) +{ + return PyRepr("<yql.TDict>").Release(); +} + +Py_ssize_t TPyLazyDict::Len(PyObject* self) +{ + PY_TRY { + return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); + } PY_CATCH(-1) +} + +PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key) +{ + PY_TRY { + TPyLazyDict* dict = Cast(self); + + if (dict->KeyType) { + const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + + const TPyObjectPtr repr = PyObject_Repr(key); + PyErr_SetObject(PyExc_KeyError, repr.Get()); + return nullptr; + } else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } + + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return nullptr; + } + + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + + const TPyObjectPtr repr = PyObject_Repr(key); + 
PyErr_SetObject(PyExc_IndexError, repr.Get()); + return nullptr; + } + + } PY_CATCH(nullptr) +} + +// -1 error +// 0 not found +// 1 found +int TPyLazyDict::Contains(PyObject* self, PyObject* key) +{ + PY_TRY { + TPyLazyDict* dict = Cast(self); + NUdf::TUnboxedValue mkqlKey; + + if (dict->KeyType) { + mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + } else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return -1; + } + + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return 0; + } + mkqlKey = NUdf::TUnboxedValuePod(ui64(index)); + } + + return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazyDict::Get(PyObject* self, PyObject* args) +{ + PY_TRY { + PyObject* key = nullptr; + PyObject* failobj = Py_None; + + if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) + return nullptr; + + TPyLazyDict* dict = Cast(self); + if (dict->KeyType) { + const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + } else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } + + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return nullptr; + } + + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), 
NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + } + + Py_INCREF(failobj); + return failobj; + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::Keys(PyObject* self, PyObject* /* args */) +{ + PY_TRY { + const auto dict = Cast(self); + return ToPyIterator(dict->CastCtx, dict->KeyType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*dict->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::Items(PyObject* self, PyObject* /* args */) +{ + PY_TRY { + const auto dict = Cast(self); + return ToPyIterator(dict->CastCtx, dict->KeyType, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetDictIterator(*dict->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::Values(PyObject* self, PyObject* /* args */) +{ + PY_TRY { + const auto dict = Cast(self); + return ToPyIterator(dict->CastCtx, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetPayloadsIterator(*dict->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value) +{ + TPyLazyDict* dict = new TPyLazyDict; + PyObject_INIT(dict, &PyLazyDictType); + + dict->CastCtx = castCtx; + dict->KeyType = keyType; + dict->PayloadType = payloadType; + dict->Value.Set(castCtx->PyCtx, value); + return reinterpret_cast<PyObject*>(dict); +} + +////////////////////////////////////////////////////////////////////////////// +// TPyLazySet implementation +////////////////////////////////////////////////////////////////////////////// +int TPyLazySet::Bool(PyObject* self) +{ + PY_TRY { + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 
1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazySet::Repr(PyObject*) +{ + return PyRepr("<yql.TSet>").Release(); +} + +Py_ssize_t TPyLazySet::Len(PyObject* self) +{ + PY_TRY { + return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); + } PY_CATCH(-1) +} + +// -1 error +// 0 not found +// 1 found +int TPyLazySet::Contains(PyObject* self, PyObject* key) +{ + PY_TRY { + const auto set = Cast(self); + const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key); + return NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey) ? 1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazySet::Iter(PyObject* self) +{ + PY_TRY { + const auto set = Cast(self); + return ToPyIterator(set->CastCtx, set->ItemType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*set->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazySet::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value) +{ + TPyLazySet* dict = new TPyLazySet; + PyObject_INIT(dict, &PyLazySetType); + + dict->CastCtx = castCtx; + dict->ItemType = itemType; + dict->Value.Set(castCtx->PyCtx, value); + return reinterpret_cast<PyObject*>(dict); +} + +////////////////////////////////////////////////////////////////////////////// + +TPyObjectPtr ToPyLazyDict( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyLazyDict::New(castCtx, keyType, payloadType, value.AsBoxed()); +} + +TPyObjectPtr ToPyLazySet( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyLazySet::New(castCtx, itemType, value.AsBoxed()); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.h b/yql/essentials/udfs/common/python/bindings/py_dict.h new file mode 100644 index 0000000000..538ca69a12 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/bindings/py_dict.h @@ -0,0 +1,50 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyLazyDictType; +extern PyTypeObject PyLazySetType; + +TPyObjectPtr ToPyLazyDict( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +TPyObjectPtr ToPyLazySet( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyMapping( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* map); + +NKikimr::NUdf::TUnboxedValue FromPyDict( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* dict); + +NKikimr::NUdf::TUnboxedValue FromPySet( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + PyObject* set); + +NKikimr::NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + PyObject* sequence); + +NKikimr::NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TDataTypeId keyType, + PyObject* sequence); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp new file mode 100644 index 0000000000..9ac9627ebb --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp @@ -0,0 +1,722 @@ +#include "ut3/py_test_engine.h" + +#include <yql/essentials/public/udf/udf_ut_helpers.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyDictTest) { + Y_UNIT_TEST(FromPyEmptyDict) { + TPythonTestEngine 
engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); + } + + Y_UNIT_TEST(FromPyDict_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); + } + + Y_UNIT_TEST(FromPyDict_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4)))); + }); + } + + Y_UNIT_TEST(FromPyDict_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4)))); + }); + } + + Y_UNIT_TEST(FromPyDict_Items) { + TPythonTestEngine engine; + 
engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::map<ui32, TString> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<ui32>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[1], "one"); + UNIT_ASSERT_EQUAL(items[2], "two"); + UNIT_ASSERT_EQUAL(items[3], "three"); + }); + } + + Y_UNIT_TEST(FromPyDict_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<ui32> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<ui32>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], 1U); + UNIT_ASSERT_EQUAL(items[1], 2U); + UNIT_ASSERT_EQUAL(items[2], 3U); + }); + } + + Y_UNIT_TEST(FromPyDict_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "three"); + UNIT_ASSERT_EQUAL(items[2], "two"); + }); + } + + Y_UNIT_TEST(FromPyList_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + 
UNIT_ASSERT(value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); + } + + Y_UNIT_TEST(FromPyTuple_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i32, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(i32(2))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); + UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three"); + const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2))); + UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two"); + const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3))); + UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one"); + + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4)))); + }); + } + + Y_UNIT_TEST(FromPyList_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i16, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4)))); + }); + } + + Y_UNIT_TEST(FromPyTuple_Items) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui16, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const 
NUdf::TUnboxedValuePod& value) { + std::vector<std::pair<ui16, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui16>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3U); + UNIT_ASSERT_EQUAL(items[0].first, 0); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 2); + UNIT_ASSERT_EQUAL(items[0].second, "one"); + UNIT_ASSERT_EQUAL(items[1].second, "two"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); + }); + } + + Y_UNIT_TEST(FromPyList_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i64, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<i64> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<i64>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], 0); + UNIT_ASSERT_EQUAL(items[1], 1); + UNIT_ASSERT_EQUAL(items[2], 2); + }); + } + + Y_UNIT_TEST(FromPyTuple_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui64, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "two"); + UNIT_ASSERT_EQUAL(items[2], "three"); + }); + } + + Y_UNIT_TEST(ToPyEmptyDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<ui8, ui32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n" + ); + } + + Y_UNIT_TEST(ToPyDict) { + 
TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value):\n" + " assert value\n" + " assert len(value) == 3\n" + " assert iter(value) is not None\n" + " assert 2 in value\n" + " assert 0 not in value\n" + " assert set(iter(value)) == set([1, 2, 3])\n" + " assert value[2] == 0.2\n" + " assert value.get(0, 0.7) == 0.7\n" + " assert value.get(3, 0.7) == 0.3\n" + " assert sorted(value.keys()) == [1, 2, 3]\n" + " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n" + " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" +#if PY_MAJOR_VERSION < 3 + " assert all(isinstance(k, int) for k in value.iterkeys())\n" + " assert all(isinstance(v, float) for v in value.itervalues())\n" + " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n" +#endif + ); + } + + Y_UNIT_TEST(ToPyDictWrongKey) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value):\n" + " try:\n" + " print(value[0])\n" + " except KeyError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(FromPyEmptySet) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<ui32, void>>( + "def Test(): return set([])", + [](const NUdf::TUnboxedValuePod& value) { + 
UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); + + } + + Y_UNIT_TEST(FromPySet) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return set(['one', 'two', 'three'])", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + + std::set<TString> set; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + set.emplace(key.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(set.size(), 3); + UNIT_ASSERT(set.count("one")); + UNIT_ASSERT(set.count("two")); + UNIT_ASSERT(set.count("three")); + }); + + } + + Y_UNIT_TEST(FromPySet_Contains) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return {b'one', b'two', b'three'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three"))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); + }); + + } + + Y_UNIT_TEST(ToPyEmptySet) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n" + ); + + } + + Y_UNIT_TEST(ToPySet) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((ui8) 1), NUdf::TUnboxedValuePod::Void()) + 
.Add(NUdf::TUnboxedValuePod((ui8) 2), NUdf::TUnboxedValuePod::Void()) + .Add(NUdf::TUnboxedValuePod((ui8) 3), NUdf::TUnboxedValuePod::Void()) + .Build(); + + }, + "def Test(value):\n" + " assert len(value) == 3\n" + " assert all(isinstance(k, int) for k in iter(value))\n" + " assert all(i in value for i in [1, 2, 3])\n"); + } + + Y_UNIT_TEST(FromPyMultiDict) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>( + "def Test(): return {1: ['one', 'two'], 3: ['three']}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 2); + + std::unordered_map<ui32, std::vector<TString>> map; + const auto dictIt = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) { + auto& val = map[key.Get<ui32>()]; + const auto listIt = payload.GetListIterator(); + for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) { + val.emplace_back(listItem.AsStringRef()); + } + } + + UNIT_ASSERT_EQUAL(map.size(), 2); + auto it = map.find(1); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 2); + UNIT_ASSERT_EQUAL(it->second[0], "one"); + UNIT_ASSERT_EQUAL(it->second[1], "two"); + it = map.find(3); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 1); + UNIT_ASSERT_EQUAL(it->second[0], "three"); + }); + + } + + Y_UNIT_TEST(ToPyMultiDict) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi; + return vb.NewDict(type, flags)-> + Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("one")) + .Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("two")) + .Add(NUdf::TUnboxedValuePod((ui8) 3), vb.NewString("three")) + .Build(); + + }, + "def Test(value):\n" + " assert len(value) == 2\n" + " assert 1 in value\n" + " assert 3 in 
value\n" + " assert len(value[1]) == 2\n" + " assert 'one' in value[1]\n" + " assert 'two' in value[1]\n" + " assert list(value[3]) == ['three']\n"); + } + + Y_UNIT_TEST(ToPyAndBackDictAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> + Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32) 0))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32) 3))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32) 2)).Get<double>(), 0.2); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32) 4))); + + std::vector<std::pair<i32, double>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i32>(), payload.Get<double>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 1); + UNIT_ASSERT_EQUAL(items[1].first, 2); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, 0.1); + UNIT_ASSERT_EQUAL(items[1].second, 0.2); + UNIT_ASSERT_EQUAL(items[2].second, 0.3); + + std::vector<i32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 1); + UNIT_ASSERT_EQUAL(keys[1], 2); + UNIT_ASSERT_EQUAL(keys[2], 3); + + std::vector<double> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue 
payload; pit.Next(payload);) { + values.emplace_back(payload.Get<double>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], 0.1); + UNIT_ASSERT_EQUAL(values[1], 0.2); + UNIT_ASSERT_EQUAL(values[2], 0.3); + } + ); + } + + Y_UNIT_TEST(PyInvertDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value): return { v: k for k, v in value.items() }", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double) 0.1))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double) 0.0))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double) 0.4))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double) 0.2)).Get<i32>(), 2); + + std::map<double, i32> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<double>(), payload.Get<i32>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0.1], 1); + UNIT_ASSERT_EQUAL(items[0.2], 2); + UNIT_ASSERT_EQUAL(items[0.3], 3); + } + ); + } + + Y_UNIT_TEST(FromPyOrderedDict) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "from collections import OrderedDict\n" + "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + 
UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v.AsStringRef(), "one"); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4))))); + +#if PY_MAJOR_VERSION >= 3 + std::vector<std::pair<ui32, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui32>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 2); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, "two"); + UNIT_ASSERT_EQUAL(items[1].second, "one"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); + + std::vector<ui32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<ui32>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 2); + UNIT_ASSERT_EQUAL(keys[1], 1); + UNIT_ASSERT_EQUAL(keys[2], 3); + + std::vector<TString> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], "two"); + UNIT_ASSERT_EQUAL(values[1], "one"); + UNIT_ASSERT_EQUAL(values[2], "three"); +#endif + }); + } + + Y_UNIT_TEST(ToPyAndBackSetAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<float, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> + Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void()) + .Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void()) + .Add(NUdf::TUnboxedValuePod(0.3f), 
NUdf::TUnboxedValuePod::Void()) + .Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f))); + UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f))); + + std::vector<float> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<float>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 0.1f); + UNIT_ASSERT_EQUAL(keys[1], 0.2f); + UNIT_ASSERT_EQUAL(keys[2], 0.3f); + } + ); + } + + Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue *items = nullptr; + const auto a = vb.NewArray(9U, items); + const float f[] = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f }; + std::transform(f, f + 9U, items, [](float v){ return NUdf::TUnboxedValuePod(v); }); + return a; + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13)))); + + std::vector<std::pair<i8, float>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i8>(), payload.Get<float>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + 
UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 0.1f); + UNIT_ASSERT_EQUAL(items.back().second, 0.9f); + + std::vector<i8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i8>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); + + std::vector<float> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<float>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 0.1f); + UNIT_ASSERT_EQUAL(values.back(), 0.9f); + } + ); + } + + Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13)))); + + std::vector<std::pair<ui8, i32>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui8>(), payload.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 1); + UNIT_ASSERT_EQUAL(items.back().second, 9); + + std::vector<ui8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; 
kit.Next(key);) { + keys.emplace_back(key.Get<ui8>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); + + std::vector<i32> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 1); + UNIT_ASSERT_EQUAL(values.back(), 9); + } + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.cpp b/yql/essentials/udfs/common/python/bindings/py_errors.cpp new file mode 100644 index 0000000000..5741978d54 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_errors.cpp @@ -0,0 +1,72 @@ +#include "py_errors.h" +#include "py_ptr.h" +#include "py_cast.h" +#include "py_utils.h" + +#include <util/generic/string.h> +#include <util/stream/output.h> + +namespace NPython { + +// this function in conjunction with code after Py_Initialize +// does approximately the following: +// +// sys.stderr = StderrProxy(sys.stderr) +// +// ... 
+// +// sys.stderr._toggle_real_mode() +// sys.excepthook( +// sys.last_type, +// sys.last_value, +// sys.last_traceback) +// sys.stderr._get_value() +// sys.stderr._toggle_real_mode() +// +// where _toggle_real_mode, _get_value & all calls to stderr not in real mode +// are handled in a thread-safe way +// +TString GetLastErrorAsString() +{ + PyObject* etype; + PyObject* evalue; + PyObject* etraceback; + + PyErr_Fetch(&etype, &evalue, &etraceback); + + if (!etype) { + return {}; + } + + TPyObjectPtr etypePtr {etype, TPyObjectPtr::ADD_REF}; + TPyObjectPtr evaluePtr {evalue, TPyObjectPtr::ADD_REF}; + TPyObjectPtr etracebackPtr {etraceback, TPyObjectPtr::ADD_REF}; + + TPyObjectPtr stderrObject {PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF}; + if (!stderrObject) { + return {}; + } + + TPyObjectPtr unused = PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr); + + PyErr_Restore(etypePtr.Get(), evaluePtr.Get(), etracebackPtr.Get()); + // in unusual situations there may be low-level write to stderr + // (by direct C FILE* write), but that's OK + PyErr_Print(); + + TPyObjectPtr error = PyObject_CallMethod(stderrObject.Get(), "_get_value", nullptr); + if (!error) { + return {}; + } + unused.ResetSteal( + PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr) + ); + + TString errorValue; + if (!TryPyCast(error.Get(), errorValue)) { + errorValue = TString("can't get error string from: ") += PyObjectRepr(error.Get()); + } + return errorValue; +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.h b/yql/essentials/udfs/common/python/bindings/py_errors.h new file mode 100644 index 0000000000..2306b47bb9 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_errors.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/generic/fwd.h> + +namespace NPython { + +TString GetLastErrorAsString(); + +#define PY_TRY try + +#define PY_CATCH(ErrorValue) \ + catch (const yexception& e) { \ + 
PyErr_SetString(PyExc_RuntimeError, e.what()); \ + return ErrorValue; \ + } + +#define PY_ENSURE(condition, message) \ + do { \ + if (Y_UNLIKELY(!(condition))) { \ + throw yexception() << message; \ + } \ + } while (0) + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_gil.h b/yql/essentials/udfs/common/python/bindings/py_gil.h new file mode 100644 index 0000000000..70e9bf3e91 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_gil.h @@ -0,0 +1,37 @@ +#pragma once + +#include <Python.h> + + +namespace NPython { + +struct TPyGilLocker +{ + TPyGilLocker() + : Gil(PyGILState_Ensure()) + { + } + + ~TPyGilLocker() { + PyGILState_Release(Gil); + } + +private: + PyGILState_STATE Gil; +}; + +struct TPyGilUnlocker { + TPyGilUnlocker() + : ThreadState(PyEval_SaveThread()) + { + } + + ~TPyGilUnlocker() { + PyEval_RestoreThread(ThreadState); + } + +private: + PyThreadState* ThreadState; +}; + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp new file mode 100644 index 0000000000..090211be2c --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp @@ -0,0 +1,280 @@ +#include "py_iterator.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +using namespace NKikimr; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// TPyIterator interface +////////////////////////////////////////////////////////////////////////////// +struct TPyIterator +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator; + + inline static TPyIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + 
delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TDictKeysIterator>").Release(); + } + + static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator); + static PyObject* Next(PyObject* self); +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TDictKeysIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , 
nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyPairIterator interface +////////////////////////////////////////////////////////////////////////////// +struct TPyPairIterator +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* KeyType; + const NUdf::TType* PayloadType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator; + + inline static TPyPairIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyPairIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TDictIterator>").Release(); + } + + static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator); + static PyObject* Next(PyObject* self); +}; + +PyTypeObject PyPairIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TDictIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyPairIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyPairIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + 
INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyPairIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TPairIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyPairIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyIterator implementation 
+////////////////////////////////////////////////////////////////////////////// +PyObject* TPyIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator) +{ + TPyIterator* dictIter = new TPyIterator; + PyObject_INIT(dictIter, &PyIteratorType); + dictIter->CastCtx = ctx; + dictIter->ItemType = itemType; + dictIter->Iterator.Set(ctx->PyCtx, iterator); + return reinterpret_cast<PyObject*>(dictIter); +} + +PyObject* TPyIterator::Next(PyObject* self) +{ + PY_TRY { + const auto iter = Cast(self); + NUdf::TUnboxedValue item; + if (NUdf::TBoxedValueAccessor::Next(*iter->Iterator.Get(), item)) { + return (iter->ItemType ? ToPyObject(iter->CastCtx, iter->ItemType, item) : PyCast<ui64>(item.Get<ui64>())).Release(); + } + return nullptr; + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// +// TPyPairIterator implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyPairIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator) +{ + TPyPairIterator* dictIter = new TPyPairIterator; + PyObject_INIT(dictIter, &PyPairIteratorType); + dictIter->CastCtx = ctx; + dictIter->KeyType = keyType; + dictIter->PayloadType = payloadType; + dictIter->Iterator.Set(ctx->PyCtx, iterator); + return reinterpret_cast<PyObject*>(dictIter); +} + +PyObject* TPyPairIterator::Next(PyObject* self) +{ + PY_TRY { + const auto iter = Cast(self); + NUdf::TUnboxedValue k, v; + if (NUdf::TBoxedValueAccessor::NextPair(*iter->Iterator.Get(), k, v)) { + const TPyObjectPtr key = iter->KeyType ? 
+ ToPyObject(iter->CastCtx, iter->KeyType, k): + PyCast<ui64>(k.Get<ui64>()); + const TPyObjectPtr value = ToPyObject(iter->CastCtx, iter->PayloadType, v); + return PyTuple_Pack(2, key.Get(), value.Get()); + } + return nullptr; + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyIterator::New(castCtx, itemType, value.AsBoxed()); +} + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyPairIterator::New(castCtx, keyType, payloadType, value.AsBoxed()); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.h b/yql/essentials/udfs/common/python/bindings/py_iterator.h new file mode 100644 index 0000000000..5c5de27b0b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.h @@ -0,0 +1,23 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyIteratorType; +extern PyTypeObject PyPairIteratorType; + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); + + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp new file mode 100644 index 0000000000..ffaa2fe4ec --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp @@ -0,0 +1,705 @@ +#include "py_cast.h" +#include "py_errors.h" +#include 
"py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/generic/maybe.h> +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { +namespace { +////////////////////////////////////////////////////////////////////////////// +// TLazyDictBase +////////////////////////////////////////////////////////////////////////////// +class TLazyDictBase: public NUdf::TBoxedValue +{ +protected: + class TIterator: public NUdf::TBoxedValue { + public: + TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter) + : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter)) + {} + + ~TIterator() { + const TPyGilLocker lock; + PyIter_.Reset(); + } + + private: + bool Skip() override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Next(NUdf::TUnboxedValue& value) override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + value = FromPyObject(CastCtx_, ItemType_, next.Get()); + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + payload = NUdf::TUnboxedValuePod::Void(); + return Next(key); + } + + private: + const 
TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PyIter_; + }; + + class TPairIterator: public NUdf::TBoxedValue { + public: + TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter) + : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter)) + {} + + ~TPairIterator() { + const TPyGilLocker lock; + PyIter_.Reset(); + } + + private: + bool Skip() override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + key = FromPyObject(CastCtx_, KeyType_, PyTuple_GET_ITEM(next.Get(), 0)); + pay = FromPyObject(CastCtx_, PayType_, PyTuple_GET_ITEM(next.Get(), 1)); + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + private: + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* KeyType_; + const NUdf::TType* PayType_; + TPyObjectPtr PyIter_; + }; + + TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject) + : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef()) + {} + + ~TLazyDictBase() { + const TPyGilLocker lock; + PyObject_.Reset(); + } + + bool HasDictItems() const override try { + const TPyGilLocker lock; + const auto has = 
PyObject_IsTrue(PyObject_.Get()); + if (has < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return bool(has); + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PyObject_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyMapping +////////////////////////////////////////////////////////////////////////////// +class TLazyMapping: public TLazyDictBase +{ +public: + TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) + : TLazyDictBase(ctx, keyType, dict), PayType_(payType) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PyMapping_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyMapping_Keys(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyMapping_Values(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) 
{ + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetDictIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyMapping_Items(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + if (const auto item = PyObject_GetItem(PyObject_.Get(), pyKey.Get())) { + return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional(); + } + + if (PyErr_Occurred()) { + PyErr_Clear(); + } + + return NUdf::TUnboxedValue(); + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto map = PyObject_.Get(); + const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ? 
+ (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) : + PyMapping_HasKey(map, pyKey.Get()); + + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + +private: + const NUdf::TType* PayType_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyDict +////////////////////////////////////////////////////////////////////////////// +class TLazyDict: public TLazyDictBase +{ +public: + TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) + : TLazyDictBase(ctx, keyType, dict), PayType_(payType) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PyDict_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyDict_Keys(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyDict_Values(PyObject_.Get())) { + if (TPyObjectPtr pyIter = 
PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetDictIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyDict_Items(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + if (const auto item = PyDict_GetItem(PyObject_.Get(), pyKey.Get())) { + return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional(); + } else if (!PyErr_Occurred()) { + return NUdf::TUnboxedValue(); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto has = PyDict_Contains(PyObject_.Get(), pyKey.Get()); + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + 
+private: + const NUdf::TType* PayType_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazySet +////////////////////////////////////////////////////////////////////////////// +class TLazySet: public TLazyDictBase +{ +public: + TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set) + : TLazyDictBase(ctx, itemType, set) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PySet_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + return Contains(key) ? NUdf::TUnboxedValuePod::Void() : NUdf::TUnboxedValuePod(); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto has = PySet_Contains(PyObject_.Get(), pyKey.Get()); + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue 
GetPayloadsIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetDictIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetListIterator() const override { + return GetKeysIterator(); + } + + ui64 GetListLength() const override { + return GetDictLength(); + } + + bool HasListItems() const override { + return HasDictItems(); + } + + bool HasFastListLength() const override { + return true; + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazySequenceAsSet +////////////////////////////////////////////////////////////////////////////// +class TLazySequenceAsSet: public TLazyDictBase +{ +public: + TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence) + : TLazyDictBase(ctx, keyType, sequence) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PySequence_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + return Contains(key) ? 
NUdf::TUnboxedValuePod::Void() : NUdf::TUnboxedValuePod(); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto has = PySequence_Contains(PyObject_.Get(), pyKey.Get()); + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetDictIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetListIterator() const override { + return GetKeysIterator(); + } + + ui64 GetListLength() const override { + return GetDictLength(); + } + + bool HasListItems() const override { + return HasDictItems(); + } + + bool HasFastListLength() const override { + return true; + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazySequenceAsDict +////////////////////////////////////////////////////////////////////////////// +template<typename KeyType> +class TLazySequenceAsDict: public NUdf::TBoxedValue +{ +private: + class TKeyIterator: public NUdf::TBoxedValue { + public: + TKeyIterator(Py_ssize_t size) + : Size(size), Index(0) + {} + + private: + bool Skip() override { + if (Index >= Size) + return false; + + ++Index; + 
return true; + } + + bool Next(NUdf::TUnboxedValue& value) override { + if (Index >= Size) + return false; + + value = NUdf::TUnboxedValuePod(KeyType(Index++)); + return true; + } + + private: + const Py_ssize_t Size; + Py_ssize_t Index; + }; + + class TIterator: public NUdf::TBoxedValue { + public: + TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq) + : CastCtx_(ctx), ItemType_(itemType), PySeq_(pySeq), Size(size), Index(0) + {} + + ~TIterator() { + const TPyGilLocker lock; + PySeq_.Reset(); + } + + private: + bool Skip() override { + if (Index >= Size) + return false; + + ++Index; + return true; + } + + bool Next(NUdf::TUnboxedValue& value) override try { + if (Index >= Size) + return false; + + const TPyGilLocker lock; + value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++)); + return true; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try { + if (Index >= Size) + return false; + + const TPyGilLocker lock; + key = NUdf::TUnboxedValuePod(KeyType(Index)); + pay = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++)); + return true; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + private: + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PySeq_; + const Py_ssize_t Size; + Py_ssize_t Index; + }; + +public: + TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size) + : CastCtx_(ctx), ItemType_(itemType), Size(size), PySeq_(std::move(sequence)) + {} + + ~TLazySequenceAsDict() + { + const TPyGilLocker lock; + PySeq_.Reset(); + } + +private: + bool IsSortedDict() const override { return true; } + + bool HasDictItems() 
const override { + return Size > 0; + } + + ui64 GetDictLength() const override { + return Size; + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + const Py_ssize_t index = key.Get<KeyType>(); + if (index >= -Size && index < Size) try { + const TPyGilLocker lock; + if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size + index)) { + return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional(); + } else if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + return NUdf::TUnboxedValue(); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override { + const Py_ssize_t index = key.Get<KeyType>(); + return index >= -Size && index < Size; + } + + NUdf::TUnboxedValue GetKeysIterator() const override { + return NUdf::TUnboxedValuePod(new TKeyIterator(Size)); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_)); + } + + NUdf::TUnboxedValue GetDictIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_)); + } + + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + const Py_ssize_t Size; + TPyObjectPtr PySeq_; +}; + +} // namspace + +NUdf::TUnboxedValue FromPyDict( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* dict) +{ + return NUdf::TUnboxedValuePod(new TLazyDict(castCtx, keyType, payType, dict)); +} + +NUdf::TUnboxedValue FromPyMapping( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* map) +{ + return NUdf::TUnboxedValuePod(new TLazyMapping(castCtx, keyType, payType, map)); +} + +NUdf::TUnboxedValue FromPySet( + 
const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + PyObject* set) +{ + return NUdf::TUnboxedValuePod(new TLazySet(castCtx, itemType, set)); +} + +NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + PyObject* set) +{ + return NUdf::TUnboxedValuePod(new TLazySequenceAsSet(castCtx, keyType, set)); +} + +NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TDataTypeId keyType, + PyObject* sequence) +{ + if (TPyObjectPtr fast = PySequence_Fast(sequence, "Can't get fast sequence.")) { + const auto size = PySequence_Fast_GET_SIZE(fast.Get()); + if (size >= 0) { + switch (keyType) { +#define MAKE_PRIMITIVE_TYPE_SIZE(type) \ + case NUdf::TDataType<type>::Id: \ + return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size)); + INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE) +#undef MAKE_PRIMITIVE_TYPE_SIZE + } + Y_ABORT("Invalid key type."); + } + } + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp new file mode 100644 index 0000000000..fe3b8892e6 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp @@ -0,0 +1,382 @@ +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/generic/maybe.h> +#include <util/string/builder.h> + + +using namespace NKikimr; + +namespace NPython { +namespace { + +static ui64 CalculateIteratorLength(PyObject* iter, const TPyCastContext::TPtr& castCtx) +{ + 
PyObject* item; + + ui64 length = 0; + while ((item = PyIter_Next(iter))) { + length++; + Py_DECREF(item); + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return length; +} + +static bool IsIteratorHasItems(PyObject* iter, const TPyCastContext::TPtr& castCtx) +{ + if (const TPyObjectPtr item = PyIter_Next(iter)) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; +} + +////////////////////////////////////////////////////////////////////////////// +// TBaseLazyList +////////////////////////////////////////////////////////////////////////////// +template<typename TDerived> +class TBaseLazyList: public NUdf::TBoxedValue +{ + using TListSelf = TBaseLazyList<TDerived>; + + class TIterator: public NUdf::TBoxedValue { + public: + TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter) + : CastCtx_(ctx) + , PyIter_(std::move(pyIter)) + , ItemType_(type) + {} + + ~TIterator() { + const TPyGilLocker lock; + PyIter_.Reset(); + } + + private: + bool Skip() override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Next(NUdf::TUnboxedValue& value) override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + value = FromPyObject(CastCtx_, ItemType_, next.Get()); + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + 
UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + private: + const TPyCastContext::TPtr CastCtx_; + TPyObjectPtr PyIter_; + const NUdf::TType* ItemType_; + }; + +public: + TBaseLazyList( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : CastCtx_(castCtx) + , PyObject_(std::move(pyObject)) + , ItemType_(NUdf::TListTypeInspector(*CastCtx_->PyCtx->TypeInfoHelper, type).GetItemType()) + { + } + + ~TBaseLazyList() { + TPyGilLocker lock; + PyObject_.Reset(); + } + +private: + TPyObjectPtr GetIterator() const try { + return static_cast<const TDerived*>(this)->GetIteratorImpl(); + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool HasFastListLength() const override { + return Length_.Defined(); + } + + ui64 GetEstimatedListLength() const override { + return GetListLength(); + } + + ui64 GetListLength() const override try { + if (!Length_.Defined()) { + const TPyGilLocker lock; + TPyObjectPtr iter = GetIterator(); + Length_ = CalculateIteratorLength(iter.Get(), CastCtx_); + } + + return *Length_; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool HasListItems() const override try { + if (Length_.Defined()) + return *Length_ > 0; + + const TPyGilLocker lock; + TPyObjectPtr iter = GetIterator(); + const bool hasItems = IsIteratorHasItems(iter.Get(), CastCtx_); + if (!hasItems) { + Length_ = 0; + } + return hasItems; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetListIterator() const override try { + const TPyGilLocker lock; + TPyObjectPtr pyIter = GetIterator(); + auto* self = const_cast<TListSelf*>(this); + return NUdf::TUnboxedValuePod(new TIterator(self->CastCtx_, self->ItemType_, std::move(pyIter))); + } catch (const yexception& e) { + 
UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + const NUdf::TOpaqueListRepresentation* GetListRepresentation() const override { + return nullptr; + } + + NUdf::IBoxedValuePtr ReverseListImpl( + const NUdf::IValueBuilder& builder) const override + { + Y_UNUSED(builder); + return nullptr; + } + + NUdf::IBoxedValuePtr SkipListImpl( + const NUdf::IValueBuilder& builder, ui64 count) const override + { + Y_UNUSED(builder); + Y_UNUSED(count); + return nullptr; + } + + NUdf::IBoxedValuePtr TakeListImpl( + const NUdf::IValueBuilder& builder, ui64 count) const override + { + Y_UNUSED(builder); + Y_UNUSED(count); + return nullptr; + } + + NUdf::IBoxedValuePtr ToIndexDictImpl( + const NUdf::IValueBuilder& builder) const override + { + Y_UNUSED(builder); + return nullptr; + } + +protected: + const TPyCastContext::TPtr CastCtx_; + TPyObjectPtr PyObject_; + const NUdf::TType* ItemType_; + mutable TMaybe<ui64> Length_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyIterable +////////////////////////////////////////////////////////////////////////////// +class TLazyIterable: public TBaseLazyList<TLazyIterable> +{ + using TBase = TBaseLazyList<TLazyIterable>; +public: + TLazyIterable( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : TBase(castCtx, std::move(pyObject), type) + {} + + TPyObjectPtr GetIteratorImpl() const { + if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) { + return ret; + } + + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos + << "Cannot get iterator from object: " + << PyObjectRepr(PyObject_.Get()) << ", error: " + << GetLastErrorAsString()).data()); + } + +private: + bool HasFastListLength() const override { + return Length_.Defined(); + } + + ui64 GetListLength() const override try { + if (!Length_.Defined()) { + const TPyGilLocker lock; + const auto len = PyObject_Size(PyObject_.Get()); + if (len >= 0) { + 
Length_ = len; + } else { + Length_ = CalculateIteratorLength(GetIteratorImpl().Get(), CastCtx_); + } + } + return *Length_; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool HasListItems() const override try { + const TPyGilLocker lock; + bool hasItems = false; + const auto isTrue = PyObject_IsTrue(PyObject_.Get()); + if (isTrue != -1) { + hasItems = static_cast<bool>(isTrue); + } else { + TPyObjectPtr iter = GetIteratorImpl(); + hasItems = IsIteratorHasItems(iter.Get(), CastCtx_); + } + if (!hasItems) { + Length_ = 0; + } + return hasItems; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyIterator +////////////////////////////////////////////////////////////////////////////// +class TLazyIterator: public TBaseLazyList<TLazyIterator> +{ + using TBase = TBaseLazyList<TLazyIterator>; +public: + TLazyIterator( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : TBase(castCtx, std::move(pyObject), type) + , IteratorDrained_(false) + {} + + TPyObjectPtr GetIteratorImpl() const { + if (IteratorDrained_) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << + "Lazy list was build under python iterator. 
" + "Iterator was already used.").data()); + } + IteratorDrained_ = true; + return PyObject_; + } + +private: + mutable bool IteratorDrained_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyGenerator +////////////////////////////////////////////////////////////////////////////// +class TLazyGenerator: public TBaseLazyList<TLazyGenerator> +{ + using TBase = TBaseLazyList<TLazyGenerator>; +public: + TLazyGenerator( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : TBase(castCtx, std::move(pyObject), type) + { + // keep ownership of function closure if any + if (PyFunction_Check(PyObject_.Get())) { + PyObject* closure = PyFunction_GetClosure(PyObject_.Get()); + if (closure) { + Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF); + } + } + } + + ~TLazyGenerator() { + const TPyGilLocker lock; + Closure_.Reset(); + } + + TPyObjectPtr GetIteratorImpl() const { + TPyObjectPtr generator = PyObject_CallObject(PyObject_.Get(), nullptr); + if (!generator || !PyGen_Check(generator.Get())) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected generator as a result of function call").data()); + } + return PyObject_GetIter(generator.Get()); + } + +private: + TPyObjectPtr Closure_; +}; + +} // namspace + + +NUdf::TUnboxedValue FromPyLazyGenerator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr callableObj) +{ + return NUdf::TUnboxedValuePod(new TLazyGenerator(castCtx, std::move(callableObj), type)); +} + +NUdf::TUnboxedValue FromPyLazyIterable( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iterableObj) +{ + return NUdf::TUnboxedValuePod(new TLazyIterable(castCtx, std::move(iterableObj), type)); +} + +NUdf::TUnboxedValue FromPyLazyIterator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iteratorObj) +{ + return NUdf::TUnboxedValuePod(new TLazyIterator(castCtx, 
std::move(iteratorObj), type)); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list.cpp b/yql/essentials/udfs/common/python/bindings/py_list.cpp new file mode 100644 index 0000000000..376a1ca124 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_list.cpp @@ -0,0 +1,1116 @@ +#include "py_list.h" +#include "py_dict.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +using namespace NKikimr; + +#if PY_MAJOR_VERSION >= 3 +#define SLICEOBJ(obj) obj +#else +#define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj)) +// See details about need for backports in ya.make +#include "py27_backports.h" +#endif + +namespace NPython { + +namespace { +inline Py_ssize_t CastIndex(PyObject* key, const char* name) +{ + Py_ssize_t index = -1; + if (PyIndex_Check(key)) { + index = PyNumber_AsSsize_t(key, PyExc_IndexError); + } + if (index < 0) { + const TPyObjectPtr value = PyUnicode_FromFormat("argument of %s must be positive integer or long", name); + PyErr_SetObject(PyExc_IndexError, value.Get()); + } + + return index; +} +} + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyList interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazyList +{ + using TPtr = NUdf::TRefCountedPtr<TPyLazyList, TPyPtrOps<TPyLazyList>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Dict; + Py_ssize_t Step; + Py_ssize_t CachedLength; + + inline static TPyLazyList* Cast(PyObject* o) { + return reinterpret_cast<TPyLazyList*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + 
NUdf::IBoxedValuePtr value, + Py_ssize_t step = 1, + Py_ssize_t size = -1); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static PyObject* Iter(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static PyObject* Subscript(PyObject* self, PyObject* slice); + static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */); + static PyObject* Reversed(PyObject* self, PyObject* /* arg */); + static PyObject* Take(PyObject* self, PyObject* arg); + static PyObject* Skip(PyObject* self, PyObject* arg); + static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */); + static PyObject* HasItems(PyObject* self, PyObject* /* arg */); +}; + +PyMappingMethods LazyListMapping = { + INIT_MEMBER(mp_length, TPyLazyList::Len), + INIT_MEMBER(mp_subscript, TPyLazyList::Subscript), + INIT_MEMBER(mp_ass_subscript, nullptr), +}; + +PyNumberMethods LazyListNumbering = { + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyLazyList::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyLazyList::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), +#endif + + 
INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), +#endif +}; + +PyDoc_STRVAR(reversed__doc__, "DEPRECATED: use reversed(list) or list[::-1] instead."); +PyDoc_STRVAR(take__doc__, "DEPRECATED: use slice list[:n] instead."); +PyDoc_STRVAR(skip__doc__, "DEPRECATED: use slice list[n:] instead."); +PyDoc_STRVAR(to_index_dict__doc__, "DEPRECATED: use list[n] instead."); +PyDoc_STRVAR(has_fast_len__doc__, "DEPRECATED: do not use."); +PyDoc_STRVAR(has_items__doc__, "DEPRECATED: test list as bool instead."); + +static PyMethodDef TPyLazyListMethods[] = { + { "__reversed__", TPyLazyList::Reversed, METH_NOARGS, nullptr }, + { "to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, + { "reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__ }, + { "take", TPyLazyList::Take, METH_O, take__doc__ }, + { "skip", TPyLazyList::Skip, METH_O, skip__doc__ }, + { "has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, + { "has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__ }, + { nullptr, nullptr, 0, nullptr } /* sentinel */ +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyLazyListType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TList"), + 
INIT_MEMBER(tp_basicsize , sizeof(TPyLazyList)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazyList::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazyList::Repr), + INIT_MEMBER(tp_as_number , &LazyListNumbering), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , &LazyListMapping), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TList object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , TPyLazyList::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , TPyLazyListMethods), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 
+ INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyListIterator interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazyListIterator +{ + PyObject_HEAD; + TPyLazyList::TPtr List; + TPyCleanupListItem<NUdf::TUnboxedValue> Iterator; + Py_ssize_t Length; + TPyCastContext::TPtr CastCtx; + + inline static TPyLazyListIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyLazyListIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + auto obj = Cast(self); + auto ctx = obj->CastCtx; + ctx->MemoryLock->Acquire(); + delete obj; + ctx->MemoryLock->Release(); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TListIterator>").Release(); + } + + static PyObject* New(TPyLazyList* list); + static PyObject* Next(PyObject* self); +}; + +PyTypeObject PyLazyListIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TListIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyLazyListIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazyListIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazyListIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + 
INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.ListIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyLazyListIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyList implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyLazyList::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + Py_ssize_t step, + Py_ssize_t size) +{ + TPyLazyList* list = new TPyLazyList; + PyObject_INIT(list, &PyLazyListType); + + list->CastCtx = castCtx; + list->ItemType = itemType; + list->Value.Set(castCtx->PyCtx, value); + list->Step = step; + list->CachedLength = size; + + return 
reinterpret_cast<PyObject*>(list); +} + +PyObject* TPyLazyList::Repr(PyObject*) +{ + return PyRepr("<yql.TList>").Release(); +} + +PyObject* TPyLazyList::Iter(PyObject* self) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + return TPyLazyListIterator::New(list); + } PY_CATCH(nullptr) +} + +Py_ssize_t TPyLazyList::Len(PyObject* self) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); + } + return (list->CachedLength + list->Step - 1) / list->Step; + } PY_CATCH(-1) +} + +PyObject* TPyLazyList::Subscript(PyObject* self, PyObject* slice) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + const auto vb = list->CastCtx->ValueBuilder; + + if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); + + if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); + } + + if (index < 0) { + if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get())); + } + + ++index *= list->Step; + --index += list->CachedLength; + } else { + index *= list->Step; + } + + if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } + + if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release(); + } + + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } + + if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, 
size; + + if (list->CachedLength >= 0) { + if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) { + return nullptr; + } + } else { + if (PySlice_Unpack(slice, &start, &stop, &step) < 0) { + return nullptr; + } + + if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); + size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step); + } else { + size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step); + } + } + + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } + + const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step; + const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step; + step = step > lo && step < hi ? step * list->Step : (step > 0 ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN); + + NUdf::TUnboxedValue newList; + if (size > 0) { + size = step > 0 ? + (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX): + (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX); + + start = start < hi ? start * list->Step : PY_SSIZE_T_MAX; + const Py_ssize_t skip = step > 0 ? 
start : start - size + 1; + + newList = NUdf::TUnboxedValuePod(list->Value.Get().Get()); + if (skip > 0) { + newList = vb->SkipList(newList, skip); + } + + if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) { + newList = vb->TakeList(newList, size); + } + + if (step < 0) { + step = -step; + newList = vb->ReverseList(newList); + } + } else { + newList = vb->NewEmptyList(); + } + + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size); + } + + const TPyObjectPtr type = PyObject_Type(slice); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::ToIndexDict(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); + } + + return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, NUdf::TUnboxedValuePod(list->Dict.Get().Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::Reversed(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + const auto newList = list->CastCtx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::Take(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + Py_ssize_t count = CastIndex(arg, "take"); + if (count < 0) { + return nullptr; + } + count *= list->Step; + + auto vb = list->CastCtx->ValueBuilder; + NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + auto newList = vb->TakeList(value, static_cast<ui64>(count)); + return 
New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::Skip(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + Py_ssize_t count = CastIndex(arg, "skip"); + if (count < 0) { + return nullptr; + } + count *= list->Step; + + NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count)); + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::HasFastLen(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get())) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get())) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } PY_CATCH(nullptr) +} + +int TPyLazyList::Bool(PyObject* self) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (list->CachedLength == -1) { + return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0; + } else { + return list->CachedLength > 0 ? 
1 : 0; + } + } PY_CATCH(-1) +} + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyListIterator implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyLazyListIterator::New(TPyLazyList* list) +{ + TPyLazyListIterator* listIter = new TPyLazyListIterator; + PyObject_INIT(listIter, &PyLazyListIteratorType); + listIter->List.Reset(list); + listIter->Iterator.Set(list->CastCtx->PyCtx, NUdf::TBoxedValueAccessor::GetListIterator(*list->Value.Get())); + listIter->Length = 0; + listIter->CastCtx = list->CastCtx; + return reinterpret_cast<PyObject*>(listIter); +} + +PyObject* TPyLazyListIterator::Next(PyObject* self) +{ + PY_TRY { + TPyLazyListIterator* iter = Cast(self); + TPyLazyList* list = iter->List.Get(); + + NUdf::TUnboxedValue item; + if (iter->Iterator.Get().Next(item)) { + ++iter->Length; + + for (auto skip = list->Step; --skip && iter->Iterator.Get().Skip(); ++iter->Length) + continue; + + return ToPyObject(list->CastCtx, list->ItemType, item).Release(); + } + + // store calculated list length after traverse over whole list + if (list->CachedLength == -1) { + list->CachedLength = iter->Length; + } + + return nullptr; + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// +// TPyThinList interface +////////////////////////////////////////////////////////////////////////////// +struct TPyThinList +{ + using TPtr = NUdf::TRefCountedPtr<TPyThinList, TPyPtrOps<TPyThinList>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + const NUdf::TUnboxedValue* Elements; + Py_ssize_t Length; + Py_ssize_t Step; + + inline static TPyThinList* Cast(PyObject* o) { + return reinterpret_cast<TPyThinList*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const 
NUdf::TType* itemType, + NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(), + const NUdf::TUnboxedValue* elements = nullptr, + Py_ssize_t length = 0, + Py_ssize_t step = 1); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static PyObject* Iter(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static PyObject* Subscript(PyObject* self, PyObject* slice); + static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */); + static PyObject* Reversed(PyObject* self, PyObject* /* arg */); + static PyObject* Take(PyObject* self, PyObject* arg); + static PyObject* Skip(PyObject* self, PyObject* arg); + static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */); + static PyObject* HasItems(PyObject* self, PyObject* /* arg */); +}; + +PyMappingMethods ThinListMapping = { + INIT_MEMBER(mp_length, TPyThinList::Len), + INIT_MEMBER(mp_subscript, TPyThinList::Subscript), + INIT_MEMBER(mp_ass_subscript, nullptr), +}; + +PyNumberMethods ThinListNumbering = { + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyThinList::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyThinList::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if 
PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), +#endif + + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), +#endif +}; + +static PyMethodDef TPyThinListMethods[] = { + { "__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr }, + { "to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, + { "reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__ }, + { "take", TPyThinList::Take, METH_O, take__doc__ }, + { "skip", TPyThinList::Skip, METH_O, skip__doc__ }, + { "has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, + { "has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__ }, + { nullptr, nullptr, 0, nullptr } /* sentinel */ +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyThinListType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TList"), + INIT_MEMBER(tp_basicsize , sizeof(TPyThinList)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyThinList::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + 
INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyThinList::Repr), + INIT_MEMBER(tp_as_number , &ThinListNumbering), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , &ThinListMapping), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TList object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , TPyThinList::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , TPyThinListMethods), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyThinListIterator interface +////////////////////////////////////////////////////////////////////////////// 
+struct TPyThinListIterator +{ + PyObject_HEAD; + TPyThinList::TPtr List; + const NUdf::TUnboxedValue* Elements; + Py_ssize_t Count; + + inline static TPyThinListIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyThinListIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TListIterator>").Release(); + } + + static PyObject* New(TPyThinList* list); + static PyObject* Next(PyObject* self); +}; + +PyTypeObject PyThinListIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TListIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyThinListIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyThinListIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyThinListIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.ListIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyThinListIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base 
, nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyThinList implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyThinList::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + const NUdf::TUnboxedValue* elements, + Py_ssize_t length, + Py_ssize_t step) +{ + TPyThinList* list = new TPyThinList; + PyObject_INIT(list, &PyThinListType); + + list->CastCtx = castCtx; + list->ItemType = itemType; + list->Value.Set(castCtx->PyCtx, value); + list->Elements = elements; + list->Length = length; + list->Step = step; + + return reinterpret_cast<PyObject*>(list); +} + +PyObject* TPyThinList::Repr(PyObject*) +{ + return PyRepr("<yql.TList>").Release(); +} + +PyObject* TPyThinList::Iter(PyObject* self) +{ + PY_TRY { + TPyThinList* list = Cast(self); + return TPyThinListIterator::New(list); + } PY_CATCH(nullptr) +} + +Py_ssize_t TPyThinList::Len(PyObject* self) +{ + return Cast(self)->Length; +} + +PyObject* TPyThinList::Subscript(PyObject* self, PyObject* slice) +{ + PY_TRY { + TPyThinList* list = 
Cast(self); + const auto vb = list->CastCtx->ValueBuilder; + + if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); + + if (index < 0) { + index += list->Length; + } + + if (index < 0 || index >= list->Length) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } + + if (list->Step > 0) { + index *= list->Step; + } else { + index = list->Length - ++index; + index *= -list->Step; + } + + return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release(); + } + + if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, size; + + if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) { + return nullptr; + } + + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } + + if (size > 0) { + const Py_ssize_t skip = list->Step * (list->Step > 0 ? + (step > 0 ? start : start + step * (size - 1)): + (step > 0 ? 
stop : start + 1) - list->Length); + + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step); + } else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); + } + } + + const TPyObjectPtr type = PyObject_Type(slice); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } PY_CATCH(nullptr) +} + +#undef SLICEOBJ + +PyObject* TPyThinList::ToIndexDict(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyThinList* list = Cast(self); + const auto dict = list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::Reversed(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyThinList* list = Cast(self); + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements, list->Length, -list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::Take(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyThinList* list = Cast(self); + const Py_ssize_t count = CastIndex(arg, "take"); + if (count < 0) { + return nullptr; + } + + if (const auto size = std::min(count, list->Length)) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? 
list->Elements : list->Elements + list->Length + size * list->Step, size, list->Step); + } else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); + } + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::Skip(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyThinList* list = Cast(self); + const Py_ssize_t count = CastIndex(arg, "skip"); + if (count < 0) { + return nullptr; + } + + if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements + count * list->Step : list->Elements, size, list->Step); + } else { + return New(list->CastCtx, list->ItemType); + } + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::HasFastLen(PyObject* self, PyObject* /* arg */) +{ + Py_RETURN_TRUE; +} + +PyObject* TPyThinList::HasItems(PyObject* self, PyObject* /* arg */) +{ + if (Cast(self)->Length > 0) + Py_RETURN_TRUE; + else + Py_RETURN_FALSE; +} + +int TPyThinList::Bool(PyObject* self) +{ + return Cast(self)->Length > 0 ? 1 : 0; +} + +////////////////////////////////////////////////////////////////////////////// +// TPyThinListIterator implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyThinListIterator::New(TPyThinList* list) +{ + TPyThinListIterator* listIter = new TPyThinListIterator; + PyObject_INIT(listIter, &PyThinListIteratorType); + listIter->List.Reset(list); + listIter->Elements = list->Step > 0 ? 
list->Elements - list->Step : list->Elements - list->Length * list->Step; + listIter->Count = list->Length; + return reinterpret_cast<PyObject*>(listIter); +} + +PyObject* TPyThinListIterator::Next(PyObject* self) +{ + PY_TRY { + TPyThinListIterator* iter = Cast(self); + + if (iter->Count) { + --iter->Count; + TPyThinList* list = iter->List.Get(); + return ToPyObject(list->CastCtx, list->ItemType, *(iter->Elements += list->Step)).Release(); + } + + return nullptr; + } PY_CATCH(nullptr) +} + +TPyObjectPtr ToPyLazyList( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) +{ + if (const auto elements = value.GetElements()) { + return TPyThinList::New(castCtx, itemType, value.AsBoxed(), elements, value.GetListLength()); + } else { + return TPyLazyList::New(castCtx, itemType, value.AsBoxed()); + } +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list.h b/yql/essentials/udfs/common/python/bindings/py_list.h new file mode 100644 index 0000000000..9db170a795 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_list.h @@ -0,0 +1,33 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyLazyListIteratorType; +extern PyTypeObject PyLazyListType; +extern PyTypeObject PyThinListIteratorType; +extern PyTypeObject PyThinListType; + +TPyObjectPtr ToPyLazyList( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyLazyGenerator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr callableObj); + +NKikimr::NUdf::TUnboxedValue FromPyLazyIterable( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr iterableObj); + +NKikimr::NUdf::TUnboxedValue FromPyLazyIterator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + 
TPyObjectPtr iteratorObj); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp new file mode 100644 index 0000000000..f16165fc54 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp @@ -0,0 +1,1025 @@ +#include "ut3/py_test_engine.h" + +#include <yql/essentials/public/udf/udf_ut_helpers.h> + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyListTest) { + Y_UNIT_TEST(FromPyEmptyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return []", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 0); + }); + } + + Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return [1, 2, 3, 4]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 4); + const auto it = value.GetListIterator(); + NUdf::TUnboxedValue item; + + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4); + UNIT_ASSERT(false == it.Next(item)); + }); + } + + Y_UNIT_TEST(ToPyEmptyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 0\n"); + } + + Y_UNIT_TEST(ToPyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); 
+ std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 3\n" + " assert all(isinstance(v, float) for v in value)\n" + " assert list(value) == [0.1, 0.2, 0.3]\n"); + } + + Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return (1, 2, 3)", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 3); + + ui32 expected = 1; + auto it = value.GetListIterator(); + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + }); + } + + Y_UNIT_TEST(ThinListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0.1\n" + " assert next(it) == 0.2\n" + " assert next(it) == 0.3\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + 
NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__reversed__' in dir(v)\n" + " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n" + ); + } + + Y_UNIT_TEST(LazyListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " assert '__reversed__' in dir(v)\n" + " it = iter(reversed(v))\n" + " assert next(it) == 2\n" + " assert next(it) == 1\n" + " assert next(it) == 0\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0\n" + " assert next(it) == 1\n" + " assert next(it) == 2\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + 
Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == 
e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[3::-1]) == e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n" + ); + } + + Y_UNIT_TEST(ThinListSliceOverReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(x):\n" + " e = list(reversed(range(0, 10)))\n" + " v = reversed(x)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == 
e[2::-1]\n" + " assert list(v[3::-1]) == e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n" + ); + } + + Y_UNIT_TEST(LazyListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert len(v) == len(e)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[3:]) == e[3:]\n" + " assert list(v[-2:]) == e[-2:]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n" + ); + } + + Y_UNIT_TEST(ThinListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 20U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + 
NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" + ); + } + + Y_UNIT_TEST(LazyListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + 
Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" + ); + } + + Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 20U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + 
NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n" + ); + } + + Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == 
e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n" + ); + } + + Y_UNIT_TEST(ThinListByIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__getitem__' in dir(v)\n" + " assert v[0] == e[0]\n" + " assert v[3] == e[3]\n" + " assert v[5] == e[5]\n" + " assert v[9] == e[9]\n" + " assert v[-1] == e[-1]\n" + " assert v[-4] == e[-4]\n" + " assert v[-9] == e[-9]\n" + " assert v[-10] == e[-10]\n" + ); + } + + Y_UNIT_TEST(LazyListByIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__getitem__' in dir(v)\n" + " assert v[0] == e[0]\n" + " assert v[3] == e[3]\n" + " assert v[5] == e[5]\n" + " assert v[9] == e[9]\n" + " assert v[-1] == e[-1]\n" + " assert v[-4] == e[-4]\n" + " assert v[-9] == e[-9]\n" + " assert v[-10] == e[-10]\n" + ); + } + + Y_UNIT_TEST(ThinListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + 
std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = range(0, 10)\n" + " assert '__len__' in dir(v)\n" + " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n" + " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n" + " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n" + ); + } + + Y_UNIT_TEST(ThinListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + 
[](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert len(v) == len(e)\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListToIndexDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert len(d) == 3\n" + " assert d[0] == 3\n" + " assert d[1] == 4\n" + " assert d[2] == 5\n" + " assert 3 not in d"); + } + + Y_UNIT_TEST(LazyListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + 
Y_UNUSED(type); + NUdf::TUnboxedValue *items = nullptr; + return vb.NewArray(1U, items); + }, + "def Test(value):\n" + " assert value\n" + ); + } + + Y_UNIT_TEST(LazyListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " assert not value\n" + ); + } + + Y_UNIT_TEST(ThinListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " assert value\n" + ); + } + + Y_UNIT_TEST(ThinListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert not value\n" + ); + } + + Y_UNIT_TEST(LazyListHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert b\n"); + } + + Y_UNIT_TEST(LazyListEmptyHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert not b\n"); + } + + Y_UNIT_TEST(LazyIndexDictContains) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); 
+ }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert 0 in d\n" + " assert 1 in d\n" + " assert 2 in d\n" + " assert 3 not in d\n" + " assert -1 not in d"); + } + + Y_UNIT_TEST(LazyIndexDictIter) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " i, j = 0, 3\n" + " for k, v in d.items():\n" + " assert i == k\n" + " assert j == v\n" + " i, j = i+1, j+1"); + } + + Y_UNIT_TEST(LazyIndexDictGet) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert d.get(1) == 4\n" + " assert d.get(5) == None\n" + " assert d.get(5, 10) == 10\n"); + } + + Y_UNIT_TEST(FromPyGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); + + const auto it = value.GetListIterator(); + ui32 expected = 0; + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + }); + } + + Y_UNIT_TEST(FromPyIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" +#if PY_MAJOR_VERSION >= 3 + " return range(10)\n", +#else + " return xrange(10)\n", 
+#endif + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); + + const auto it = value.GetListIterator(); + ui32 expected = 0U; + for (NUdf::TUnboxedValue item; it.Next(item);) { + UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++); + } + + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + UNIT_ASSERT(value.HasFastListLength()); + }); + } + + Y_UNIT_TEST(FromPyCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T([1, 2])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasListItems()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 2); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 2); + } + + UNIT_ASSERT(false == it.Skip()); + }); + } + + Y_UNIT_TEST(FromPyIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " return iter(range(2))\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + + 
UNIT_ASSERT(false == it.Skip()); + }); + } + + Y_UNIT_TEST(FromPyGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " yield 0\n" + " yield 1\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + + UNIT_ASSERT(false == it.Skip()); + }); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp new file mode 100644 index 0000000000..c55e25891d --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp @@ -0,0 +1,359 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +#define PY_CHECKER(Name, PyType, AsType, Type) \ + struct TPy##Name##Checker { \ + void operator()(PyObject* pyVal, Type expected) { \ + UNIT_ASSERT(Py##PyType##_Check(pyVal)); \ + Type val = Py##PyType##_As##AsType(pyVal); \ + UNIT_ASSERT(val != static_cast<Type>(-1) || !PyErr_Occurred()); \ + UNIT_ASSERT_EQUAL(val, expected); \ + } \ + }; + +#if PY_MAJOR_VERSION >= 3 +PY_CHECKER(Long, Long, Long, long) +#else +PY_CHECKER(Int, Int, Long, long) +#endif + +#ifdef HAVE_LONG_LONG +PY_CHECKER(LLong, Long, LongLong, long long) +PY_CHECKER(Ulong, Long, UnsignedLongLong, unsigned long long) +#else +PY_CHECKER(LLong, Long, Long, long) +PY_CHECKER(Ulong, Long, UnsignedLong, unsigned long) +#endif + +PY_CHECKER(Float, Float, Double, long) + +#undef PY_CHECKER + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyNumberTest) { + template <typename T, typename TPyChecker> + void TestCastsInRange(T begin, T end) { + for (T i = begin; i < end; i++) { + 
TPyObjectPtr pyVal = PyCast<T>(i); + UNIT_ASSERT(pyVal.Get() != nullptr); + + TPyChecker c; + c(pyVal.Get(), i); + + T cppVal = PyCast<T>(pyVal.Get()); + UNIT_ASSERT_EQUAL(cppVal, i); + } + } + + template <typename T, typename TPyChecker, int range = 10> + void TestSignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyChecker>(-range, range); + TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>()); + } + + template <typename T, typename TPyDownChecker, + typename TPyUpChecker = TPyDownChecker, int range = 10> + void TestUnsignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>()); + } + + Y_UNIT_TEST(Bool) { + TPythonTestEngine engine; + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true); + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false); + + TPyObjectPtr list = PyList_New(0); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false); + bool res1; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1)); + UNIT_ASSERT_EQUAL(res1, false); + + PyList_Append(list.Get(), Py_None); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true); + bool res2; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2)); + UNIT_ASSERT_EQUAL(res2, true); + } + + Y_UNIT_TEST(Float) { + TestSignedCasts<float, TPyFloatChecker>(); + } + + Y_UNIT_TEST(Double) { + TestUnsignedCasts<double, TPyFloatChecker>(); + } + + Y_UNIT_TEST(I64) { + TestSignedCasts<i64, TPyLLongChecker>(); + } + + Y_UNIT_TEST(Ui64) { + TestUnsignedCasts<ui64, TPyUlongChecker>(); + } + +#if PY_MAJOR_VERSION >= 3 + Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyLongChecker>(); + } + + Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyLongChecker>(); + } + + Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyLongChecker>(); + } + + Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyLongChecker>(); + } + + Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyLongChecker>(); + 
} + + Y_UNIT_TEST(Ui32) { + TestUnsignedCasts<ui32, TPyLongChecker>(); + } + Y_UNIT_TEST(ImplicitIntCasts) { + TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyLong_FromLong(expected); + + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } + + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } + + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + } + +#else + Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyIntChecker>(); + } + + Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyIntChecker>(); + } + + Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyIntChecker>(); + } + + Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyIntChecker>(); + } + + Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyIntChecker>(); + } + + Y_UNIT_TEST(Ui32) { + if (sizeof(long) == 4) { + TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>(); + } else { + TestUnsignedCasts<ui32, TPyIntChecker>(); + } + } + + Y_UNIT_TEST(ImplicitIntCasts) { + 
TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyInt_FromLong(expected); + + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } + + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } + + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + } +#endif + + + Y_UNIT_TEST(ImplicitLongCasts) { + TPythonTestEngine engine; + i64 expected = static_cast<i64>(Max<ui32>()) + 10; + TPyObjectPtr pyLong; + #ifdef HAVE_LONG_LONG + pyLong = PyLong_FromLongLong(expected); + #else + pyLong = PyLong_FromLong(expected) + #endif + + { // signed + i64 actual = PyCast<i64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } + + { // unsigned + ui64 actual = PyCast<ui64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + 
UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } + + { // to float + float f = PyCast<float>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyLong.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyLong.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i8 tmp; + UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp)); + } + + Y_UNIT_TEST(HugeLongOverflow) { + TPythonTestEngine engine; + TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0); + TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()"); + UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number + + ui64 resUI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64)); + + i64 resI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64)); + + ui32 resUI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32)); + + i32 resI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32)); + + ui16 resUI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16)); + + i16 resI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16)); + + ui8 resUI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8)); + + i8 resI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8)); + } + + Y_UNIT_TEST(ImplicitFloatCasts) { + TPythonTestEngine engine; + double expected = 3.14159; + TPyObjectPtr pyFloat = PyFloat_FromDouble(expected); + + { // to float + float f = PyCast<float>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyFloat.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk 
= TryPyCast<double>(pyFloat.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + } + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp new file mode 100644 index 0000000000..d13ea65da6 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp @@ -0,0 +1,56 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(FromPyNone) { + Y_UNIT_TEST(FromPyNone) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return None", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value); + }); + } + + Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return 42", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42); + }); + } + + Y_UNIT_TEST(ToPyNone) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(); + }, + "def Test(value):\n" + " assert value == None\n"); + } + + Y_UNIT_TEST(ToPyFilledOptional) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + const TOptionalType* optType = + static_cast<const TOptionalType*>(type); + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items); + items[0] = vb.NewString("test string"); + items[1] = NUdf::TUnboxedValuePod(false); + return NUdf::TUnboxedValue(tuple); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 2\n" + " assert value == ('test string', False)\n"); + } +} 
diff --git a/yql/essentials/udfs/common/python/bindings/py_ptr.h b/yql/essentials/udfs/common/python/bindings/py_ptr.h new file mode 100644 index 0000000000..704629b86b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_ptr.h @@ -0,0 +1,69 @@ +#pragma once + +#include <Python.h> // PyObject + +#include <yql/essentials/public/udf/udf_ptr.h> + +namespace NPython { + +template <typename T> +class TPyPtrOps +{ +public: + static inline void Ref(T* t) { + Y_ASSERT(t); + Py_INCREF(t); + } + + static inline void UnRef(T* t) { + Y_ASSERT(t); + Py_DECREF(t); + } + + static inline ui32 RefCount(const T* t) { + Y_ASSERT(t); + return t->ob_refcnt; + } +}; + +class TPyObjectPtr: + public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>> +{ + using TSelf = NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>; + +public: + inline TPyObjectPtr() + { + } + + inline TPyObjectPtr(PyObject* p) + : TSelf(p, STEAL_REF) // do not increment refcounter by default + { + } + + inline TPyObjectPtr(PyObject* p, AddRef) + : TSelf(p) + { + } + + inline void ResetSteal(PyObject* p) { + TSelf::Reset(p, STEAL_REF); + } + + inline void ResetAddRef(PyObject* p) { + TSelf::Reset(p); + } + + inline void Reset() { + TSelf::Reset(); + } + + template <class T> + inline T* GetAs() const { + return reinterpret_cast<T*>(Get()); + } + + void Reset(PyObject* p) = delete; +}; + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.cpp b/yql/essentials/udfs/common/python/bindings/py_resource.cpp new file mode 100644 index 0000000000..ebb096029a --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_resource.cpp @@ -0,0 +1,116 @@ +#include "py_resource.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> + +using namespace NKikimr; + +namespace NPython { +namespace { + +void 
DestroyResourceCapsule(PyObject* obj) { + if (auto* ptr = PyCapsule_GetPointer(obj, ResourceCapsuleName)) { + delete reinterpret_cast<NUdf::TUnboxedValue*>(ptr); + } +} + +///////////////////////////////////////////////////////////////////////////// +// TResource +///////////////////////////////////////////////////////////////////////////// +class TResource final: public NUdf::TBoxedValue +{ +public: + TResource(PyObject* value, const NUdf::TStringRef& tag) + : Value_(value, TPyObjectPtr::ADD_REF), Tag_(tag) + { + } + + ~TResource() { + TPyGilLocker lock; + Value_.Reset(); + } + +private: + NUdf::TStringRef GetResourceTag() const override { + return Tag_; + } + + void* GetResource() final { + return Value_.Get(); + } + + TPyObjectPtr Value_; + const NUdf::TStringRef Tag_; +}; + +} // namespace + +const char ResourceCapsuleName[] = "YqlResourceCapsule"; + +TPyObjectPtr ToPyResource( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ +// TODO NILE-43 +#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) + NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type); + auto tag = inpector.GetTag(); + if (tag == ctx->PyCtx->ResourceTag) { + PyObject* p = reinterpret_cast<PyObject*>(value.GetResource()); + return TPyObjectPtr(p, TPyObjectPtr::ADD_REF); + } +#else + Y_UNUSED(type); + if (value.GetResourceTag() == ctx->PyCtx->ResourceTag) { + PyObject* p = reinterpret_cast<PyObject*>(value.GetResource()); + return TPyObjectPtr(p, TPyObjectPtr::ADD_REF); + } +#endif + auto resource = MakeHolder<NUdf::TUnboxedValue>(value); + + return PyCapsule_New(resource.Release(), ResourceCapsuleName, &DestroyResourceCapsule); +} + +NUdf::TUnboxedValue FromPyResource( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ +// TODO NILE-43 +#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) + NUdf::TResourceTypeInspector 
inpector(*ctx->PyCtx->TypeInfoHelper, type); + auto tag = inpector.GetTag(); + if (tag == ctx->PyCtx->ResourceTag) { + return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag)); + } + + if (PyCapsule_IsValid(value, ResourceCapsuleName)) { + auto* resource = reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName)); + auto valueTag = resource->GetResourceTag(); + if (valueTag != tag) { + throw yexception() << "Mismatch of resource tag, expected: " + << tag << ", got: " << valueTag; + } + + return *resource; + } + + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is not a valid resource with tag " << tag; +#else + Y_UNUSED(type); + if (PyCapsule_CheckExact(value)) { + if (!PyCapsule_IsValid(value, ResourceCapsuleName)) { + throw yexception() << "Python object " << PyObjectRepr(value) << " is not a valid resource capsule"; + } + return *reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName)); + } + return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag)); +#endif +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.h b/yql/essentials/udfs/common/python/bindings/py_resource.h new file mode 100644 index 0000000000..b46b84c84b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_resource.h @@ -0,0 +1,20 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern const char ResourceCapsuleName[]; + +TPyObjectPtr ToPyResource( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyResource( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp new file mode 
100644 index 0000000000..aaa9899c4f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp @@ -0,0 +1,81 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +extern const char SimpleDataTag[] = "SimpleData"; +extern const char PythonTestTag[] = PYTHON_TEST_TAG; + +struct TSimpleData { + TString Name; + ui32 Age; + + TSimpleData(const TString& name, ui32 age) + : Name(name) + , Age(age) + {} +}; + +using TSimpleDataResource = NUdf::TBoxedResource<TSimpleData, SimpleDataTag>; + +Y_UNIT_TEST_SUITE(TPyResourceTest) { + Y_UNIT_TEST(MkqlObject) { + TPythonTestEngine engine; + TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99)); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert type(value).__name__ == 'PyCapsule'\n" + " return value\n"); + UNIT_ASSERT(!!pyValue); + + engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>( + pyValue.Get(), + "import yql\n" + "\n" + "def Test(value):\n" + " return value\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value);; + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag); + auto simpleData = + reinterpret_cast<TSimpleData*>(value.GetResource()); + UNIT_ASSERT_EQUAL(simpleData->Age, 99); + UNIT_ASSERT_STRINGS_EQUAL(simpleData->Name, "Jamel"); + }); + } + + Y_UNIT_TEST(PythonObject) { + TPythonTestEngine engine; + NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>( + "class CustomStruct:\n" + " def __init__(self, name, age):\n" + " self.name = name\n" + " self.age = age\n" + "\n" + "def Test():\n" + 
" return CustomStruct('Jamel', 97)\n"); + UNIT_ASSERT(mkqlValue); + UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag); + + TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>( + [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return mkqlValue; + }, + "def Test(value):\n" + " assert isinstance(value, CustomStruct)\n" + " assert value.age, 97\n" + " assert value.name, 'Jamel'\n"); + UNIT_ASSERT(!!pyValue); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.cpp b/yql/essentials/udfs/common/python/bindings/py_stream.cpp new file mode 100644 index 0000000000..3d9aecdc00 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_stream.cpp @@ -0,0 +1,343 @@ +#include "py_stream.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { + +// will be initialized in InitYqlModule() +PyObject* PyYieldIterationException = nullptr; + +////////////////////////////////////////////////////////////////////////////// +// TPyStream +////////////////////////////////////////////////////////////////////////////// +struct TPyStream { + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + const NUdf::TType* ItemType; + + inline static TPyStream* Cast(PyObject* o) { + return reinterpret_cast<TPyStream*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TStream>").Release(); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, 
+ NUdf::IBoxedValuePtr value); + + static PyObject* Next(PyObject* self); +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyStreamType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TStream"), + INIT_MEMBER(tp_basicsize , sizeof(TPyStream)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyStream::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyStream::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TStream object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyStream::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + 
INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +PyObject* TPyStream::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + NUdf::IBoxedValuePtr value) +{ + TPyStream* stream = new TPyStream; + PyObject_INIT(stream, &PyStreamType); + stream->CastCtx = castCtx; + stream->Value.Set(castCtx->PyCtx, value); + + const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type); + stream->ItemType = inspector.GetItemType(); + + return reinterpret_cast<PyObject*>(stream); +} + +PyObject* TPyStream::Next(PyObject* self) { + PY_TRY { + TPyStream* stream = Cast(self); + + NUdf::TUnboxedValue item; + auto status = NUdf::TBoxedValueAccessor::Fetch(*stream->Value.Get(), item); + + switch (status) { + case NUdf::EFetchStatus::Ok: + return ToPyObject(stream->CastCtx, stream->ItemType, item) + .Release(); + case NUdf::EFetchStatus::Finish: + return nullptr; + case NUdf::EFetchStatus::Yield: + PyErr_SetNone(PyYieldIterationException); + return nullptr; + default: + Y_ABORT("Unknown stream status"); + } + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// +// TStreamOverPyIter +////////////////////////////////////////////////////////////////////////////// +class TStreamOverPyIter final: public NUdf::TBoxedValue { +public: + TStreamOverPyIter( + TPyCastContext::TPtr castCtx, + const NUdf::TType* itemType, + TPyObjectPtr pyIter, + TPyObjectPtr pyIterable, + TPyObjectPtr pyGeneratorCallable, + TPyObjectPtr pyGeneratorCallableClosure, + TPyObjectPtr pyGeneratorCallableArgs) + : CastCtx_(std::move(castCtx)) + , ItemType_(itemType) + , 
PyIter_(std::move(pyIter)) + , PyIterable_(std::move(pyIterable)) + , PyGeneratorCallable_(std::move(pyGeneratorCallable)) + , PyGeneratorCallableClosure_(std::move(pyGeneratorCallableClosure)) + , PyGeneratorCallableArgs_(std::move(pyGeneratorCallableArgs)) + { + } + + ~TStreamOverPyIter() { + TPyGilLocker lock; + PyIter_.Reset(); + PyIterable_.Reset(); + PyGeneratorCallableArgs_.Reset(); + PyGeneratorCallableClosure_.Reset(); + PyGeneratorCallable_.Reset(); + } + +private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + try { + TPyGilLocker lock; + TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + if (PyErr_GivenExceptionMatches(next.Get(), PyYieldIterationException)) { + return NUdf::EFetchStatus::Yield; + } + + result = FromPyObject(CastCtx_, ItemType_, next.Get()); + return NUdf::EFetchStatus::Ok; + } + + if (PyObject* ex = PyErr_Occurred()) { + if (PyErr_GivenExceptionMatches(ex, PyYieldIterationException)) { + PyErr_Clear(); + TPyObjectPtr iterable; + TPyObjectPtr iter; + if (PyIterable_) { + PyIter_.Reset(); + iterable = PyIterable_; + } else if (PyGeneratorCallable_) { + PyIter_.Reset(); + TPyObjectPtr result(PyObject_CallObject(PyGeneratorCallable_.Get(), PyGeneratorCallableArgs_.Get())); + if (!result) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + + if (PyGen_Check(result.Get())) { + iterable = std::move(result); + } else if (PyIter_Check(result.Get())) { + iter = std::move(result); + } else { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected iterator or generator, but got " << PyObjectRepr(result.Get())).data()); + } + } else { + return NUdf::EFetchStatus::Yield; + } + + if (!iter) { + iter.ResetSteal(PyObject_GetIter(iterable.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + } + + PyIter_.ResetAddRef(iter.Get()); + return NUdf::EFetchStatus::Yield; 
+ } + + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return NUdf::EFetchStatus::Finish; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + } + +private: + TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PyIter_; + TPyObjectPtr PyIterable_; + TPyObjectPtr PyGeneratorCallable_; + TPyObjectPtr PyGeneratorCallableClosure_; + TPyObjectPtr PyGeneratorCallableArgs_; +}; + + +////////////////////////////////////////////////////////////////////////////// +// public functions +////////////////////////////////////////////////////////////////////////////// +TPyObjectPtr ToPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value) +{ + return TPyStream::New(castCtx, type, value.AsBoxed()); +} + +NKikimr::NUdf::TUnboxedValue FromPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const TPyObjectPtr& value, + const TPyObjectPtr& originalCallable, + const TPyObjectPtr& originalCallableClosure, + const TPyObjectPtr& originalCallableArgs +) +{ + const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type); + const NUdf::TType* itemType = inspector.GetItemType(); + + if (PyGen_Check(value.Get())) { + TPyObjectPtr iter(PyObject_GetIter(value.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, + originalCallable, originalCallableClosure, originalCallableArgs)); + } + + if (PyIter_Check(value.Get()) +#if PY_MAJOR_VERSION < 3 + // python 2 iterators must also implement "next" method + && 1 == PyObject_HasAttrString(value.Get(), "next") +#endif + ) { + TPyObjectPtr iter(value.Get(), TPyObjectPtr::ADD_REF); + return NUdf::TUnboxedValuePod(new 
TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, + originalCallable, originalCallableClosure, originalCallableArgs)); + } + + // assume that this function will returns generator + if (PyCallable_Check(value.Get())) { + TPyObjectPtr generator(PyObject_CallObject(value.Get(), nullptr)); + if (!generator || !PyGen_Check(generator.Get())) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected generator as a result of function call").data()); + } + TPyObjectPtr iter(PyObject_GetIter(generator.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + TPyObjectPtr callableClosure; + if (PyFunction_Check(value.Get())) { + PyObject* closure = PyFunction_GetClosure(value.Get()); + if (closure) { + callableClosure = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF); + } + } + + return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, + originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr)); + } + + // must be after checking for callable + if (PySequence_Check(value.Get()) || PyObject_HasAttrString(value.Get(), "__iter__")) { + TPyObjectPtr iter(PyObject_GetIter(value.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), originalCallable ? 
value : nullptr, nullptr, nullptr, nullptr)); + } + + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected iterator, generator, generator factory, " + "or iterable object, but got " << PyObjectRepr(value.Get())).data()); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.h b/yql/essentials/udfs/common/python/bindings/py_stream.h new file mode 100644 index 0000000000..f677e23930 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_stream.h @@ -0,0 +1,24 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyStreamType; +extern PyObject* PyYieldIterationException; + +TPyObjectPtr ToPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const TPyObjectPtr& value, + const TPyObjectPtr& originalCallable, + const TPyObjectPtr& originalCallableClosure, + const TPyObjectPtr& originalCallableArgs); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp new file mode 100644 index 0000000000..4a24dd1a13 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp @@ -0,0 +1,208 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyStreamTest) { + void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; + + while (true) { + status = value.Fetch(item); + if (status != NUdf::EFetchStatus::Ok) break; + + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + 
UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish); + UNIT_ASSERT_EQUAL(expected, 10); + } + + struct TTestStream final: NUdf::TBoxedValue { + TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>()) + : Current_(0) + , YieldOn_(yieldOn) + , MaxValue_(maxValue) + { + } + + private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + if (Current_ == YieldOn_) { + return NUdf::EFetchStatus::Yield; + } else if (Current_ >= MaxValue_) { + return NUdf::EFetchStatus::Finish; + } + result = NUdf::TUnboxedValuePod(Current_++); + return NUdf::EFetchStatus::Ok; + } + + ui32 Current_, YieldOn_, MaxValue_; + }; + + Y_UNIT_TEST(FromGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return iter(range(10))\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" +#if PY_MAJOR_VERSION >= 3 + " return range(10)\n", +#else + " return xrange(10)\n", +#endif + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T(list(range(10)))\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromList) { + 
TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value):\n" + " import yql\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"); + } + + Y_UNIT_TEST(ToPythonAndBackAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value): return value", + Ui32StreamValidator + ); + } + + Y_UNIT_TEST(YieldingStreamFromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "import yql\n" + "def Test():\n" + " yield 0\n" + " yield 1\n" + " yield yql.TYieldIteration\n" + " yield 2\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; + + while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); + UNIT_ASSERT_EQUAL(expected, 2); + }); + } + + Y_UNIT_TEST(YieldingStreamFromCpp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(5, 2)); + }, + "import yql\n" + "def Test(value):\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert next(value) == 0\n" + " assert next(value) == 1\n" + " try:\n" + " 
next(value)\n" + " except yql.TYieldIteration:\n" + " pass\n" + " else:\n" + " assert False, 'Expected yql.TYieldIteration'\n"); + } + + Y_UNIT_TEST(FromCppListIterator) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue *items = nullptr; + const auto a = vb.NewArray(10U, items); + ui32 i = 0U; + std::generate_n(items, 10U, [&i](){ return NUdf::TUnboxedValuePod(i++); }); + return a; + }, + "def Test(value): return iter(value)", + Ui32StreamValidator + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp new file mode 100644 index 0000000000..444b7b0c5b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp @@ -0,0 +1,98 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyStringTest) { + template <typename TStringType> + void TestStringCasts() { + TStringType testStr1(TStringBuf("test string")); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = PyCast<TStringType>(str1.Get()); + + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + TStringType testStr2(TStringBuf("another test string")); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); + } + + template <typename TStringType> + void TestBinaryStringCasts() { + TStringType testStr1(TStringBuf("\xa0\xa1"sv)); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = 
PyCast<TStringType>(str1.Get()); + + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + TStringType testStr2(TStringBuf("\xf0\x90\x28\xbc"sv)); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); + } + + template <typename TStringType> + void TestUtf8StringCasts() { + const TStringType testStr1(TStringBuf("тестовая строка")); + TStringBuf strBuf1 = testStr1; + const TPyObjectPtr str1 = PyUnicode_FromString(strBuf1.data()); + const TPyObjectPtr utf8 = PyUnicode_AsUTF8String(str1.Get()); + const auto value = PyCast<TStringType>(utf8.Get()); + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + const TStringType testStr2(TStringBuf("еще одна тестовая строка")); + TStringBuf strBuf2 = testStr2; + const auto str2 = ToPyUnicode<TStringType>(testStr2); + + UNIT_ASSERT(PyUnicode_Check(str2.Get())); + + Py_ssize_t size = 0U; +#if PY_MAJOR_VERSION >= 3 + const auto buf = PyUnicode_AsUTF8AndSize(str2.Get(), &size); +#else + char* buf = nullptr; + const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(str2.Get()); + const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); +#endif + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); + } + + Y_UNIT_TEST(Simple) { + TestStringCasts<TString>(); + TestStringCasts<TStringBuf>(); + TestStringCasts<NUdf::TStringRef>(); + } + + Y_UNIT_TEST(Utf8) { + TestUtf8StringCasts<TString>(); + TestUtf8StringCasts<TStringBuf>(); + TestUtf8StringCasts<NUdf::TStringRef>(); + } + + Y_UNIT_TEST(Binary) { + TestBinaryStringCasts<TString>(); + TestBinaryStringCasts<TStringBuf>(); + TestBinaryStringCasts<NUdf::TStringRef>(); + } +} 
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.cpp b/yql/essentials/udfs/common/python/bindings/py_struct.cpp new file mode 100644 index 0000000000..a4ab99ee32 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_struct.cpp @@ -0,0 +1,188 @@ +#include "py_struct.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/string/cast.h> +#include <util/string/join.h> +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { + +namespace { + +TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TType* type, const NUdf::TStructTypeInspector& inspector) +{ + const auto it = ctx->StructTypes.emplace(type, TPyObjectPtr()); + if (it.second) { +#if PY_MAJOR_VERSION >= 3 + std::vector<PyStructSequence_Field> fields(inspector.GetMembersCount() + 1U); + for (ui32 i = 0U; i < inspector.GetMembersCount(); ++i) { + fields[i] = {const_cast<char*>(inspector.GetMemberName(i).Data()), nullptr}; + } + fields.back() = {nullptr, nullptr}; + + PyStructSequence_Desc desc = { + INIT_MEMBER(name, "yql.Struct"), + INIT_MEMBER(doc, nullptr), + INIT_MEMBER(fields, fields.data()), + INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount())) + }; + + const auto typeObject = new PyTypeObject(); + if (0 > PyStructSequence_InitType2(typeObject, &desc)) { + throw yexception() << "can't create struct type: " << GetLastErrorAsString(); + } + + it.first->second.ResetSteal(reinterpret_cast<PyObject*>(typeObject)); + } + + const TPyObjectPtr object = PyStructSequence_New(it.first->second.GetAs<PyTypeObject>()); +#else + const auto className = TString("yql.Struct_") += ToString(ctx->StructTypes.size()); + PyObject* metaclass = (PyObject 
*) &PyClass_Type; + const TPyObjectPtr name = PyRepr(TStringBuf(className)); + const TPyObjectPtr bases = PyTuple_New(0); + const TPyObjectPtr dict = PyDict_New(); + + TPyObjectPtr newClass = PyObject_CallFunctionObjArgs( + metaclass, name.Get(), bases.Get(), dict.Get(), + nullptr); + if (!newClass) { + throw yexception() << "can't create new type: " << GetLastErrorAsString(); + } + + it.first->second = std::move(newClass); + } + + Y_UNUSED(inspector); + const TPyObjectPtr object = PyInstance_New(it.first->second.Get(), nullptr, nullptr); +#endif + if (!object) { + throw yexception() << "can't struct instance: " << GetLastErrorAsString(); + } + return object; +} + +} + +TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TStructTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const TPyObjectPtr object = CreateNewStrucInstance(ctx, type, inspector); + const auto membersCount = inspector.GetMembersCount(); + + if (auto ptr = value.GetElements()) { + for (Py_ssize_t i = 0; i < membersCount; ++i) { +#if PY_MAJOR_VERSION >= 3 + auto item = ToPyObject(ctx, inspector.GetMemberType(i), *ptr++); + PyStructSequence_SetItem(object.Get(), i, item.Release()); +#else + const TStringBuf name = inspector.GetMemberName(i); + const auto item = ToPyObject(ctx, inspector.GetMemberType(i), *ptr++); + if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { + throw yexception() + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); + } +#endif + } + } else { + for (Py_ssize_t i = 0; i < membersCount; ++i) { +#if PY_MAJOR_VERSION >= 3 + auto item = ToPyObject(ctx, inspector.GetMemberType(i), value.GetElement(i)); + PyStructSequence_SetItem(object.Get(), i, item.Release()); +#else + const TStringBuf name = inspector.GetMemberName(i); + const auto item = ToPyObject(ctx, inspector.GetMemberType(i), value.GetElement(i)); + if (0 > 
PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { + throw yexception() + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); + } +#endif + } + } + + return object; +} + +NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value) +{ + NUdf::TUnboxedValue* items = nullptr; + const NUdf::TStructTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto membersCount = inspector.GetMembersCount(); + auto mkqlStruct = ctx->ValueBuilder->NewArray(membersCount, items); + + TVector<TString> errors; + if (PyDict_Check(value)) { + for (ui32 i = 0; i < membersCount; i++) { + TStringBuf memberName = inspector.GetMemberName(i); + auto memberType = inspector.GetMemberType(i); + // borrowed reference - no need to manage ownership + PyObject* item = PyDict_GetItemString(value, memberName.data()); + if (!item) { + TPyObjectPtr bytesMemberName = PyBytes_FromStringAndSize(memberName.data(), memberName.size()); + item = PyDict_GetItem(value, bytesMemberName.Get()); + } + if (!item) { + if (ctx->PyCtx->TypeInfoHelper->GetTypeKind(memberType) == NUdf::ETypeKind::Optional) { + items[i] = NUdf::TUnboxedValue(); + continue; + } + + errors.push_back(TStringBuilder() << "Dict has no item '" << memberName << "'"); + continue; + } + + try { + items[i] = FromPyObject(ctx, inspector.GetMemberType(i), item); + } catch (const yexception& e) { + errors.push_back(TStringBuilder() << "Failed to convert dict item '" << memberName << "' - " << e.what()); + } + } + + if (!errors.empty()) { + throw yexception() << "Failed to convert dict to struct\n" << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value); + } + } else { + for (ui32 i = 0; i < membersCount; i++) { + TStringBuf memberName = inspector.GetMemberName(i); + auto memberType = inspector.GetMemberType(i); + TPyObjectPtr attr = PyObject_GetAttrString(value, memberName.data()); + if (!attr) { + if 
(ctx->PyCtx->TypeInfoHelper->GetTypeKind(memberType) == NUdf::ETypeKind::Optional && + PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + items[i] = NUdf::TUnboxedValue(); + continue; + } + + errors.push_back(TStringBuilder() << "Object has no attr '" << memberName << "' , error: " << GetLastErrorAsString()); + continue; + } + + try { + items[i] = FromPyObject(ctx, memberType, attr.Get()); + } catch (const yexception& e) { + errors.push_back(TStringBuilder() << "Failed to convert object attr '" << memberName << "' - " << e.what()); + } + } + + if (!errors.empty()) { + throw yexception() << "Failed to convert object to struct\n" << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value); + } + } + + return mkqlStruct; +} + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.h b/yql/essentials/udfs/common/python/bindings/py_struct.h new file mode 100644 index 0000000000..79a380283f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_struct.h @@ -0,0 +1,17 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyStruct( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyStruct( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp new file mode 100644 index 0000000000..a97507f549 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp @@ -0,0 +1,307 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyStructTest) { + Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = 
engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age, name):\n" + " self.age = age\n" + " self.name = name\n" + "\n" + "def Test():\n" + " return Person(99, 'Jamel')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyObjectMissingOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField("name", optionalStringType, &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age):\n" + " self.age = age\n" + "\n" + "def Test():\n" + " return Person(99)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyDict) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'name': 'Jamel', 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + 
UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyDictMissingOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField("name", optionalStringType, &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) { + TPythonTestEngine engine; + + ui32 ageIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("a\0ge", &ageIdx) + .Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { b'a\\0ge': 99 }\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyNamedTuple) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Person = namedtuple('Person', 'name age')\n" + " return Person(age=13, name='Tony')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13); + }); + } + + Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) 
{ + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField("name", optionalStringType, &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Pers = namedtuple('Person', 'name age')\n" + " return Pers(name=None, age=15)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15); + }); + } + + Y_UNIT_TEST(FromPyEmptyStruct) { + TPythonTestEngine engine; + auto emptyStruct = engine.GetTypeBuilder().Struct()->Build(); + + engine.ToMiniKQL(emptyStruct, + "class Empty: pass\n" + "\n" + "def Test():\n" + " return Empty()\n", + [](const NUdf::TUnboxedValuePod&) {}); + } + + Y_UNIT_TEST(ToPyObject) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0; + auto addressType = engine.GetTypeBuilder().Struct()-> + AddField<NUdf::TUtf8>("city", &cityIdx) + .AddField<NUdf::TUtf8>("street", &streetIdx) + .AddField<ui16>("building", &buildingIdx) + .Build(); + + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<ui16>("age", &ageIdx) + .AddField<NUdf::TUtf8>("name", &nameIdx) + .AddField("address", addressType, &addressIdx) + .Build(); + + + engine.ToPython(personType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97)); + items[nameIdx] = vb.NewString("Jamel"); + NUdf::TUnboxedValue* items2 = nullptr; + items[addressIdx] = vb.NewArray(static_cast<const 
TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2); + items2[cityIdx] = vb.NewString("Moscow");; + items2[streetIdx] = vb.NewString("L'va Tolstogo"); + items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16)); + return new_struct; + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" + " assert value.name == 'Jamel'\n" + " assert value.age == 97\n" + " assert value.address.city == 'Moscow'\n" + " assert value.address.building == 16\n" + ); + } + + Y_UNIT_TEST(ToPyObjectKeywordsAsFields) { + TPythonTestEngine engine; + + ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0; + auto structType = engine.GetTypeBuilder().Struct()-> + AddField<NUdf::TUtf8>("pass", &passIdx) + .AddField<NUdf::TUtf8>("while", &whileIdx) + .AddField<NUdf::TUtf8>("if", &ifIdx) + .AddField<NUdf::TUtf8>("not", &notIdx) + .Build(); + + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ifIdx] = vb.NewString("You"); + items[whileIdx] = vb.NewString("Shall"); + items[notIdx] = vb.NewString("Not"); + items[passIdx] = vb.NewString("Pass"); + return new_struct; + }, + "def Test(value):\n" + " assert getattr(value, 'if') == 'You'\n" + " assert getattr(value, 'while') == 'Shall'\n" + " assert getattr(value, 'not') == 'Not'\n" + " assert getattr(value, 'pass') == 'Pass'\n" + ); + } + +#if PY_MAJOR_VERSION >= 3 // TODO: Fix for python 2 + Y_UNIT_TEST(ToPyObjectTryModify) { + TPythonTestEngine engine; + + ui32 field1Idx = 0, field2Idx = 0; + auto structType = engine.GetTypeBuilder().Struct()-> + AddField<NUdf::TUtf8>("field1", &field1Idx) + .AddField<NUdf::TUtf8>("field2", &field2Idx) + .Build(); + + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = 
vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[field1Idx] = NUdf::TUnboxedValuePod::Zero(); + items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty"); + return new_struct; + }, + "def Test(value):\n" + " try:\n" + " setattr(value, 'field1', 17)\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " value.field2 = 18\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } +#endif + + Y_UNIT_TEST(ToPyObjectEmptyStruct) { + TPythonTestEngine engine; + + auto personType = engine.GetTypeBuilder().Struct()->Build(); + + engine.ToPython(personType, + [](const TType*, const NUdf::IValueBuilder& vb) { + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" +#if PY_MAJOR_VERSION >= 3 + " assert len(value) == 0\n" +#endif + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp new file mode 100644 index 0000000000..6cef25ea47 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp @@ -0,0 +1,61 @@ +#include "py_tuple.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +using namespace NKikimr; + +namespace NPython { + +TPyObjectPtr ToPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto elementsCount = inspector.GetElementsCount(); + + const TPyObjectPtr tuple(PyTuple_New(elementsCount)); + + if (auto ptr = value.GetElements()) { + for (ui32 i = 0U; i < elementsCount; ++i) { + auto item = ToPyObject(ctx, 
inspector.GetElementType(i), *ptr++); + PyTuple_SET_ITEM(tuple.Get(), i, item.Release()); + } + } else { + for (ui32 i = 0U; i < elementsCount; ++i) { + auto item = ToPyObject(ctx, inspector.GetElementType(i), value.GetElement(i)); + PyTuple_SET_ITEM(tuple.Get(), i, item.Release()); + } + } + + return tuple; +} + +NUdf::TUnboxedValue FromPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value) +{ + const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + if (const TPyObjectPtr fast = PySequence_Fast(value, "Expected tuple or list.")) { + const Py_ssize_t itemsCount = PySequence_Fast_GET_SIZE(fast.Get()); + + if (itemsCount < 0 || inspector.GetElementsCount() != itemsCount) { + throw yexception() << "Tuple elements count mismatch."; + } + + NUdf::TUnboxedValue* tuple_items = nullptr; + const auto tuple = ctx->ValueBuilder->NewArray(inspector.GetElementsCount(), tuple_items); + for (Py_ssize_t i = 0; i < itemsCount; i++) { + const auto item = PySequence_Fast_GET_ITEM(fast.Get(), i); + *tuple_items++ = FromPyObject(ctx, inspector.GetElementType(i), item); + } + + return tuple; + } + + throw yexception() << "Expected Tuple or Sequence but got: " << PyObjectRepr(value); +} + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.h b/yql/essentials/udfs/common/python/bindings/py_tuple.h new file mode 100644 index 0000000000..7d66af9b01 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.h @@ -0,0 +1,17 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyTuple( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyTuple( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp 
b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp new file mode 100644 index 0000000000..a6b9b6cc3e --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp @@ -0,0 +1,108 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyTupleTest) { + Y_UNIT_TEST(FromPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<>>( + "def Test(): return ()", + [](const NUdf::TUnboxedValuePod&) {}); + } + + Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return [1, 2, 3]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); + } + + Y_UNIT_TEST(FromPyIter) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return iter({1, 2, 3})", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); + } + + Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>( + "def Test(): return (1, float(2.3), '4')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + auto second = value.GetElement(1); + UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001); + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "4"); + }); + } + + Y_UNIT_TEST(FromPyTupleInTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<ui32, 
NUdf::TTuple<ui8, float>, char*>>( + "def Test(): return (1, (2, float(3.4)), '5')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1); + + auto second = value.GetElement(1); + UNIT_ASSERT(second); + UNIT_ASSERT(second.IsBoxed()); + UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2); + UNIT_ASSERT_DOUBLES_EQUAL( + second.GetElement(1).Get<float>(), 3.4, 0.0001); + + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "5"); + }); + } + + Y_UNIT_TEST(ToPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 0\n" + " assert value == ()\n"); + } + + Y_UNIT_TEST(ToPyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + items[0] = vb.NewString("111"); + items[1] = NUdf::TUnboxedValuePod((ui64) 2); + items[2] = NUdf::TUnboxedValuePod((ui8) 3); + items[3] = NUdf::TUnboxedValuePod((float) 4.5); + return tuple; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 4\n" + " assert value == ('111', 2, 3, 4.5)\n"); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp new file mode 100644 index 0000000000..e9f5971c78 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp @@ -0,0 +1,85 @@ +#include "py_variant.h" +#include "ut3/py_test_engine.h" +#include 
<yql/essentials/minikql/mkql_type_ops.h> + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyTzDateTest) { + Y_UNIT_TEST(FromDate) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDate>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); + } + + Y_UNIT_TEST(FromDatetime) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDatetime>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui32>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); + } + + Y_UNIT_TEST(FromTimestamp) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzTimestamp>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui64>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); + } + + Y_UNIT_TEST(ToDate) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDate>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui16)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); + } + + Y_UNIT_TEST(ToDatetime) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDatetime>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui32)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + 
"def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); + } + + Y_UNIT_TEST(ToTimestamp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzTimestamp>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui64)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.cpp b/yql/essentials/udfs/common/python/bindings/py_utils.cpp new file mode 100644 index 0000000000..d1e0e8b484 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_utils.cpp @@ -0,0 +1,89 @@ +#include "py_utils.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" + +#include <util/generic/yexception.h> +#include <util/string/split.h> + +#include <regex> + + +namespace NPython { + +TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern) { + for (auto c : asciiStr) { + Y_ABORT_UNLESS((c&0x80) == 0, "expected ascii"); + } + + Py_ssize_t size = static_cast<Py_ssize_t>(asciiStr.size()); +#if PY_MAJOR_VERSION >= 3 + TPyObjectPtr pyStr = PyUnicode_FromStringAndSize(asciiStr.data(), size); +#else + TPyObjectPtr pyStr = PyString_FromStringAndSize(asciiStr.data(), size); +#endif + Y_ABORT_UNLESS(pyStr, "Can't get repr string"); + if (!intern) { + return pyStr; + } + + PyObject* tmp = pyStr.Release(); +#if PY_MAJOR_VERSION >= 3 + PyUnicode_InternInPlace(&tmp); +#else + PyString_InternInPlace(&tmp); +#endif + return TPyObjectPtr(tmp); +} + +TString PyObjectRepr(PyObject* value) { + static constexpr size_t maxLen = 1000; + static constexpr std::string_view truncSuffix = "(truncated)"; + const TPyObjectPtr repr(PyObject_Repr(value)); + if (!repr) { + return TString("repr error: ") + GetLastErrorAsString(); + } + + TString string; + if (!TryPyCast(repr.Get(), string)) { + 
string = "can't get repr as string"; + } + if (string.size() > maxLen) { + string.resize(maxLen - truncSuffix.size()); + string += truncSuffix; + } + return string; +} + +bool HasEncodingCookie(const TString& source) { + // + // To define a source code encoding, a magic comment must be placed + // into the source files either as first or second line in the file. + // + // See https://www.python.org/dev/peps/pep-0263 for more details. + // + + static std::regex encodingRe( + "^[ \\t\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*"); + + int i = 0; + for (const auto& it: StringSplitter(source).Split('\n')) { + if (i++ == 2) break; + + TStringBuf line = it.Token(); + if (std::regex_match(line.begin(), line.end(), encodingRe)) { + return true; + } + } + return false; +} + +void PyCleanup() { + TPyGilLocker lock; + PyErr_Clear(); + PySys_SetObject("last_type", Py_None); + PySys_SetObject("last_value", Py_None); + PySys_SetObject("last_traceback", Py_None); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.h b/yql/essentials/udfs/common/python/bindings/py_utils.h new file mode 100644 index 0000000000..0c5ef058f1 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_utils.h @@ -0,0 +1,28 @@ +#pragma once + +#include "py_ptr.h" + +#include <util/generic/strbuf.h> + +#ifdef _win_ +#define INIT_MEMBER(member, value) value //member +#else +#define INIT_MEMBER(member, value) .member = (value) +#endif + +namespace NPython { + +TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern = false); + +template <size_t size> +TPyObjectPtr PyRepr(const char(&str)[size]) { + return PyRepr(TStringBuf(str, size - 1), true); +} + +TString PyObjectRepr(PyObject* value); + +bool HasEncodingCookie(const TString& source); + +void PyCleanup(); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp new file mode 100644 index 0000000000..ce521689b4 
--- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp @@ -0,0 +1,37 @@ +#include "py_utils.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyUtilsTest) { + + Y_UNIT_TEST(EncodingCookie) { + UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-")); + UNIT_ASSERT(HasEncodingCookie( + "# This Python file uses the following encoding: utf-8")); + + // encoding comment on second line + UNIT_ASSERT(HasEncodingCookie( + "#!/usr/local/bin/python\n" + "# -*- coding: iso-8859-15 -*-\n" + "print 'hello'")); + + // missing "coding:" prefix + UNIT_ASSERT(false == HasEncodingCookie("# latin-1")); + + // encoding comment not on line 1 or 2 + UNIT_ASSERT(false == HasEncodingCookie( + "#!/usr/local/bin/python\n" + "#\n" + "# -*- coding: latin-1 -*-\n")); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.cpp b/yql/essentials/udfs/common/python/bindings/py_variant.cpp new file mode 100644 index 0000000000..ab222b3432 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_variant.cpp @@ -0,0 +1,97 @@ +#include "py_variant.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> + + +using namespace NKikimr; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// public functions 
+////////////////////////////////////////////////////////////////////////////// +TPyObjectPtr ToPyVariant( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + auto& th = *castCtx->PyCtx->TypeInfoHelper; + NUdf::TVariantTypeInspector varInsp(th, type); + const NUdf::TType* subType = varInsp.GetUnderlyingType(); + ui32 index = value.GetVariantIndex(); + auto item = value.GetVariantItem(); + + const NUdf::TType* itemType = nullptr; + if (auto tupleInsp = NUdf::TTupleTypeInspector(th, subType)) { + itemType = tupleInsp.GetElementType(index); + TPyObjectPtr pyIndex = PyCast<ui32>(index); + TPyObjectPtr pyItem = ToPyObject(castCtx, itemType, item); + return PyTuple_Pack(2, pyIndex.Get(), pyItem.Get()); + } else if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { + itemType = structInsp.GetMemberType(index); + TPyObjectPtr pyName = ToPyUnicode<NUdf::TStringRef>( + structInsp.GetMemberName(index)); + TPyObjectPtr pyItem = ToPyObject(castCtx, itemType, item); + return PyTuple_Pack(2, pyName.Get(), pyItem.Get()); + } + + throw yexception() << "Cannot get Variant item type"; +} + +NUdf::TUnboxedValue FromPyVariant( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) +{ + PY_ENSURE(PyTuple_Check(value), + "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); + + Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); + PY_ENSURE(tupleSize == 2, + "Expected to get Tuple with 2 elements, but got " + << tupleSize << " elements"); + + auto& th = *castCtx->PyCtx->TypeInfoHelper; + NUdf::TVariantTypeInspector varInsp(th, type); + const NUdf::TType* subType = varInsp.GetUnderlyingType(); + + PyObject* el0 = PyTuple_GET_ITEM(value, 0); + PyObject* el1 = PyTuple_GET_ITEM(value, 1); + + ui32 index; + NUdf::TStringRef name; + if (TryPyCast(el0, index)) { + if (auto tupleInsp = NUdf::TTupleTypeInspector(th, subType)) { + PY_ENSURE(index < tupleInsp.GetElementsCount(), + "Index must be 
< " << tupleInsp.GetElementsCount() + << ", but got " << index); + auto* itemType = tupleInsp.GetElementType(index); + return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); + } else { + throw yexception() << "Cannot convert " << PyObjectRepr(value) + << " underlying Variant type is not a Tuple"; + } + } else if (TryPyCast(el0, name)) { + if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { + ui32 index = structInsp.GetMemberIndex(name); + PY_ENSURE(index < structInsp.GetMembersCount(), + "Unknown member name: " << TStringBuf(name)); + auto* itemType = structInsp.GetMemberType(index); + return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); + } else { + throw yexception() << "Cannot convert " << PyObjectRepr(value) + << " underlying Variant type is not a Struct"; + } + } else { + throw yexception() + << "Expected first Tuple element to either be an int " + "or a str, but got " << Py_TYPE(el0)->tp_name; + } +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.h b/yql/essentials/udfs/common/python/bindings/py_variant.h new file mode 100644 index 0000000000..ca97123183 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_variant.h @@ -0,0 +1,17 @@ +#pragma once + +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyVariant( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyVariant( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp new file mode 100644 index 0000000000..77ab9bc6e8 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp @@ -0,0 +1,101 @@ +#include "py_variant.h" +#include 
"ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyVariantTest) { + Y_UNIT_TEST(FromPyWithIndex) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>( + "def Test():\n" + " return (2, 'hello')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello"); + }); + } + + Y_UNIT_TEST(FromPyWithName) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> + AddField<ui32>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant()->Over(personType).Build(); + + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('age', 99)\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99); + }); + + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('name', 'Jamel')\n", + [nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel"); + }); + } + + Y_UNIT_TEST(ToPyWithIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TVariant<float, ui32, char*>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32) 42)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (1, 42)\n"); + } + + Y_UNIT_TEST(ToPyWithName) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> + 
AddField<ui32>("age", &ageIdx) + .AddField<NUdf::TUtf8>("name", &nameIdx) + .Build(); + + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant()->Over(personType).Build(); + + engine.ToPython( + variantType, + [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32) 99)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('age', 99)\n" + ); + + engine.ToPython( + variantType, + [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(nameIdx, vb.NewString("Jamel")); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('name', 'Jamel')\n" + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_void.cpp b/yql/essentials/udfs/common/python/bindings/py_void.cpp new file mode 100644 index 0000000000..ef72c052fb --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_void.cpp @@ -0,0 +1,117 @@ +#include "py_void.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> + +using namespace NKikimr; + +namespace NPython { +namespace { + +static PyObject* VoidRepr(PyObject*) { + return PyRepr("yql.Void").Release(); +} + +static void VoidDealloc(PyObject*) { + Py_FatalError("Deallocating yql.Void"); +} + +} // namespace + +PyTypeObject PyVoidType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.Void"), + INIT_MEMBER(tp_basicsize , 0), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , VoidDealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , VoidRepr), + INIT_MEMBER(tp_as_number , nullptr), + 
INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , 0), + INIT_MEMBER(tp_doc , "yql.Void object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , nullptr), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +PyObject PyVoidObject = { + _PyObject_EXTRA_INIT + 1, &PyVoidType +}; + +TPyObjectPtr ToPyVoid( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + Y_UNUSED(ctx); + Y_UNUSED(type); + Y_UNUSED(value); + return TPyObjectPtr(&PyVoidObject, TPyObjectPtr::ADD_REF); +} + +NUdf::TUnboxedValue FromPyVoid( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* 
value) +{ + Y_UNUSED(ctx); + Y_UNUSED(type); + Y_UNUSED(value); + PY_ENSURE(value == &PyVoidObject, "Expected object of yql.Void type"); + return NUdf::TUnboxedValue::Void(); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void.h b/yql/essentials/udfs/common/python/bindings/py_void.h new file mode 100644 index 0000000000..3c8203ab6e --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_void.h @@ -0,0 +1,21 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyVoidType; +extern PyObject PyVoidObject; + +TPyObjectPtr ToPyVoid( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyVoid( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp new file mode 100644 index 0000000000..7fbeca2043 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp @@ -0,0 +1,37 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyVoidTest) { + Y_UNIT_TEST(FromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<void>( + "import yql\n" + "\n" + "def Test():\n" + " return yql.Void\n", + [](const NUdf::TUnboxedValue& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(false == value.IsBoxed()); + }); + } + + Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<void>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValue::Void(); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value) == 'yql.Void'\n" + " assert repr(value) == 'yql.Void'\n" + " assert isinstance(value, yql.TVoid)\n" + " 
assert value is yql.Void\n"); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp new file mode 100644 index 0000000000..5d1497f7c7 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp @@ -0,0 +1,251 @@ +#include "py_yql_module.h" + +#include "py_void.h" +#include "py_iterator.h" +#include "py_list.h" +#include "py_dict.h" +#include "py_stream.h" +#include "py_utils.h" +#include "py_callable.h" + +#include <library/cpp/resource/resource.h> +#include <yql/essentials/udfs/common/python/python_udf/python_udf.h> + +namespace NPython { + +static PyMethodDef ModuleMethods[] = { + { nullptr, nullptr, 0, nullptr } /* sentinel */ +}; + +#define MODULE_NAME "yql" + +#if PY_MAJOR_VERSION >= 3 +#define MODULE_NAME_TYPING "yql.typing" +#endif + +#define MODULE_INITIALIZED_ATTRIBUTE "_initialized" + +PyDoc_STRVAR(ModuleDoc, + "This module provides YQL specific types for Python."); + +#if PY_MAJOR_VERSION >= 3 +PyDoc_STRVAR(ModuleDocTyping, + "This module provides annotations for YQL types for Python."); +#endif + +PyDoc_STRVAR(StopIterationException_doc, + "Can be throwed to yield stream iteration."); + +#define PREPARE_TYPE(Name, Type) \ + do { \ + if (PyType_Ready(Type) < 0) { \ + throw yexception() << "Can't prepare type: " << (Name); \ + } \ + } while (0) + +#define REGISTER_TYPE(Name, Type) \ + do { \ + PREPARE_TYPE(Name, Type); \ + Py_INCREF(Type); \ + if (PyModule_AddObject(module, (Name), (PyObject*) Type) < 0) { \ + throw yexception() << "Can't add type: " << (Name); \ + } \ + } while (0) + +#define REGISTER_OBJECT(Name, Object) \ + do { \ + if (PyDict_SetItemString(dict, (Name), (PyObject *) (Object)) < 0) \ + throw yexception() << "Can't register object: " << (Name); \ + } while (0) + +#define REGISTER_EXCEPTION(Name, Object, Doc) \ + do { \ + if (!Object) { \ + Object = PyErr_NewExceptionWithDoc((char*) MODULE_NAME "." 
Name, Doc, nullptr, nullptr); \ + if (!Object) { \ + throw yexception() << "Can't register exception: " << (Name); \ + } \ + REGISTER_OBJECT(Name, Object); \ + } \ + } while (0) + +#if PY_MAJOR_VERSION >= 3 +static PyModuleDef ModuleDefinition = { + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME), + INIT_MEMBER(m_doc, ModuleDoc), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, ModuleMethods), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), +}; + +static PyModuleDef ModuleDefinitionTyping = { + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME_TYPING), + INIT_MEMBER(m_doc, ModuleDocTyping), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, nullptr), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), +}; + +PyMODINIT_FUNC PyInit_YQL(void) +{ + auto mod = PyModule_Create(&ModuleDefinition); + PyModule_AddObject(mod, "__path__", Py_BuildValue("()")); + return mod; +} + +void go_throw(); + +PyMODINIT_FUNC PyInit_YQLTyping(void) +{ + return PyModule_Create(&ModuleDefinitionTyping); +} +#else +PyMODINIT_FUNC PyInit_YQL(void) +{ + Py_InitModule3(MODULE_NAME, ModuleMethods, ModuleDoc); +} +#endif + +void PrepareYqlModule() { + PyImport_AppendInittab(MODULE_NAME, &PyInit_YQL); +#if PY_MAJOR_VERSION >= 3 + PyImport_AppendInittab(MODULE_NAME_TYPING, &PyInit_YQLTyping); +#endif +} + +#if PY_MAJOR_VERSION >= 3 +void RegisterRuntimeModule(const char* name, PyObject* module) { + if (!module || !PyModule_Check(module)) { + throw yexception() << "Invalid object for module " << name; + } + + // borrowed reference + PyObject* modules = PyImport_GetModuleDict(); + if (!modules || !PyDict_CheckExact(modules)) { + throw yexception() << "Can't get sys.modules dictionary"; + } + + if (PyDict_SetItemString(modules, name, module) < 0) { + throw yexception() << "Can't register module " << name; + } +} 
+#endif + +void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone) { + TPyObjectPtr m = PyImport_ImportModule(MODULE_NAME); + if (!standalone && !m) { + PyErr_Clear(); +#if PY_MAJOR_VERSION >= 3 + m = PyInit_YQL(); + RegisterRuntimeModule(MODULE_NAME, m.Get()); +#else + PyInit_YQL(); +#endif + m = PyImport_ImportModule(MODULE_NAME); + } + + PyObject* module = m.Get(); + + if (!module) { + throw yexception() << "Can't get YQL module."; + } + + TPyObjectPtr initialized = PyObject_GetAttrString(module, MODULE_INITIALIZED_ATTRIBUTE); + if (!initialized) { + PyErr_Clear(); + } else if (initialized.Get() == Py_True) { + return; + } + + PyObject* dict = PyModule_GetDict(module); + + REGISTER_TYPE("TVoid", &PyVoidType); + REGISTER_OBJECT("Void", &PyVoidObject); + + PREPARE_TYPE("TIterator", &PyIteratorType); + PREPARE_TYPE("TPairIterator", &PyPairIteratorType); + + PREPARE_TYPE("TDict", &PyLazyDictType); + PREPARE_TYPE("TSet", &PyLazySetType); + + PREPARE_TYPE("TLazyListIterator", &PyLazyListIteratorType); + PREPARE_TYPE("TLazyList", &PyLazyListType); + PREPARE_TYPE("TThinListIterator", &PyThinListIteratorType); + PREPARE_TYPE("TThinList", &PyThinListType); + + PREPARE_TYPE("TStream", &PyStreamType); + PREPARE_TYPE("TCallable", &PyCallableType); + + REGISTER_EXCEPTION("TYieldIteration", PyYieldIterationException, StopIterationException_doc); + +#if PY_MAJOR_VERSION >= 3 + if (pythonFlavor == NYql::NUdf::EPythonFlavor::Arcadia) { + if (!standalone) { + TPyObjectPtr typingModule = PyImport_ImportModule(MODULE_NAME_TYPING); + if (!typingModule) { + PyErr_Clear(); + typingModule = PyInit_YQLTyping(); + RegisterRuntimeModule(MODULE_NAME_TYPING, typingModule.Get()); + } + } + + const auto typing = NResource::Find(TStringBuf("typing.py")); + const auto rc = PyRun_SimpleStringFlags(typing.c_str(), nullptr); + + if (rc < 0) { + // Not sure if PyErr_Print() works after PyRun_SimpleStringFlags, + // but just in case... 
+ PyErr_Print(); + ythrow yexception() << "Can't parse YQL type annotations module"; + } + + auto processError = [&] (PyObject* obj, TStringBuf message) { + if (obj) { + return; + } + PyObject *ptype, *pvalue, *ptraceback; + PyErr_Fetch(&ptype, &pvalue, &ptraceback); + if (pvalue) { + auto pstr = PyObject_Str(pvalue); + if (pstr) { + if (auto err_msg = PyUnicode_AsUTF8(pstr)) { + Cerr << err_msg << Endl; + } + } + PyErr_Restore(ptype, pvalue, ptraceback); + } + ythrow yexception() << "Can't setup YQL type annotations module: " << message; + }; + + auto main = PyImport_ImportModule("__main__"); + processError(main, "PyImport_ImportModule"); + auto function = PyObject_GetAttrString(main, "main"); + processError(function, "PyObject_GetAttrString"); + auto args = PyTuple_New(0); + processError(args, "PyTuple_New"); + auto result = PyObject_CallObject(function, args); + processError(result, "PyObject_CallObject"); + + Py_DECREF(result); + Py_DECREF(args); + Py_DECREF(function); + Py_DECREF(main); + } +#endif + + REGISTER_OBJECT(MODULE_INITIALIZED_ATTRIBUTE, Py_True); +} + +void TermYqlModule() { + PyYieldIterationException = nullptr; +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.h b/yql/essentials/udfs/common/python/bindings/py_yql_module.h new file mode 100644 index 0000000000..970471d029 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.h @@ -0,0 +1,11 @@ +#pragma once + +#include <yql/essentials/udfs/common/python/python_udf/python_udf.h> + +namespace NPython { + +void PrepareYqlModule(); +void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone = true); +void TermYqlModule(); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/typing.py b/yql/essentials/udfs/common/python/bindings/typing.py new file mode 100644 index 0000000000..0e53ad1e0a --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/typing.py @@ -0,0 +1,188 @@ +def main(): 
+ import importlib.abc + import importlib.machinery + import sys + + class Finder(importlib.abc.MetaPathFinder): + def find_spec(self, fullname, path, target=None): + if fullname in sys.builtin_module_names: + return importlib.machinery.ModuleSpec( + fullname, + importlib.machinery.BuiltinImporter, + ) + + sys.meta_path.append(Finder()) + + try: + import yandex.type_info.type_base as ti_base + import yandex.type_info.typing as ti_typing + import six + except ImportError as e: + raise ImportError( + str(e) + ". Make sure that library/python/type_info is in your PEERDIR list" + ) + + from yql import typing + + AutoMap = ti_base.make_primitive_type("AutoMap") + + def _format_arg(arg): + res = [] + if arg[0]: + res.append("{}:".format(ti_base.quote_string(arg[0]))) + res.append(str(arg[1])) + if arg[2]: + res.append("{Flags:") + res.append(",".join(str(x) for x in sorted(list(arg[2])))) + res.append("}") + return "".join(res) + + Stream = ti_typing._SingleArgumentGeneric("Stream") + + @six.python_2_unicode_compatible + class GenericResourceAlias(ti_base.Type): + REQUIRED_ATTRS = ti_base.Type.REQUIRED_ATTRS + ["tag"] + + def __str__(self): + return u"{}<{}>".format(self.name, ti_base.quote_string(self.tag)) + + def to_yson_type(self): + return {"type_name": self.yt_type_name, "tag": self.tag} + + class GenericResource(ti_base.Generic): + def __getitem__(self, params): + if not isinstance(params, str): + raise ValueError("Expected str, but got: {}".format(ti_base._with_type(params))) + + attrs = { + "name": self.name, + "yt_type_name": self.yt_type_name, + "tag": params, + } + + return GenericResourceAlias(attrs) + + def from_dict(self): + raise NotImplementedError() + + Resource = GenericResource("Resource") + + def _extract_arg_info(param): + name = "" + arg_type = param + flags = set() + if isinstance(param, slice): + name = param.start + if name is None: + name = "" + if not isinstance(name, str): + raise ValueError("Expected str as argument name but got: 
{}".format(ti_base._with_type(name))) + arg_type = param.stop + ti_base.validate_type(arg_type) + if param.step is not None: + for x in param.step: + if x != AutoMap: + raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x))) + flags.add(x) + else: + ti_base.validate_type(arg_type) + return (name, arg_type, flags) + + @six.python_2_unicode_compatible + class GenericCallableAlias(ti_base.Type): + def __str__(self): + return ("Callable<(" + + ",".join(_format_arg(x) for x in self.args[:len(self.args)-self.optional_args]) + + ("," if len(self.args) > self.optional_args and self.optional_args else "") + + ("[" if self.optional_args else "") + + ",".join(_format_arg(x) for x in self.args[len(self.args)-self.optional_args:]) + + ("]" if self.optional_args else "") + + ")->" + str(getattr(self, "return")) + ">") + + def to_yson_type(self): + yson_repr = { + "optional_args": self.optional_args, + "return": getattr(self, "return"), + "args": self.args, + "type_name": self.yt_type_name, + } + return yson_repr + + + class GenericCallable(ti_base.Generic): + def __getitem__(self, params): + if not isinstance(params, tuple) or len(params) < 2 or not isinstance(params[0], int) or not ti_typing.is_valid_type(params[1]): + raise ValueError("Expected at least two arguments (integer and type of return value) but got: {}".format(ti_base._with_type(params))) + args = [] + for param in params[2:]: + name, arg_type, flags = _extract_arg_info(param) + args.append((name, arg_type, flags)) + + if params[0] < 0 or params[0] > len(args): + raise ValueError("Optional argument count - " + str(params[0]) + " out of range [0.." 
+ str(len(args)) + "]") + + attrs = { + "optional_args": params[0], + "return": params[1], + "args": args, + "name": "Tagged", + "yt_type_name": "tagged", + } + + return GenericCallableAlias(attrs) + + def from_dict(self): + raise NotImplementedError() + + Callable = GenericCallable("Callable") + + def parse_slice_arg(arg): + try: + return _format_arg(_extract_arg_info(arg)) + except ValueError: + pass + + typing.Type = ti_base.Type + typing.is_valid_type = ti_base.is_valid_type + typing.parse_slice_arg = parse_slice_arg + + typing.Bool = ti_typing.Bool + typing.Int8 = ti_typing.Int8 + typing.Uint8 = ti_typing.Uint8 + typing.Int16 = ti_typing.Int16 + typing.Uint16 = ti_typing.Uint16 + typing.Int32 = ti_typing.Int32 + typing.Uint32 = ti_typing.Uint32 + typing.Int64 = ti_typing.Int64 + typing.Uint64 = ti_typing.Uint64 + typing.Float = ti_typing.Float + typing.Double = ti_typing.Double + typing.String = ti_typing.String + typing.Utf8 = ti_typing.Utf8 + typing.Yson = ti_typing.Yson + typing.Json = ti_typing.Json + typing.Uuid = ti_typing.Uuid + typing.Date = ti_typing.Date + typing.Datetime = ti_typing.Datetime + typing.Timestamp = ti_typing.Timestamp + typing.Interval = ti_typing.Interval + typing.TzDate = ti_typing.TzDate + typing.TzDatetime = ti_typing.TzDatetime + typing.TzTimestamp = ti_typing.TzTimestamp + typing.Void = ti_typing.Void + typing.Null = ti_typing.Null + typing.EmptyTuple = ti_typing.EmptyTuple + typing.EmptyStruct = ti_typing.EmptyStruct + typing.Optional = ti_typing.Optional + typing.List = ti_typing.List + typing.Dict = ti_typing.Dict + typing.Tuple = ti_typing.Tuple + typing.Struct = ti_typing.Struct + typing.Variant = ti_typing.Variant + typing.Tagged = ti_typing.Tagged + typing.Decimal = ti_typing.Decimal + + typing.Stream = Stream + typing.Resource = Resource + typing.Callable = Callable + typing.AutoMap = AutoMap diff --git a/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h 
b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h new file mode 100644 index 0000000000..a36e19fa32 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h @@ -0,0 +1,227 @@ +#pragma once + +#include "py_cast.h" +#include "py_yql_module.h" +#include "py_utils.h" + +#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h> +#include <yql/essentials/minikql/mkql_type_builder.h> +#include <yql/essentials/minikql/computation/mkql_value_builder.h> +#include <yql/essentials/udfs/common/python/python_udf/python_udf.h> + +#include <library/cpp/testing/unittest/registar.h> + +#define PYTHON_TEST_TAG "Python2Test" + + +using namespace NKikimr; +using namespace NMiniKQL; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// TPyInitializer +////////////////////////////////////////////////////////////////////////////// +struct TPyInitializer { + TPyInitializer() { + PrepareYqlModule(); + Py_Initialize(); + InitYqlModule(NYql::NUdf::EPythonFlavor::Arcadia); + } + ~TPyInitializer() { + TermYqlModule(); + Py_Finalize(); + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TPythonTestEngine +////////////////////////////////////////////////////////////////////////////// +class TPythonTestEngine { +public: + TPythonTestEngine() + : MemInfo_("Memory") + , Alloc_(__LOCATION__) + , Env_(Alloc_) + , TypeInfoHelper_(new TTypeInfoHelper) + , FunctionInfoBuilder_(Env_, TypeInfoHelper_, "", nullptr, {}) + { + HolderFactory_ = MakeHolder<THolderFactory>( + Alloc_.Ref(), + MemInfo_, + nullptr); + ValueBuilder_ = MakeHolder<TDefaultValueBuilder>(*HolderFactory_, NUdf::EValidatePolicy::Exception); + BindTerminator_ = MakeHolder<TBindTerminator>(ValueBuilder_.Get()); + Singleton<TPyInitializer>(); + CastCtx_ = MakeIntrusive<TPyCastContext>(&GetValueBuilder(), + MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), 
NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition()) + ); + } + + ~TPythonTestEngine() { + PyCleanup(); + } + + NUdf::IFunctionTypeInfoBuilder& GetTypeBuilder() { + return FunctionInfoBuilder_; + } + + const NUdf::IValueBuilder& GetValueBuilder() const { + return *ValueBuilder_; + } + + template <typename TChecker> + void ToMiniKQL(NUdf::TType* udfType, const TStringBuf& script, TChecker&& checker) { + TPyObjectPtr result = RunPythonFunction(script); + UNIT_ASSERT_C(!!result, script); + + TType* type = static_cast<TType*>(udfType); + auto value = FromPyObject(CastCtx_, type, result.Get()); + checker(value); + } + + template <typename TExpectedType, typename TChecker> + void ToMiniKQL(const TStringBuf& script, TChecker&& checker) { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + ToMiniKQL<TChecker>(type, script, std::move(checker)); + } + + template <typename TChecker> + void ToMiniKQLWithArg( + NUdf::TType* udfType, PyObject* argValue, + const TStringBuf& script, TChecker&& checker) + { + TPyObjectPtr args = Py_BuildValue("(O)", argValue); + + auto result = RunPythonFunction(script, args.Get()); + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + + TType* type = static_cast<TType*>(udfType); + auto value = FromPyObject(CastCtx_, type, result.Get()); + checker(value); + } + + template <typename TExpectedType, typename TChecker> + void ToMiniKQLWithArg( + PyObject* argValue, + const TStringBuf& script, TChecker&& checker) + { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + ToMiniKQLWithArg<TChecker>(type, argValue, script, std::move(checker)); + } + + template <typename TMiniKQLValueBuilder> + TPyObjectPtr ToPython( + NUdf::TType* udfType, + TMiniKQLValueBuilder&& builder, + const TStringBuf& script) + { + try { + TType* type = static_cast<TType*>(udfType); + NUdf::TUnboxedValue value = builder(type, GetValueBuilder()); + TPyObjectPtr pyValue = ToPyObject(CastCtx_, type, value); + 
if (!pyValue || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("object execution error"); + } + TPyObjectPtr args = Py_BuildValue("(O)", pyValue.Get()); + + auto result = RunPythonFunction(script, args.Get()); + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + return result; + } catch (const yexception& e) { + Cerr << e << Endl; + UNIT_FAIL("cast error"); + } + + Py_RETURN_NONE; + } + + template <typename TExpectedType, typename TMiniKQLValueBuilder> + TPyObjectPtr ToPython(TMiniKQLValueBuilder&& builder, const TStringBuf& script) { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + return ToPython<TMiniKQLValueBuilder>(type, std::move(builder), script); + } + + NUdf::TUnboxedValue FromPython(NUdf::TType* udfType, const TStringBuf& script) { + auto result = RunPythonFunction(script); + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + + TType* type = static_cast<TType*>(udfType); + return FromPyObject(CastCtx_, type, result.Get()); + } + + template <typename TExpectedType> + NUdf::TUnboxedValue FromPython(const TStringBuf& script) { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + return FromPython(type, script); + } + + template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder> + NUdf::TUnboxedValue ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script) { + const auto aType = GetTypeBuilder().SimpleType<TArgumentType>(); + const auto result = ToPython<TMiniKQLValueBuilder>(aType, std::move(builder), script); + + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + + const auto rType = static_cast<TType*>(GetTypeBuilder().SimpleType<TReturnType>()); + return FromPyObject(CastCtx_, rType, result.Get()); + } + + template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder, typename TChecker> + 
void ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script, TChecker&& checker) { + const auto result = ToPythonAndBack<TArgumentType, TReturnType, TMiniKQLValueBuilder>(std::move(builder), script); + checker(result); + } + +private: + TPyObjectPtr RunPythonFunction( + const TStringBuf& script, PyObject* args = nullptr) + { + TString filename(TStringBuf("embedded:test.py")); + TPyObjectPtr code(Py_CompileString(script.data(), filename.data(), Py_file_input)); + if (!code) { + PyErr_Print(); + UNIT_FAIL("can't compile python script"); + } + + TString moduleName(TStringBuf("py_cast_ut")); + TPyObjectPtr module(PyImport_ExecCodeModule(moduleName.begin(), code.Get())); + if (!module) { + PyErr_Print(); + UNIT_FAIL("can't create python module"); + } + + TPyObjectPtr function(PyObject_GetAttrString(module.Get(), "Test")); + if (!function) { + PyErr_Print(); + UNIT_FAIL("function 'Test' is not found in module"); + } + return PyObject_CallObject(function.Get(), args); + } + +private: + TMemoryUsageInfo MemInfo_; + TScopedAlloc Alloc_; + TTypeEnvironment Env_; + const NUdf::ITypeInfoHelper::TPtr TypeInfoHelper_; + TFunctionTypeInfoBuilder FunctionInfoBuilder_; + THolder<THolderFactory> HolderFactory_; + THolder<TDefaultValueBuilder> ValueBuilder_; + THolder<TBindTerminator> BindTerminator_; + TPyCastContext::TPtr CastCtx_; +}; + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/ut3/ya.make b/yql/essentials/udfs/common/python/bindings/ut3/ya.make new file mode 100644 index 0000000000..b9d500938c --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ut3/ya.make @@ -0,0 +1,37 @@ +IF (OS_LINUX) + IF (NOT WITH_VALGRIND) + UNITTEST_FOR(yql/essentials/udfs/common/python/bindings) + + SRCS( + py_callable_ut.cpp + py_cast_ut.cpp + py_dict_ut.cpp + py_list_ut.cpp + py_decimal_ut.cpp + py_number_ut.cpp + py_optional_ut.cpp + py_resource_ut.cpp + py_stream_ut.cpp + py_string_ut.cpp + py_struct_ut.cpp + py_tuple_ut.cpp + 
py_tzdate_ut.cpp + py_utils_ut.cpp + py_variant_ut.cpp + py_void_ut.cpp + ) + + USE_PYTHON3() + + PEERDIR( + library/python/type_info + yql/essentials/minikql/computation/llvm14 + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql/pg_dummy + ) + + YQL_LAST_ABI_VERSION() + + END() + ENDIF() +ENDIF() diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make new file mode 100644 index 0000000000..efb5b475c4 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ya.make @@ -0,0 +1,54 @@ +PY23_NATIVE_LIBRARY() + +YQL_ABI_VERSION(2 27 0) + +SRCS( + py_callable.cpp + py_cast.cpp + py_decimal.cpp + py_errors.cpp + py_dict.cpp + py_list.cpp + py_lazy_mkql_dict.cpp + py_lazy_mkql_list.cpp + py_iterator.cpp + py_resource.cpp + py_stream.cpp + py_struct.cpp + py_tuple.cpp + py_utils.cpp + py_variant.cpp + py_void.cpp + py_yql_module.cpp +) + +IF (USE_SYSTEM_PYTHON AND _SYSTEM_PYTHON27) + # we should be able to run on python 2.7.X versions + # with X ranging from 3 to (at least) 15 + # + # for now bindings already use some functionality from 2.7.15, + # which doesn't exist earlier versions + # (according symbols won't be loaded from system python) + # + # so we provide backported implementation for this scenario to work as intended + SRCS( + py27_backports.c + ) +ENDIF() + +RESOURCE( + typing.py typing.py +) + +PEERDIR( + yql/essentials/public/udf + yql/essentials/utils +) + +NO_COMPILER_WARNINGS() + +END() + +RECURSE_FOR_TESTS( + ut3 +) diff --git a/yql/essentials/udfs/common/python/main_py3/__main__.pyx b/yql/essentials/udfs/common/python/main_py3/__main__.pyx new file mode 100644 index 0000000000..6f4ca94358 --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/__main__.pyx @@ -0,0 +1,50 @@ +import os +import runpy +import importlib + +import __res + + +cdef env_entry_point = 'Y_PYTHON_ENTRY_POINT' + + +cdef extern from 'main.h': + pass + + +def find_pymain(): + py_main = 
__res.find('PY_MAIN') + + if isinstance(py_main, bytes): + py_main = py_main.decode('utf8') + + if isinstance(py_main, unicode): + return py_main + + return None + + +def run_main(): + entry_point = os.environ.pop(env_entry_point, None) + + if entry_point is None: + entry_point = find_pymain() + + if entry_point is None: + raise RuntimeError('No entry point found') + + module_name, colon, func_name = entry_point.partition(':') + + if not colon: + runpy._run_module_as_main(module_name, alter_argv=False) + return + + if not module_name: + module_name = 'library.python.runtime_py3.entry_points' + + module = importlib.import_module(module_name) + func = getattr(module, func_name) + func() + + +run_main() diff --git a/yql/essentials/udfs/common/python/main_py3/include/main.h b/yql/essentials/udfs/common/python/main_py3/include/main.h new file mode 100644 index 0000000000..c96402004e --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/include/main.h @@ -0,0 +1,12 @@ +#pragma once +#include <util/system/compiler.h> + +#ifdef __cplusplus +extern "C" { +#endif +Y_PUBLIC +int RunPython(int argc, char** argv); +#ifdef __cplusplus +} +#endif + diff --git a/yql/essentials/udfs/common/python/main_py3/main.cpp b/yql/essentials/udfs/common/python/main_py3/main.cpp new file mode 100644 index 0000000000..edc3c89ca5 --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/main.cpp @@ -0,0 +1,9 @@ +#include "main.h" + +extern "C" +int RunPythonImpl(int argc, char** argv); + +extern "C" +int RunPython(int argc, char** argv) { + return RunPythonImpl(argc, argv); +} diff --git a/yql/essentials/udfs/common/python/main_py3/ya.make b/yql/essentials/udfs/common/python/main_py3/ya.make new file mode 100644 index 0000000000..cc13fb77e4 --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +USE_PYTHON3() + +ADDINCL( + yql/essentials/udfs/common/python/main_py3/include +) + +SRCS(GLOBAL main.cpp) + +BUILDWITH_CYTHON_C(__main__.pyx 
--embed=RunPythonImpl) + +END() diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json new file mode 100644 index 0000000000..dd55da78b5 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json @@ -0,0 +1,61 @@ +{ + "test.test[Annotations]": [ + { + "checksum": "19c6d906cb8617cf9d2b5d484e09caf8", + "size": 7570, + "uri": "https://{canondata_backend}/212715/49b4751c22bd43fa7057cc92ae5cbedb40404f40/resource.tar.gz#test.test_Annotations_/results.txt" + } + ], + "test.test[BytesDecodeModeStrict]": [ + { + "checksum": "f8534cff0843faaf876c41e0875dcf05", + "size": 3120, + "uri": "https://{canondata_backend}/1775319/4c4fed0942b33bcc70d44f7dd2972a8e05c6db97/resource.tar.gz#test.test_BytesDecodeModeStrict_/results.txt" + } + ], + "test.test[Cleanup]": [ + { + "checksum": "036e77892757e48fa3fb319ed324b019", + "size": 954, + "uri": "https://{canondata_backend}/1871182/9909e0b25b15bb1f21d5def23fb072d64c82f07e/resource.tar.gz#test.test_Cleanup_/results.txt" + } + ], + "test.test[CustomYsonConverter]": [ + { + "checksum": "7716204e544d2fcb9313412c3919e66d", + "size": 1625, + "uri": "https://{canondata_backend}/1130705/576535b56a4e74992911431865e5edd0f7d55520/resource.tar.gz#test.test_CustomYsonConverter_/results.txt" + } + ], + "test.test[Data]": [ + { + "checksum": "f40e83806b294be420681fdfbf2133e8", + "size": 25268, + "uri": "https://{canondata_backend}/1031349/7065a0985fe0cd26a754a5bee7a4c808836a4692/resource.tar.gz#test.test_Data_/results.txt" + } + ], + "test.test[Excepthook]": [ + { + "uri": "file://test.test_Excepthook_/extracted" + } + ], + "test.test[GreedyInputContainers]": [ + { + "checksum": "02a619c86f180e8a4c536087d64bab6d", + "size": 1328, + "uri": "https://{canondata_backend}/995452/085d43bbd16f44afc51d6cafed42465a3d20215c/resource.tar.gz#test.test_GreedyInputContainers_/results.txt" + } + ], + 
"test.test[OptionalNested]": [ + { + "uri": "file://test.test_OptionalNested_/extracted" + } + ], + "test.test[Switch]": [ + { + "checksum": "e60320702512bdcecd5c663f387ee939", + "size": 9172, + "uri": "https://{canondata_backend}/1130705/493ee46b1e8f2e848ab928f97913d332cb4fffc7/resource.tar.gz#test.test_Switch_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted new file mode 100644 index 0000000000..b260fe7616 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted @@ -0,0 +1,15 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:44:1: Fatal: Execution of node: Result + SELECT $udf(@@{"abc":1}@@); + ^ + <tmp_path>/program.sql:<main>:40:17: Fatal: Failed to execute: +CUSTOM_EXCEPTHOOK +True +Traceback (most recent call last): + File "embedded:f", line 31, in f +Exception + + + $udf = Python3::f(Callable<(String)->String>, $script); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted new file mode 100644 index 0000000000..413eb2f4ec --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted @@ -0,0 +1,14 @@ +<tmp_path>/program.sql:<main>: Error: Type annotation + + <tmp_path>/program.sql:<main>:12:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem + SELECT $optOptList("42"); + ^ + <tmp_path>/program.sql:<main>:12:8: Error: At function: Apply + SELECT $optOptList("42"); + ^ + <tmp_path>/program.sql:<main>:2:24: Error: At function: ScriptUdf + $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@ + ^ + <tmp_path>/program.sql:<main>:2:24: Error: Nested optionals are unsupported in script UDF + $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@ + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql new file mode 100644 index 0000000000..3f845322e2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql @@ -0,0 +1,67 @@ +--sanitizer ignore memory +$script = @@ +from yql.typing import * + +def primitive(a0:Bool,a1:Int8,a2:Uint8,a3:Int16,a4:Uint16,a5:Int32,a6:Uint32, + a7:Int64,a8:Uint64,a9:Float,a10:Double,a11:String,a12:Utf8,a13:Yson,a14:Json, + a15:Uuid,a16:Date,a17:Datetime,a18:Timestamp,a19:Interval,a20:TzDate, + a21:TzDatetime,a22:TzTimestamp)->Decimal(10,3): + pass + +def singletons(a0:Void,a1:Null,a2:EmptyStruct,a3:EmptyTuple)->Void: + pass + +def containers(a0:Optional[Int32],a1:List[List[Bool]],a2:Stream[String],a3:Dict[Int32,String], + a4:Tuple[Int32,String],a5:Tuple[Int32],a6:Struct["a":Int32,"b":String],a7:Struct["a":Int32], + a8:Variant[Int32,String],a9:Variant[Int32],a10:Variant["a":Int32,"b":String],a11:Variant["a":Int32])->List[String]: + pass + +def special(a0:Resource["Python3"],a1:Tagged[Int32,"foo"])->Void: + pass + +def c0()->Callable[0,Int32]: pass +def c1()->Callable[1,Int32,Optional[List[Int32]]]: pass +def c2()->Callable[1,Int32,Int32,Optional[List[Int32]]]: pass +def c3()->Callable[0,Int32,"a":Int32:{AutoMap}]: pass +def c4()->Callable[0,Int32,"":Int32:{AutoMap}]: pass +def c5()->Callable[0,Int32,"":Int32:{}]: pass +def c6()->Callable[0,Int32,"foo":Int32]: pass + +def f0(x:Optional[Int32]=None,y:Optional[Int32]=None)->Void: pass +def f1(x:Optional[Int32],y:Optional[Int32]=None)->Void: pass +def 
f2(x:Optional[Int32],y:Optional[Int32])->Void: pass +def f3(x:slice("",Int32,{AutoMap}), y:slice("name",String))->Void: pass + +@@; + +$t = ($name)->{ + return FormatType(EvaluateType( + ParseTypeHandle(Core::PythonFuncSignature(AsAtom("Python3"), $script, $name)))); +}; + +-- Singletons + +select $t("primitive"); +select $t("singletons"); + +-- Containers & Special + +select $t("containers"); +select $t("special"); + +-- Callable +select + $t("c0") as c0, + $t("c1") as c1, + $t("c2") as c2, + $t("c3") as c3, + $t("c4") as c4, + $t("c5") as c5, + $t("c6") as c6; + +-- Top level +select + $t("f0") as f0, + $t("f1") as f1, + $t("f2") as f2, + $t("f3") as f3; diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql new file mode 100644 index 0000000000..e540dbf38a --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql @@ -0,0 +1,11 @@ +--sanitizer ignore memory +$script = @@ +def f(string, uuid, yson): + return (string, str(type(string)), uuid, str(type(uuid)), yson, str(type(yson))) + +f._yql_bytes_decode_mode = 'strict' +@@; + +$udf = Python3::f(Callable<(String?, UUid?, Yson?)->Tuple<String?, String, UUid?, String, Yson?, String>>, $script); + +SELECT $udf("string", UUid('1812bc18-5838-4cde-98aa-287302697b90'), cast(@@{"abc"=1}@@ as yson)); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in new file mode 100644 index 0000000000..d5ddcb4083 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in @@ -0,0 +1 @@ +{"key"="1";"subkey"="2";"value"="3"}; diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql new file mode 100644 index 0000000000..9db9840292 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql @@ -0,0 +1,12 @@ +--sanitizer ignore memory +$udfScript = @@ +import yql +def mapper(records): + yql.g = records + for record in records: + yield dict(yid=b"bla", rnd=0.) +@@; + +$udf = Python3::mapper(Callable<(Stream<Struct<key:String, subkey:String, value:String>>)->Stream<Struct<yid:String, rnd:Double>>>, $udfScript); + +PROCESS Input using $udf(TableRows()); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql new file mode 100644 index 0000000000..43dd00cb3d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql @@ -0,0 +1,20 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$script = @@ +import json + +def yloads(z): + return json.loads(str(z, 'latin-1').replace("=",":")) + +def ydumps(z): + return bytes(json.dumps(z).replace(":","="), 'latin-1') + +def f(s): + return (s.get("abc",0),s) + +f._yql_convert_yson = (yloads,ydumps) +@@; + +$udf = Python3::f(Callable<(Yson?)->Tuple<Int64, Yson?>>, $script); + +SELECT $udf(cast(@@{"abc"=1}@@ as yson)); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in 
b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql new file mode 100644 index 0000000000..3f7de07d5c --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql @@ -0,0 +1,61 @@ +--sanitizer ignore memory +$data = AsTuple( + Bool("true"), + Bool("FalsE"), + Int8("-128"), + Int8("127"), + Uint8("0"), + Uint8("255"), + Int16("-32768"), + Int16("32767"), + Uint16("0"), + Uint16("65535"), + Int32("-2147483648"), + Int32("2147483647"), + Uint32("0"), + Uint32("4294967295"), + Int64("-9223372036854775808"), + Int64("9223372036854775807"), + Uint64("0"), + Uint64("18446744073709551615"), + Float("0"), + Float("1"), + Float("-1e30"), + Float("-inf"), + Float("+inf"), + Float("nan"), + Double("0"), + Double("1"), + Double("-1e300"), + Double("-inf"), + Double("+inf"), + Double("nan"), + String("foo\xffbar"), + Utf8("привет"), + Yson("<a=1>[3;%false]"), + Json(@@{"a":1,"b":null}@@), + Date("2000-01-01"), + Datetime("2000-01-01T01:02:03Z"), + Timestamp("2000-01-01T01:02:03.4Z"), + Interval("P1DT12H"), + TzDate("2000-01-01,Europe/Moscow"), + TzDatetime("2000-01-01T01:02:03,Europe/Moscow"), + TzTimestamp("2000-01-01T01:02:03.4,Europe/Moscow"), + Uuid('31323334-3536-3738-393a-3b3c3d3e3f40'), + Decimal('3.1415926535897932384626433832795029', 35, 34), + Decimal('-.00000000000000000000000000000000001', 35, 35), + Decimal('NAN', 10, 5), + Decimal('-iNf', 1, 0) +); + +$type = CallableType(0, + TypeOf($data), + TypeOf($data) +); + +$f = Python3::f($type, @@ +def f(x): + return x +@@); + +select $data, $f($data); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg 
b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg new file mode 100644 index 0000000000..5dae597903 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg @@ -0,0 +1 @@ +xfail diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql new file mode 100644 index 0000000000..100086c9e4 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql @@ -0,0 +1,23 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$script = @@ +import sys +import traceback + + +def excepthook(*args): + print('CUSTOM_EXCEPTHOOK', file=sys.stderr) + print(all(_ for _ in args), file=sys.stderr) + print("".join(traceback.format_exception(*args)), file=sys.stderr) + + +sys.excepthook = excepthook + + +def f(string): + raise Exception() +@@; + +$udf = Python3::f(Callable<(String)->String>, $script); + +SELECT $udf(@@{"abc":1}@@); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql new file mode 100644 index 0000000000..a43af8791d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql @@ -0,0 +1,19 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$s = @@ +def list_func(lst): + return lst.count(1) +list_func._yql_lazy_input = False +@@; + +$u = Python3::list_func(Callable<(List<Int32>)->Int32>, $s); +select $u(AsList(1,2,3)); + +$s = @@ +def dict_func(dict): + return 
list(dict.values()).count(b"b") +dict_func._yql_lazy_input = False +@@; + +$v = Python3::dict_func(Callable<(Dict<Int32, String>)->Int32>, $s); +select $v(AsDict(AsTuple(1,"a"),AsTuple(2,"b"))); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg new file mode 100644 index 0000000000..5dae597903 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg @@ -0,0 +1 @@ +xfail diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql new file mode 100644 index 0000000000..33396f036a --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql @@ -0,0 +1,7 @@ +--sanitizer ignore memory +$optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@ +def opt_opt_list(in_str): + return [in_str] if len(in_str) % 2 == 0 else None +@@); + +SELECT $optOptList("42"); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql new file mode 100644 index 0000000000..c2576a72e4 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql @@ -0,0 +1,92 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$x = AsList(1,2,3); + +$s1 = @@ +def f(input): + for x in input: + yield x +@@; + +$s2 = @@ +class Iter: + def __init__(self, input): + self.input = input + + def __next__(self): + return next(self.input) +@@; + +$s3 = @@ +class CallableIter: + def 
__init__(self, input): + self.input = input + + def __call__(self): + def f(input): + for x in input: + yield x + + return f(self.input) +@@; + +$s4 = @@ +class Iterable: + def __init__(self, input): + self.input = input + + def __iter__(self): + return iter(self.input) +@@; + +$f1 = Python3::f(Callable<(Stream<Int32>)->Stream<Int32>>, $s1); + +$f2 = Python3::Iter(Callable<(Stream<Int32>)->Stream<Int32>>, $s2); + +$f3 = Python3::CallableIter(Callable<(Stream<Int32>)->Stream<Int32>>, $s3); + +$f4 = Python3::Iterable(Callable<(Stream<Int32>)->Stream<Int32>>, $s4); + +$g = ($stream)->{ + return $stream; +}; + +select Yql::Collect($g(Yql::Iterator($x, Yql::DependsOn("A1")))); + +select Yql::Collect($f1(Yql::Iterator($x, Yql::DependsOn("A2")))); + +select Yql::Collect($f2(Yql::Iterator($x, Yql::DependsOn("A3")))); + +select Yql::Collect($f3(Yql::Iterator($x, Yql::DependsOn("A4")))); + +select Yql::Collect($f4(Yql::Iterator($x, Yql::DependsOn("A5")))); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B1")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $g)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B2")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f1)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B3")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f2)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B4")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f3)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B5")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f4)); diff --git a/yql/essentials/udfs/common/python/python3_small/test/ya.make b/yql/essentials/udfs/common/python/python3_small/test/ya.make new file mode 100644 index 0000000000..ac03d94668 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/ya.make @@ -0,0 +1,10 @@ +YQL_UDF_TEST_CONTRIB() + +TIMEOUT(300) +SIZE(MEDIUM) + +DEPENDS( + yql/essentials/udfs/common/python/python3_small +) 
+ +END() diff --git a/yql/essentials/udfs/common/python/python3_small/ya.make b/yql/essentials/udfs/common/python/python3_small/ya.make new file mode 100644 index 0000000000..f815fa8d75 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(python3_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME Python3 + RESOURCE_NAME Python3 +) + +PEERDIR( + yql/essentials/public/udf +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h new file mode 100644 index 0000000000..a4e393b486 --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h @@ -0,0 +1,111 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_registrator.h> +#include <yql/essentials/public/udf/udf_terminator.h> +#include <yql/essentials/udfs/common/python/bindings/py_ptr.h> +#include <yql/essentials/udfs/common/python/bindings/py_callable.h> +#include <yql/essentials/udfs/common/python/bindings/py_cast.h> +#include <yql/essentials/udfs/common/python/bindings/py_errors.h> +#include <yql/essentials/udfs/common/python/bindings/py_gil.h> +#include <yql/essentials/udfs/common/python/bindings/py_utils.h> +#include <yql/essentials/udfs/common/python/bindings/py_yql_module.h> + +#include <util/generic/yexception.h> +#include <util/stream/str.h> +#include <util/stream/printf.h> +#include <util/string/builder.h> +#include <util/string/cast.h> + +using namespace NYql::NUdf; +using namespace NPython; + +////////////////////////////////////////////////////////////////////////////// +// TPythonFunctionFactory +////////////////////////////////////////////////////////////////////////////// +class TPythonFunctionFactory: public 
TBoxedValue +{ +public: + TPythonFunctionFactory( + const TStringRef& name, + const TStringRef& tag, + const TType* functionType, + ITypeInfoHelper::TPtr&& helper, + const NYql::NUdf::TSourcePosition& pos) + : Ctx(new TPyContext(helper, tag, pos)) + , FunctionName(name) + , FunctionType_(functionType) + { + } + + ~TPythonFunctionFactory() { + Ctx->Cleanup(); + PyCleanup(); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override + { + TPyCastContext::TPtr castCtx = MakeIntrusive<TPyCastContext>(valueBuilder, Ctx); + + // for get propper c-compatible null-terminating string + TString source(args[0].AsStringRef()); + + TPyGilLocker lock; + TPyObjectPtr module = CompileModule(FunctionName, source); + if (!module) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to compile module: " << GetLastErrorAsString()).data()); + } + + TPyObjectPtr function(PyObject_GetAttrString(module.Get(), FunctionName.data())); + if (!function) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to find entry point: " << GetLastErrorAsString()).data()); + } + + if (!PyCallable_Check(function.Get())) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Entry point is not a callable").data()); + } + + try { + SetupCallableSettings(castCtx, function.Get()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to setup callable settings: " + << e.what()).data()); + } + return FromPyCallable(castCtx, FunctionType_, function.Release()); + } + + static TPyObjectPtr CompileModule(const TString& name, const TString& source) { + unsigned int moduleNum = AtomicCounter++; + TString filename(TStringBuf("embedded:")); + filename += name; + + TPyObjectPtr module, code; + if (HasEncodingCookie(source)) { + code.ResetSteal(Py_CompileString(source.data(), filename.data(), Py_file_input)); + } else { + PyCompilerFlags cflags; + cflags.cf_flags = PyCF_SOURCE_IS_UTF8; + + 
code.ResetSteal(Py_CompileStringFlags( + source.data(), filename.data(), Py_file_input, &cflags)); + } + + if (code) { + TString nameWithNum = name + ToString(moduleNum); + char* moduleName = const_cast<char*>(nameWithNum.data()); + module.ResetSteal(PyImport_ExecCodeModule(moduleName, code.Get())); + } + + return module; + } + + const TPyContext::TPtr Ctx; + const TString FunctionName; + const TType* FunctionType_; + inline static std::atomic_uint AtomicCounter = 0; +}; diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp new file mode 100644 index 0000000000..b1739a1775 --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp @@ -0,0 +1,232 @@ +#include "python_udf.h" +#include "python_function_factory.h" + +#include <yql/essentials/public/udf/udf_version.h> +#include <yql/essentials/udfs/common/python/bindings/py_utils.h> + +#include <util/generic/vector.h> +#include <util/system/execpath.h> + +namespace { + +#if PY_MAJOR_VERSION >= 3 +#define PYTHON_PROGRAMM_NAME L"YQL::Python3" +#else +#define PYTHON_PROGRAMM_NAME "YQL::Python2" +#endif + +int AddToPythonPath(const TVector<TStringBuf>& pathVals) +{ + char pathVar[] = "path"; // PySys_{Get,Set}Object take a non-const char* arg + + TPyObjectPtr sysPath(PySys_GetObject(pathVar), TPyObjectPtr::ADD_REF); + if (!sysPath) return -1; + + for (const auto& val: pathVals) { + TPyObjectPtr pyStr = PyRepr(val.data()); + int rc = PyList_Append(sysPath.Get(), pyStr.Get()); + if (rc != 0) { + return rc; + } + } + + return PySys_SetObject(pathVar, sysPath.Get()); +} + +void InitArcadiaPythonRuntime() +{ + // Arcadia static python import hook resides in __res module + // It modifies sys.meta_path upon import + + TPyObjectPtr mod(PyImport_ImportModule("__res")); + Y_ABORT_UNLESS(mod, "Can't import arcadia python runtime"); +} + +////////////////////////////////////////////////////////////////////////////// +// TPythonModule 
+////////////////////////////////////////////////////////////////////////////// +class TPythonModule: public IUdfModule +{ +public: + TPythonModule(const TString& resourceName, EPythonFlavor pythonFlavor, bool standalone = true) + : ResourceName(resourceName), Standalone(standalone) + { + if (Standalone) { + Py_SetProgramName(PYTHON_PROGRAMM_NAME); + PrepareYqlModule(); + Py_Initialize(); + } + + InitYqlModule(pythonFlavor, standalone); + + const auto rc = PyRun_SimpleString(R"( +# numpy on import may find installed openblas library and load it, +# which in turn causes it to start CPUCOUNT threads +# with approx. 40Mb memory reserved for each thread; +# +# See more detailed explanation here: https://st.yandex-team.ru/STATLIBS-1715#5bfc68ecbbc039001cec572a +# +# Thus, we reduce negative effects as much as possible +import os +os.environ['OPENBLAS_NUM_THREADS'] = '1' + + +# Following part allows us later to format tracebacks via sys.excepthook +# in thread-safe manner +import sys +import threading +if sys.version_info >= (3, 0): + from io import StringIO, TextIOWrapper as SysStderrType +else: + from cStringIO import StringIO + SysStderrType = file + +class StderrLocal(threading.local): + + def __init__(self): + self.is_real_mode = True + self.buffer = StringIO() + + +class StderrProxy(object): + def __init__(self, stderr): + self._stderr = stderr + self._tls = StderrLocal() + + def _toggle_real_mode(self): + self._tls.is_real_mode = not self._tls.is_real_mode + if not self._tls.is_real_mode: + self._tls.buffer.clear() + + def _get_value(self): + assert not self._tls.is_real_mode + return self._tls.buffer.getvalue() + + def __getattr__(self, attr): + target = self._stderr + if not self._tls.is_real_mode: + target = self._tls.buffer + + return getattr(target, attr) + +if isinstance(sys.stderr, SysStderrType): + sys.stderr = StderrProxy(sys.stderr) +)"); + Y_ABORT_UNLESS(rc >= 0, "Can't setup module"); + + if (pythonFlavor == EPythonFlavor::Arcadia) { + 
InitArcadiaPythonRuntime(); + } + +#ifndef _win_ + if (Standalone) { + TVector<TStringBuf> paths; + if (pythonFlavor == EPythonFlavor::System) { + paths.push_back(TStringBuf("/usr/lib/python2.7/dist-packages")); + } + paths.push_back(TStringBuf(".")); + const auto r = AddToPythonPath(paths); + Y_ABORT_UNLESS(r >= 0, "Can't add dist-packages into sys.path"); + } +#endif + + char executableVar[] = "executable"; // PySys_{Get,Set}Object take a non-const char* arg + TPyObjectPtr pyExecutableStr = PyRepr(GetExecPath().data()); + Y_ABORT_UNLESS(PySys_SetObject(executableVar, pyExecutableStr.Get()) >= 0, "Can't set sys.executable"); + + if (Standalone) { + PyEval_InitThreads(); + MainThreadState_ = PyEval_SaveThread(); + } + } + + ~TPythonModule() { + if (Standalone) { + PyEval_RestoreThread(MainThreadState_); + Py_Finalize(); + } + } + + void CleanupOnTerminate() const final { + PyCleanup(); + } + + void GetAllFunctions(IFunctionsSink&) const final {} + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + if (flags & TFlags::TypesOnly) { + return; + } + + try { + auto typeHelper = builder.TypeInfoHelper(); + if (ETypeKind::Callable != typeHelper->GetTypeKind(userType)) { + return builder.SetError(TStringRef::Of("Expected callable type")); + } + + const auto pos = builder.GetSourcePosition(); + builder.Implementation(new TPythonFunctionFactory(name, ResourceName, userType, std::move(typeHelper), pos)); + } catch (const yexception& e) { + builder.SetError(TStringBuf(e.what())); + } + } + +private: + TString ResourceName; + bool Standalone; + PyThreadState* MainThreadState_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TStubModule +////////////////////////////////////////////////////////////////////////////// +class TStubModule: public IUdfModule { + void 
GetAllFunctions(IFunctionsSink&) const final {} + + void BuildFunctionTypeInfo( + const TStringRef& /*name*/, + TType* /*userType*/, + const TStringRef& /*typeConfig*/, + ui32 flags, + IFunctionTypeInfoBuilder& /*builder*/) const final + { + Y_DEBUG_ABORT_UNLESS(flags & TFlags::TypesOnly, + "in stub module this function can be called only for types loading"); + } + + void CleanupOnTerminate() const final {} +}; + +} // namespace + +void NKikimr::NUdf::RegisterYqlPythonUdf( + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor) +{ + if (flags & IRegistrator::TFlags::TypesOnly) { + registrator.AddModule(moduleName, new TStubModule); + } else { + registrator.AddModule( + moduleName, + NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true) + ); + } +} + +TUniquePtr<NKikimr::NUdf::IUdfModule> NKikimr::NUdf::GetYqlPythonUdfModule( + TStringBuf resourceName, NKikimr::NUdf::EPythonFlavor pythonFlavor, + bool standalone +) { + return new TPythonModule(TString(resourceName), pythonFlavor, standalone); +} diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h new file mode 100644 index 0000000000..16d7da096d --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h @@ -0,0 +1,26 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_registrator.h> + +namespace NYql { +namespace NUdf { + +enum class EPythonFlavor { + System, + Arcadia, +}; + +void RegisterYqlPythonUdf( + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor); + +TUniquePtr<IUdfModule> GetYqlPythonUdfModule( + TStringBuf resourceName, + EPythonFlavor pythonFlavor, + bool standalone); + +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports 
b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports new file mode 100644 index 0000000000..2ffd6f54b5 --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports @@ -0,0 +1,5 @@ +C Register +C AbiVersion +C RunPython +C BindSymbols +C SetBackTraceCallback diff --git a/yql/essentials/udfs/common/python/python_udf/ya.make b/yql/essentials/udfs/common/python/python_udf/ya.make new file mode 100644 index 0000000000..9a2090665a --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/ya.make @@ -0,0 +1,20 @@ +PY23_NATIVE_LIBRARY() + +YQL_ABI_VERSION(2 27 0) + +SRCS( + python_udf.cpp +) + +PEERDIR( + yql/essentials/public/udf + yql/essentials/udfs/common/python/bindings +) + +CFLAGS( + -DDISABLE_PYDEBUG +) + +NO_COMPILER_WARNINGS() + +END() diff --git a/yql/essentials/udfs/common/python/system_python/README.MD b/yql/essentials/udfs/common/python/system_python/README.MD new file mode 100644 index 0000000000..16d46fd51d --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/README.MD @@ -0,0 +1,7 @@ +python3_N folders here are mirrors of python3_small, adjusted for system python (Name Python3 -> SystemPython3_N, LDFLAGS(-lpython3.N)) + +They are supposed to be built with local python: `ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.N -DPYTHON_CONFIG=python3.N-config python3.N` + +One way to get all pythons on the same machine is `sudo add-apt-repository ppa:deadsnakes/ppa` and `sudo apt install python3.N-dev` + +Use build_system_python_udfs.sh to build all python udfs with system pythons(local pythons) diff --git a/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh new file mode 100755 index 0000000000..8dd2245230 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set 
-eux +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.8 -DPYTHON_CONFIG=python3.8-config python3_8 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.9 -DPYTHON_CONFIG=python3.9-config python3_9 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.10 -DPYTHON_CONFIG=python3.10-config python3_10 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.11 -DPYTHON_CONFIG=python3.11-config python3_11 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.12 -DPYTHON_CONFIG=python3.12-config python3_12 diff --git a/yql/essentials/udfs/common/python/system_python/python3_10/ya.make b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make new file mode 100644 index 0000000000..12068a33a1 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_10_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_10 + RESOURCE_NAME SystemPython3_10 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.10") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_11/ya.make b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make new file mode 100644 index 0000000000..483432b9b9 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_11_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_11 + RESOURCE_NAME SystemPython3_11 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.11") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_12/ya.make b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make new file mode 100644 index 0000000000..8220fda0ea --- /dev/null +++ 
b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_12_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_12 + RESOURCE_NAME SystemPython3_12 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.12") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_8/ya.make b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make new file mode 100644 index 0000000000..df447bacb4 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_8_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_8 + RESOURCE_NAME SystemPython3_8 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.8") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_9/ya.make b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make new file mode 100644 index 0000000000..ea3e5d849e --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_9_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_9 + RESOURCE_NAME SystemPython3_9 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.9") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/ya.make b/yql/essentials/udfs/common/python/system_python/ya.make new file mode 100644 index 0000000000..3afc7796bd --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/ya.make @@ -0,0 +1,7 @@ +RECURSE( + python3_8 + python3_9 + python3_10 + python3_11 + python3_12 +) diff --git a/yql/essentials/udfs/common/python/ya.make b/yql/essentials/udfs/common/python/ya.make new file mode 100644 index 0000000000..bb6a4c8d5b --- /dev/null +++ b/yql/essentials/udfs/common/python/ya.make @@ -0,0 +1,10 @@ +# This module should not be 
exported under CMake since it requires Python build +NO_BUILD_IF(STRICT EXPORT_CMAKE) + +RECURSE( + bindings + main_py3 + python3_small + python_udf + system_python +) |