aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs/common/python
diff options
context:
space:
mode:
authorimunkin <imunkin@yandex-team.com>2024-11-08 10:00:23 +0300
committerimunkin <imunkin@yandex-team.com>2024-11-08 10:12:13 +0300
commita784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch)
tree05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs/common/python
parentd70137a7b530ccaa52834274913bbb5a3d1ca06e (diff)
downloadydb-a784a2f943d6e15caa6241e2e96d80aac6dbf375.tar.gz
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories: * clickhouse/client * datetime * knn * roaring commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs/common/python')
-rw-r--r--yql/essentials/udfs/common/python/bindings/py27_backports.c91
-rw-r--r--yql/essentials/udfs/common/python/bindings/py27_backports.h26
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_callable.cpp423
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_callable.h22
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp87
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast.cpp955
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast.h45
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp90
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_ctx.h120
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_decimal.cpp59
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_decimal.h12
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp122
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_dict.cpp683
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_dict.h50
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp722
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_errors.cpp72
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_errors.h24
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_gil.h37
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_iterator.cpp280
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_iterator.h23
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp705
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp382
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_list.cpp1116
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_list.h33
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_list_ut.cpp1025
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_number_ut.cpp359
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp56
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_ptr.h69
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_resource.cpp116
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_resource.h20
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp81
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_stream.cpp343
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_stream.h24
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp208
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_string_ut.cpp98
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_struct.cpp188
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_struct.h17
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp307
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tuple.cpp61
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tuple.h17
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp108
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp85
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_utils.cpp89
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_utils.h28
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp37
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_variant.cpp97
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_variant.h17
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp101
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_void.cpp117
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_void.h21
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_void_ut.cpp37
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_yql_module.cpp251
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_yql_module.h11
-rw-r--r--yql/essentials/udfs/common/python/bindings/typing.py188
-rw-r--r--yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h227
-rw-r--r--yql/essentials/udfs/common/python/bindings/ut3/ya.make37
-rw-r--r--yql/essentials/udfs/common/python/bindings/ya.make54
-rw-r--r--yql/essentials/udfs/common/python/main_py3/__main__.pyx50
-rw-r--r--yql/essentials/udfs/common/python/main_py3/include/main.h12
-rw-r--r--yql/essentials/udfs/common/python/main_py3/main.cpp9
-rw-r--r--yql/essentials/udfs/common/python/main_py3/ya.make13
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/canondata/result.json61
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted15
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted14
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql67
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql11
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in1
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql12
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql20
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Data.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql61
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg1
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql23
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql19
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg1
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql7
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in0
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql92
-rw-r--r--yql/essentials/udfs/common/python/python3_small/test/ya.make10
-rw-r--r--yql/essentials/udfs/common/python/python3_small/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_function_factory.h111
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_udf.cpp232
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_udf.h26
-rw-r--r--yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports5
-rw-r--r--yql/essentials/udfs/common/python/python_udf/ya.make20
-rw-r--r--yql/essentials/udfs/common/python/system_python/README.MD7
-rwxr-xr-xyql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh7
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_10/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_11/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_12/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_8/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/python3_9/ya.make16
-rw-r--r--yql/essentials/udfs/common/python/system_python/ya.make7
-rw-r--r--yql/essentials/udfs/common/python/ya.make10
98 files changed, 11693 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.c b/yql/essentials/udfs/common/python/bindings/py27_backports.c
new file mode 100644
index 0000000000..cf21a97cef
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py27_backports.c
@@ -0,0 +1,91 @@
+#include "py27_backports.h"
+
+
+// Provide implementations from python 2.7.15 as backports
+
+int /* Backport of _PySlice_Unpack: copies start/stop/step of a slice object into the out-parameters. Returns 0 on success, -1 on failure. */
+_PySlice_Unpack(PyObject *_r,
+ Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
+{
+ PySliceObject *r = (PySliceObject *)_r; /* no type check is done here: the caller must pass a slice object */
+ /* this is harder to get right than you might think */
+
+ assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX);
+
+ if (r->step == Py_None) {
+ *step = 1; /* an omitted step defaults to 1 */
+ }
+ else {
+ if (!_PyEval_SliceIndex(r->step, step)) return -1; /* conversion of the step failed */
+ if (*step == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "slice step cannot be zero");
+ return -1;
+ }
+ /* Here *step might be -PY_SSIZE_T_MAX-1; in this case we replace it
+ * with -PY_SSIZE_T_MAX. This doesn't affect the semantics, and it
+ * guards against later undefined behaviour resulting from code that
+ * does "step = -step" as part of a slice reversal.
+ */
+ if (*step < -PY_SSIZE_T_MAX)
+ *step = -PY_SSIZE_T_MAX;
+ }
+
+ if (r->start == Py_None) {
+ *start = *step < 0 ? PY_SSIZE_T_MAX : 0; /* omitted start: the far end for a negative step, 0 otherwise */
+ }
+ else {
+ if (!_PyEval_SliceIndex(r->start, start)) return -1;
+ }
+
+ if (r->stop == Py_None) {
+ *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX; /* omitted stop: an extreme sentinel in the direction of travel */
+ }
+ else {
+ if (!_PyEval_SliceIndex(r->stop, stop)) return -1;
+ }
+
+ return 0; /* the values are not yet clipped to any sequence length */
+}
+
+Py_ssize_t /* Backport of _PySlice_AdjustIndices: clips *start and *stop against `length` in place and returns the number of items the slice selects. */
+_PySlice_AdjustIndices(Py_ssize_t length,
+ Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step)
+{
+ /* this is harder to get right than you might think */
+
+ assert(step != 0);
+ assert(step >= -PY_SSIZE_T_MAX);
+
+ if (*start < 0) {
+ *start += length; /* a negative index counts from the end */
+ if (*start < 0) {
+ *start = (step < 0) ? -1 : 0; /* still out of range: clamp to just before the first item (negative step) or to 0 */
+ }
+ }
+ else if (*start >= length) {
+ *start = (step < 0) ? length - 1 : length; /* clamp to the last item (negative step) or to one past the end */
+ }
+
+ if (*stop < 0) {
+ *stop += length;
+ if (*stop < 0) {
+ *stop = (step < 0) ? -1 : 0;
+ }
+ }
+ else if (*stop >= length) {
+ *stop = (step < 0) ? length - 1 : length;
+ }
+
+ if (step < 0) {
+ if (*stop < *start) {
+ return (*start - *stop - 1) / (-step) + 1; /* item count when walking downwards from start to stop (exclusive) */
+ }
+ }
+ else {
+ if (*start < *stop) {
+ return (*stop - *start - 1) / step + 1; /* item count when walking upwards from start to stop (exclusive) */
+ }
+ }
+ return 0; /* empty slice */
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.h b/yql/essentials/udfs/common/python/bindings/py27_backports.h
new file mode 100644
index 0000000000..766af6a76f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py27_backports.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "Python.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Declare functions which are to be backported
+// (see details about need for backports in ya.make)
+
+int _PySlice_Unpack(PyObject *slice,
+ Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step);
+
+Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length,
+ Py_ssize_t *start, Py_ssize_t *stop,
+ Py_ssize_t step);
+
+// Declare py23 compatible names
+
+#define PySlice_Unpack _PySlice_Unpack
+#define PySlice_AdjustIndices _PySlice_AdjustIndices
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.cpp b/yql/essentials/udfs/common/python/bindings/py_callable.cpp
new file mode 100644
index 0000000000..c60403bdca
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_callable.cpp
@@ -0,0 +1,423 @@
+#include "py_callable.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_stream.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyCallableObject
+//////////////////////////////////////////////////////////////////////////////
+struct TPyCallableObject
+{
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+ const NUdf::TType* Type;
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ NUdf::TCallableTypeInspector Inspector;
+
+ TPyCallableObject(const TPyCastContext::TPtr& castCtx, const NUdf::TType* type)
+ : CastCtx(castCtx)
+ , Type(type)
+ , Inspector(*castCtx->PyCtx->TypeInfoHelper, type)
+ {}
+};
+
+inline TPyCallableObject* CastToCallable(PyObject* o)
+{
+ return reinterpret_cast<TPyCallableObject*>(o);
+}
+
+void CallableDealloc(PyObject* self)
+{
+ delete CastToCallable(self);
+}
+
+PyObject* CallableRepr(PyObject*)
+{
+ // TODO: print callable signature
+ return PyRepr("<yql.TCallable>").Release();
+}
+
+PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) // tp_call slot of yql.TCallable: invokes the wrapped boxed value with positional arguments.
+{
+ Y_UNUSED(kwargs); // keyword arguments are not looked at
+
+ PY_TRY {
+ TPyCallableObject* callable = CastToCallable(self);
+ auto callableType = callable->Type;
+ auto valueBuilder = callable->CastCtx->ValueBuilder;
+ const auto& inspector = callable->Inspector;
+
+ TSmallVec<NUdf::TUnboxedValue> cArgs;
+ cArgs.resize(inspector.GetArgsCount()); // one slot per argument declared in the callable type
+ FromPyArgs(callable->CastCtx, callableType, args, cArgs.data(), inspector); // converts the Python args into cArgs
+
+ NUdf::TUnboxedValue result;
+ {
+ TPyGilUnlocker unlock; // NOTE(review): presumably releases the GIL for this scope only - confirm in py_gil.h
+ result = NUdf::TBoxedValueAccessor::Run(*callable->Value.Get(), valueBuilder, cArgs.data());
+ }
+
+ return ToPyObject(callable->CastCtx, inspector.GetReturnType(), result).Release(); // ownership of the result object passes to the caller
+ } PY_CATCH(nullptr)
+}
+
+}
+
+PyTypeObject PyCallableType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TCallable"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyCallableObject)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , CallableDealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , CallableRepr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , CallableCall),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , 0),
+ INIT_MEMBER(tp_doc , "yql.TCallable object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , nullptr),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyCallable
+//////////////////////////////////////////////////////////////////////////////
+class TPyCallable: public NUdf::TBoxedValue // Boxed value wrapping a Python callable; Run() converts the arguments, calls it and converts the result back.
+{
+public:
+ TPyCallable(
+ PyObject* function,
+ const NUdf::TType* functionType,
+ const TPyCastContext::TPtr& castCtx)
+ : Function_(function, TPyObjectPtr::ADD_REF)
+ , FunctionType_(functionType)
+ , CastCtx_(castCtx)
+ , Inspector_(*castCtx->PyCtx->TypeInfoHelper, functionType)
+ {
+ // keep ownership of function closure if any
+ if (PyFunction_Check(function)) {
+ PyObject* closure = PyFunction_GetClosure(function);
+ if (closure) {
+ Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF);
+ }
+ }
+ }
+
+ ~TPyCallable() {
+ TPyGilLocker lock; // NOTE(review): presumably holds the GIL while the Python references below are dropped - confirm in py_gil.h
+ Closure_.Reset();
+ Function_.Reset();
+ CastCtx_.Reset();
+ }
+
+private:
+ NUdf::TUnboxedValue Run(
+ const NUdf::IValueBuilder*,
+ const NUdf::TUnboxedValuePod* args) const final
+ {
+ TPyGilLocker lock;
+ try {
+ TPyObjectPtr pyArgs = ToPyArgs(CastCtx_, FunctionType_, args, Inspector_);
+ TPyObjectPtr resultObj =
+ PyObject_CallObject(Function_.Get(), pyArgs.Get());
+ if (!resultObj) { // the Python call failed: report the last Python error via UdfTerminate
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+ }
+
+ auto returnType = Inspector_.GetReturnType();
+ if (CastCtx_->PyCtx->TypeInfoHelper->GetTypeKind(returnType) == NUdf::ETypeKind::Stream) {
+ return FromPyStream(CastCtx_, returnType, resultObj, Function_, Closure_, pyArgs); // stream results also receive the function, its closure and the args
+ }
+
+ return FromPyObject(CastCtx_, returnType, resultObj.Get());
+ } catch (const yexception& e) { // conversion failures are reported via UdfTerminate as well
+ UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" << e.what()).data());
+ }
+ }
+
+ TPyObjectPtr Function_; // the wrapped Python callable (own reference)
+ TPyObjectPtr Closure_; // closure of Function_ when it is a plain Python function that has one, otherwise left unset
+ const NUdf::TType* FunctionType_;
+ TPyCastContext::TPtr CastCtx_;
+ NUdf::TCallableTypeInspector Inspector_;
+};
+
+
+TPyObjectPtr ToPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* type,
+ const NUdf::TUnboxedValuePod& value)
+{
+ TPyCallableObject* callable = new TPyCallableObject(castCtx, type);
+ PyObject_INIT(callable, &PyCallableType);
+
+ callable->Value.Set(castCtx->PyCtx, value.AsBoxed());
+
+ return reinterpret_cast<PyObject*>(callable);
+}
+
+NUdf::TUnboxedValue FromPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* type,
+ PyObject* value)
+{
+ return NUdf::TUnboxedValuePod(new TPyCallable(value, type, castCtx));
+}
+
+TMaybe<TPyObjectPtr> GetOptionalAttribute(PyObject* value, const char* attrName) { // Fetches attribute `attrName` of `value`; a missing attribute yields Nothing(), any other failure throws yexception.
+ if (TPyObjectPtr attr = PyObject_GetAttrString(value, attrName)) {
+ return attr;
+ } else {
+ if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ PyErr_Clear(); // a missing attribute is not an error here: drop the pending AttributeError
+ return Nothing();
+ } else {
+ throw yexception() << "Cannot get attribute '" << attrName << "', error: " << GetLastErrorAsString(); // any other Python error is surfaced to the C++ caller
+ }
+ }
+}
+
+
+struct TPySecureParam
+{
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+
+ TPySecureParam(const TPyCastContext::TPtr& castCtx) : CastCtx(castCtx) {}
+};
+
+inline TPySecureParam* CastToSecureParam(PyObject* o)
+{
+ return reinterpret_cast<TPySecureParam*>(o);
+}
+
+void SecureParamDealloc(PyObject* self)
+{
+ delete CastToSecureParam(self);
+}
+
+PyObject* SecureParamRepr(PyObject*)
+{
+ return PyRepr("<yql.TSecureParam>").Release();
+}
+
+PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) // tp_call slot of yql.TSecureParam: resolves a secure parameter by key through the value builder.
+{
+ Y_UNUSED(kwargs); // keyword arguments are not looked at
+
+ struct PyBufDeleter {
+ void operator() (Py_buffer* view) { PyBuffer_Release(view); }
+ };
+ Py_buffer input;
+ if (!PyArg_ParseTuple(args, "s*", &input)) { // exactly one argument, parsed into a buffer view
+ return nullptr;
+ }
+ std::unique_ptr<Py_buffer, PyBufDeleter> bufPtr(&input); // `input` lives on the stack; the deleter only releases the buffer view on scope exit
+ auto valueBuilder = CastToSecureParam(self)->CastCtx->ValueBuilder;
+ NUdf::TStringRef key(static_cast<const char*>(input.buf), input.len);
+ PY_TRY {
+ if (!valueBuilder->GetSecureParam(key, key)) { // `key` is passed as both arguments; NOTE(review): presumably overwritten with the resolved value on success - confirm
+ throw yexception() << "Cannot get secure parameter for key: " << key;
+ }
+ return PyRepr(TStringBuf(key.Data(), key.Size())).Release();
+ } PY_CATCH(nullptr)
+}
+
+static PyTypeObject PySecureParamType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TSecureParam"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPySecureParam)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , SecureParamDealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , SecureParamRepr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , SecureParamCall),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , 0),
+ INIT_MEMBER(tp_doc , "yql.TSecureParam object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , nullptr),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+TPyObjectPtr ToPySecureParam(const TPyCastContext::TPtr& castCtx)
+{
+ TPySecureParam* ret = new TPySecureParam(castCtx);
+ PyObject_INIT(ret, &PySecureParamType);
+ return reinterpret_cast<PyObject*>(ret);
+}
+
+
+void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value) { // Copies the optional `_yql_*` attributes of `value` into castCtx and attaches `_yql_secure_param` to it; throws yexception on a malformed attribute.
+ if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) try {
+ castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get());
+ } catch (const yexception& e) {
+ throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what();
+ }
+
+ if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) try {
+ PyObject* const converters = convertYson->Get();
+ if (!PyTuple_Check(converters) || PyTuple_GET_SIZE(converters) != 2) { // the PyTuple_GET_* macros do no type checking, so non-tuples must be rejected first
+ throw yexception() << "Expected tuple of 2 callables";
+ }
+
+ castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(converters, 0)); // GET_ITEM yields a borrowed reference, hence ResetAddRef
+ castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(converters, 1));
+ if (!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) {
+ throw yexception() << "Expected tuple of 2 callables";
+ }
+ } catch (const yexception& e) {
+ throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what();
+ }
+
+ if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) try {
+ PyObject* bytesValue = nullptr; // new reference owned by this scope; released manually on every path below
+ if (PyBytes_Check(bytesDecodeMode->Get())) {
+ bytesValue = PyObject_Bytes(bytesDecodeMode->Get());
+ } else if (PyUnicode_Check(bytesDecodeMode->Get())) {
+ bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get());
+ } else {
+ throw yexception() << "Expected bytes or unicode";
+ }
+ if (!bytesValue) {
+ PyErr_Clear(); // the Python error is replaced by the yexception below
+ throw yexception() << "Failed to convert to bytes";
+ }
+
+ TStringBuf view(PyBytes_AS_STRING(bytesValue)); // points into bytesValue, which must stay alive while `view` is compared
+ if (view == "never") {
+ castCtx->BytesDecodeMode = EBytesDecodeMode::Never;
+ } else if (view == "strict") {
+ castCtx->BytesDecodeMode = EBytesDecodeMode::Strict;
+ } else {
+ Py_DECREF(bytesValue);
+ throw yexception() << "Expected values 'never' or 'strict'";
+ }
+ Py_DECREF(bytesValue);
+ } catch (const yexception& e) {
+ throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what();
+ }
+
+ if (PyObject_SetAttrString(value, "_yql_secure_param", ToPySecureParam(castCtx).Get()) != 0) { // exposes the secure-parameter accessor to the Python side
+ throw yexception() << "Cannot set attribute '_yql_secure_param'";
+ }
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.h b/yql/essentials/udfs/common/python/bindings/py_callable.h
new file mode 100644
index 0000000000..4ce79e1d7f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_callable.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+extern PyTypeObject PyCallableType;
+
+TPyObjectPtr ToPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+NKikimr::NUdf::TUnboxedValue FromPyCallable(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp
new file mode 100644
index 0000000000..1c58d7b371
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp
@@ -0,0 +1,87 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyCallableTest) {
+ struct TTestCallable: public NUdf::TBoxedValue {
+ NUdf::TUnboxedValue Run(
+ const NUdf::IValueBuilder* valueBuilder,
+ const NUdf::TUnboxedValuePod* args) const override
+ {
+ Y_UNUSED(valueBuilder);
+ return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42);
+ }
+ };
+
+ Y_UNIT_TEST(FromPyFunction) {
+ TPythonTestEngine engine;
+ const NUdf::IValueBuilder* vb = &engine.GetValueBuilder();
+
+ engine.ToMiniKQL<char* (*)(char*, ui32)>(
+ "def Test():\n"
+ " def test(str, count):\n"
+ " return str * count\n"
+ " return test",
+ [vb](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ NUdf::TUnboxedValue args[2];
+ args[0] = vb->NewString("j");
+ args[1] = NUdf::TUnboxedValuePod((ui32) 5);
+ auto result = value.Run(vb, args);
+
+ UNIT_ASSERT(result);
+ UNIT_ASSERT(5 == result.AsStringRef().Size());
+ UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj");
+ });
+ }
+
+ Y_UNIT_TEST(ToPython) {
+ TPythonTestEngine engine;
+ engine.ToPython<i32 (*)(i32)>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new TTestCallable);
+ },
+ "def Test(value):\n"
+ " assert type(value).__name__ == 'TCallable'\n"
+ " assert value.__call__ != None\n"
+ " assert value(-2) == 40\n"
+ " assert value(-1) == 41\n"
+ " assert value(0) == 42\n"
+ " assert value(1) == 43\n"
+ " assert value(2) == 44\n");
+ }
+
+ Y_UNIT_TEST(ToPythonAndBack) {
+ struct TTestCallable: public NUdf::TBoxedValue {
+ NUdf::TUnboxedValue Run(
+ const NUdf::IValueBuilder* valueBuilder,
+ const NUdf::TUnboxedValuePod* args) const override
+ {
+ Y_UNUSED(valueBuilder);
+ return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42);
+ }
+ };
+
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<i32 (*)(i32)>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new TTestCallable);
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32) 5);
+ const auto result = value.Run(nullptr, &arg);
+
+ UNIT_ASSERT(result);
+ UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>());
+ });
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp
new file mode 100644
index 0000000000..3aa5537b21
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp
@@ -0,0 +1,955 @@
+#include "py_cast.h"
+#include "py_ptr.h"
+#include "py_errors.h"
+#include "py_callable.h"
+#include "py_dict.h"
+#include "py_list.h"
+#include "py_gil.h"
+#include "py_utils.h"
+#include "py_void.h"
+#include "py_resource.h"
+#include "py_stream.h"
+#include "py_struct.h"
+#include "py_tuple.h"
+#include "py_variant.h"
+#include "py_decimal.h"
+
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_type_printer.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+#include <yql/essentials/utils/utf8.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/string/join.h>
+#include <util/string/builder.h>
+
+// Aliases for the CPython PyLong conversion routines: the long long variants
+// when HAVE_LONG_LONG is defined, the plain long variants otherwise. The
+// Asi64/Asui64 names are what YQL_PyLong_As##BigType pastes to in the
+// *_LONG macros below.
+#ifdef HAVE_LONG_LONG
+# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask
+# define YQL_PyLong_Asi64 PyLong_AsLongLong
+# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong
+#else
+# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask
+# define YQL_PyLong_Asi64 PyLong_AsLong
+# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong
+#endif
+
+// Defines the PyCast<Type>(Type) specialization that turns a C++ scalar into a
+// Python object by passing it to Py_BuildValue with the given format string.
+#define TO_PYTHON(Format, Type) \
+ template <> \
+ ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \
+ return Py_BuildValue(Format, value); \
+ }
+
+// Defines the PyCast<Type>(const Type&) specialization for string-like types:
+// the value is viewed as a TStringBuf and copied into a Python bytes object.
+// A view whose data pointer is null is mapped to Python None.
+#define TO_PYTHON_BYTES(Type) \
+ template <> \
+ ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \
+ TStringBuf value = val; \
+ if (value.data() == nullptr) \
+ Py_RETURN_NONE; \
+ const Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \
+ return PyBytes_FromStringAndSize(value.data(), size); \
+ }
+
+// Defines the ToPyUnicode<Type>(const Type&) specialization: the value is
+// viewed as a TStringBuf and handed to PyUnicode_FromStringAndSize. A view
+// whose data pointer is null is mapped to Python None. The result of
+// PyUnicode_FromStringAndSize is returned unchecked, so callers must test the
+// returned pointer (ToPyData does so for the String type).
+#define TO_PYTHON_UNICODE(Type) \
+ template <> \
+ ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \
+ TStringBuf value = val; \
+ if (value.data() == nullptr) \
+ Py_RETURN_NONE; \
+ Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \
+ return PyUnicode_FromStringAndSize(value.data(), size); \
+ }
+
+// Throws yexception unless Py<Type>_Check(Value) succeeds. The exception text
+// is Message, the expected type name and the repr of the offending object.
+// Wrapped in do/while(0) so it can be used as a single statement.
+#define PY_ENSURE_TYPE(Type, Value, Message) \
+ do { \
+ if (!Py##Type##_Check(Value)) { \
+ throw yexception() << Message << " " #Type "; Object repr: " \
+ << PyObjectRepr(Value); \
+ } \
+ } while (0)
+
+// Defines PyCast<Type>(PyObject*) for floating point types. The object is read
+// with PyFloat_AsDouble; a -1.0 result together with a pending Python error is
+// treated as failure and reported through ThrowCastException.
+// NOTE(review): PyErr_Clear() runs before ThrowCastException, which appends
+// GetLastErrorAsString(); if that helper reads the pending Python error the
+// detail is already gone at that point - confirm.
+#define FROM_PYTHON_FLOAT(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ double result = PyFloat_AsDouble(value); \
+ if (result == -1.0 && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ ThrowCastException(value, "Float"); \
+ } \
+ return static_cast<Type>(result); \
+ }
+
+// Defines PyCast<Type>(PyObject*) for objects that pass PyLong_Check.
+// The value is read through YQL_PyLong_As<BigType> and must fit into Type:
+//  - conversion failure -> ThrowCastException (pending Python error cleared);
+//  - value outside [Min<Type>(), Max<Type>()] -> yexception "out of range";
+//  - any other Python type -> ThrowCastTypeException.
+// The failure sentinel is -1 in the type returned by the conversion routine,
+// so it is compared in that type. Casting -1 to a narrower unsigned Type
+// (255 for ui8, ...) never matches the wide sentinel and would leave the
+// Python error pending.
+#define FROM_PYTHON_LONG(Type, BigType) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+     if (PyLong_Check(value)) { \
+         auto result = YQL_PyLong_As##BigType(value); \
+         if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+             PyErr_Clear(); \
+             ThrowCastException(value, "Long"); \
+         } \
+         if (result < Min<Type>() || result > Max<Type>()) { \
+             throw yexception() << "Python object " << PyObjectRepr(value) \
+                                << " is out of range for " << #Type; \
+         } \
+         return static_cast<Type>(result); \
+     } \
+     ThrowCastTypeException(value, "Long"); \
+ }
+
+// Python 2 flavour of PyCast<Type>(PyObject*): accepts both int (PyInt) and
+// long (PyLong) objects.
+//  - PyInt: read with PyInt_AsLong; the bounds of Type are checked only on the
+//    sides where long is wider than Type, so the casts of Min/Max<Type>() to
+//    long cannot wrap.
+//  - PyLong: read with YQL_PyLong_As<BigType>, then range-checked.
+// Conversion failures go through ThrowCastException, range violations throw
+// yexception "out of range", other Python types ThrowCastTypeException.
+// In the PyLong branch the failure sentinel is compared in the type returned
+// by the conversion routine; casting -1 to a narrower unsigned Type would
+// never match it and would leave the Python error pending.
+#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+     if (PyInt_Check(value)) { \
+         long result = PyInt_AsLong(value); \
+         if (result == -1L && PyErr_Occurred()) { \
+             PyErr_Clear(); \
+             ThrowCastException(value, "Long"); \
+         } \
+         if ( \
+             (static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>())) || \
+             (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>())) \
+         ) { \
+             throw yexception() << "Python object " << PyObjectRepr(value) \
+                                << " is out of range for " << #Type; \
+         } \
+         return static_cast<Type>(result); \
+     } else if (PyLong_Check(value)) { \
+         auto result = YQL_PyLong_As##BigType(value); \
+         if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+             PyErr_Clear(); \
+             ThrowCastException(value, "Long"); \
+         } \
+         if (result < Min<Type>() || result > Max<Type>()) { \
+             throw yexception() << "Python object " << PyObjectRepr(value) \
+                                << " is out of range for " << #Type; \
+         } \
+         return static_cast<Type>(result); \
+     } \
+     ThrowCastTypeException(value, "Long"); \
+ }
+
+// Python 3 flavour of PyCast<Type>(PyObject*) for string-like types: accepts
+// str (read via PyUnicode_AsUTF8AndSize) and bytes (read via
+// PyBytes_AsStringAndSize). Any other object, or a failed read, ends in
+// ThrowCastTypeException.
+// NOTE(review): Type is constructed from the pointer returned by CPython, so a
+// non-owning Type presumably aliases memory owned by the Python object -
+// confirm that callers keep the object alive while they use the result.
+#define FROM_PYTHON_BYTES_OR_UTF(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ if (PyUnicode_Check(value)) { \
+ Py_ssize_t size = 0U; \
+ const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
+ if (!str || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ } else if (PyBytes_Check(value)) { \
+ Py_ssize_t size = 0U; \
+ char *str = nullptr; \
+ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ } \
+ ThrowCastTypeException(value, "String"); \
+ }
+
+// Python 2 flavour of PyCast<Type>(PyObject*) for string-like types: only
+// objects passing PyBytes_Check are accepted (PY_ENSURE_TYPE throws
+// otherwise); the buffer is read with PyBytes_AsStringAndSize and a failed
+// read ends in ThrowCastTypeException.
+#define FROM_PYTHON_BYTES(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ PY_ENSURE_TYPE(Bytes, value, "Expected"); \
+ char* str = nullptr; \
+ Py_ssize_t size = 0; \
+ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ }
+
+// Non-throwing counterpart of FROM_PYTHON_FLOAT: defines
+// TryPyCast<Type>(PyObject*, Type&). On a failed PyFloat_AsDouble the pending
+// Python error is cleared, false is returned and result is left untouched.
+#define TRY_FROM_PYTHON_FLOAT(Type) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& result) { \
+ double v = PyFloat_AsDouble(value); \
+ if (v == -1.0 && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ return false; \
+ } \
+ result = static_cast<Type>(v); \
+ return true; \
+ }
+
+// Non-throwing counterpart of FROM_PYTHON_LONG: defines
+// TryPyCast<Type>(PyObject*, Type&) for objects that pass PyLong_Check.
+// Returns false (res untouched, no Python error left pending) when the object
+// is not a PyLong, the conversion fails or the value does not fit into Type.
+// The failure sentinel is compared in the type returned by the conversion
+// routine; casting -1 to a narrower unsigned Type would never match it, so the
+// function would return false with a stale Python error still set.
+#define TRY_FROM_PYTHON_LONG(Type, BigType) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& res) { \
+     if (PyLong_Check(value)) { \
+         auto result = YQL_PyLong_As##BigType(value); \
+         if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         if (result < Min<Type>() || result > Max<Type>()) { \
+             return false; \
+         } \
+         res = static_cast<Type>(result); \
+         return true; \
+     } \
+     return false; \
+ }
+
+// Non-throwing counterpart of FROM_PYTHON_INT_OR_LONG (Python 2): defines
+// TryPyCast<Type>(PyObject*, Type&) for int (PyInt) and long (PyLong) objects.
+// Returns false, leaving res untouched and no Python error pending, when the
+// object has another type, the conversion fails or the value does not fit.
+//  - res is assigned only after the range check has passed.
+//  - The PyInt range test is the same guarded test FROM_PYTHON_INT_OR_LONG
+//    uses: each bound is checked only where long is wider than Type, so the
+//    cast of Min/Max<Type>() to long cannot wrap when long is narrower.
+//  - The PyLong failure sentinel is compared in the type returned by the
+//    conversion routine rather than in Type.
+#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& res) { \
+     if (PyInt_Check(value)) { \
+         long result = PyInt_AsLong(value); \
+         if (result == -1L && PyErr_Occurred()) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         if ( \
+             (static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>())) || \
+             (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>())) \
+         ) { \
+             return false; \
+         } \
+         res = static_cast<Type>(result); \
+         return true; \
+     } else if (PyLong_Check(value)) { \
+         auto result = YQL_PyLong_As##BigType(value); \
+         if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         if (result < Min<Type>() || result > Max<Type>()) { \
+             return false; \
+         } \
+         res = static_cast<Type>(result); \
+         return true; \
+     } \
+     return false; \
+ }
+
+// Non-throwing counterpart of FROM_PYTHON_BYTES_OR_UTF (Python 3): defines
+// TryPyCast(PyObject*, Type&) for str and bytes objects.
+// Returns false, with result untouched, for any other object or when CPython
+// fails to expose the buffer. Failure paths clear the pending Python error and
+// never throw, matching the other TryPyCast specializations.
+// NOTE(review): Type is constructed from the pointer returned by CPython, so a
+// non-owning Type presumably aliases memory owned by the Python object -
+// confirm that callers keep the object alive while they use the result.
+#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \
+ template <> \
+ bool TryPyCast(PyObject* value, Type& result) { \
+     if (PyUnicode_Check(value)) { \
+         Py_ssize_t size = 0; \
+         const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
+         if (!str || size < 0) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         result = Type(str, size_t(size)); \
+         return true; \
+     } else if (PyBytes_Check(value)) { \
+         Py_ssize_t size = 0; \
+         char *str = nullptr; \
+         const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+         if (rc == -1 || size < 0) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         result = Type(str, size_t(size)); \
+         return true; \
+     } \
+     return false; \
+ }
+
+// Python 2 flavour of TryPyCast(PyObject*, Type&) for string-like types:
+// unicode objects are first encoded with PyUnicode_AsUTF8String, str/bytes
+// objects are read directly. Returns false, with result untouched and no
+// Python error pending, for other objects or when encoding or reading fails.
+// The encoder result is checked for NULL before it is handed to
+// PyBytes_AsStringAndSize.
+// NOTE(review): in the unicode branch the bytes come from the temporary utf8
+// object released at the end of that branch; for a non-owning Type the result
+// would presumably dangle - confirm which instantiations rely on this path.
+#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \
+ template <> \
+ bool TryPyCast(PyObject* value, Type& result) { \
+     if (PyUnicode_Check(value)) { \
+         const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \
+         if (!utf8.Get()) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         char* str = nullptr; \
+         Py_ssize_t size = 0; \
+         int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \
+         if (rc == -1 || size < 0) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         result = Type(str, size_t(size)); \
+         return true; \
+     } else if (PyBytes_Check(value)) { \
+         char* str = nullptr; \
+         Py_ssize_t size = 0; \
+         int rc = PyBytes_AsStringAndSize(value, &str, &size); \
+         if (rc == -1 || size < 0) { \
+             PyErr_Clear(); \
+             return false; \
+         } \
+         result = Type(str, size_t(size)); \
+         return true; \
+     } else { \
+         return false; \
+     } \
+ }
+
+namespace NPython {
+
+using namespace NKikimr;
+
+// Always throws yexception saying that an object of the given Python type
+// cannot be cast to toType; the message carries the Python type name and the
+// repr of the object. Never returns normally.
+inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) {
+ throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType
+ << "; Object repr: " << PyObjectRepr(value);
+}
+
+// Always throws yexception reporting a failed conversion of value to toType;
+// the message carries the repr of the object followed by whatever
+// GetLastErrorAsString() returns. Never returns normally.
+inline void ThrowCastException(PyObject* value, TStringBuf toType) {
+ throw yexception() << "Cast error object " << PyObjectRepr(value) << " to " << toType << ": "
+ << GetLastErrorAsString();
+}
+
+
+// Non-throwing truthiness conversion: stores PyObject_IsTrue(value) in result.
+// Returns false, with result untouched, when the truth test itself fails (for
+// example when the object's __bool__ raises). The pending Python error is
+// cleared in that case, as in the other TryPyCast specializations, so that it
+// cannot leak into later C API calls.
+template <>
+bool TryPyCast<bool>(PyObject* value, bool& result)
+{
+    int isTrue = PyObject_IsTrue(value);
+    if (isTrue == -1) {
+        PyErr_Clear();
+        return false;
+    }
+    result = (isTrue == 1);
+    return true;
+}
+
+// TryPyCast specializations. Signed targets are read through the i64
+// conversion routine, unsigned targets through the ui64 one. Python 3 has a
+// single integer type and str/bytes; Python 2 additionally accepts PyInt and
+// uses the str/unicode string macro.
+#if PY_MAJOR_VERSION >= 3
+TRY_FROM_PYTHON_LONG(i8, i64)
+TRY_FROM_PYTHON_LONG(ui8, ui64)
+TRY_FROM_PYTHON_LONG(i16, i64)
+TRY_FROM_PYTHON_LONG(ui16, ui64)
+TRY_FROM_PYTHON_LONG(i32, i64)
+TRY_FROM_PYTHON_LONG(ui32, ui64)
+TRY_FROM_PYTHON_LONG(i64, i64)
+TRY_FROM_PYTHON_LONG(ui64, ui64)
+TRY_FROM_PYTHON_BYTES_OR_UTF(TString)
+TRY_FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
+#else
+TRY_FROM_PYTHON_INT_OR_LONG(i8, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui8, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i16, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui16, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i32, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui32, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i64, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui64, ui64)
+TRY_FROM_PYTHON_STR_OR_UTF(TString)
+TRY_FROM_PYTHON_STR_OR_UTF(NUdf::TStringRef)
+#endif
+
+TRY_FROM_PYTHON_FLOAT(float)
+TRY_FROM_PYTHON_FLOAT(double)
+
+// Throwing truthiness conversion: returns PyObject_IsTrue(value) as bool and
+// throws yexception when the truth test itself fails.
+template <>
+bool PyCast<bool>(PyObject* value)
+{
+    const int truth = PyObject_IsTrue(value);
+    if (truth != -1) {
+        return truth == 1;
+    }
+
+    throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to bool. "
+                       << GetLastErrorAsString();
+}
+
+// Throwing PyCast<T>(PyObject*) specializations. Signed targets are read
+// through the i64 conversion routine, unsigned targets through the ui64 one.
+// Python 3 string targets accept str and bytes; Python 2 string targets accept
+// bytes only.
+#if PY_MAJOR_VERSION >= 3
+FROM_PYTHON_LONG(i8, i64)
+FROM_PYTHON_LONG(ui8, ui64)
+FROM_PYTHON_LONG(i16, i64)
+FROM_PYTHON_LONG(ui16, ui64)
+FROM_PYTHON_LONG(i32, i64)
+FROM_PYTHON_LONG(ui32, ui64)
+FROM_PYTHON_LONG(i64, i64)
+FROM_PYTHON_LONG(ui64, ui64)
+FROM_PYTHON_BYTES_OR_UTF(TString)
+FROM_PYTHON_BYTES_OR_UTF(TStringBuf)
+FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
+#else
+FROM_PYTHON_INT_OR_LONG(i8, i64)
+FROM_PYTHON_INT_OR_LONG(ui8, ui64)
+FROM_PYTHON_INT_OR_LONG(i16, i64)
+FROM_PYTHON_INT_OR_LONG(ui16, ui64)
+FROM_PYTHON_INT_OR_LONG(i32, i64)
+FROM_PYTHON_INT_OR_LONG(ui32, ui64)
+FROM_PYTHON_INT_OR_LONG(i64, i64)
+FROM_PYTHON_INT_OR_LONG(ui64, ui64)
+FROM_PYTHON_BYTES(TString)
+FROM_PYTHON_BYTES(TStringBuf)
+FROM_PYTHON_BYTES(NUdf::TStringRef)
+#endif
+
+FROM_PYTHON_FLOAT(float)
+FROM_PYTHON_FLOAT(double)
+
+// Maps a C++ bool onto the Python singletons True / False. The singleton is
+// borrowed, so the returned pointer takes its own reference (ADD_REF).
+template <>
+TPyObjectPtr PyCast<bool>(bool value)
+{
+    return TPyObjectPtr(value ? Py_True : Py_False, TPyObjectPtr::ADD_REF);
+}
+
+// C++ -> Python specializations. Integers go through Py_BuildValue with the
+// format unit matching their width; 64-bit integers use the long long units
+// when HAVE_LONG_LONG is defined and the long units otherwise. String-like
+// types get both a bytes (PyCast) and a unicode (ToPyUnicode) conversion.
+TO_PYTHON("b", i8)
+TO_PYTHON("B", ui8)
+TO_PYTHON("h", i16)
+TO_PYTHON("H", ui16)
+TO_PYTHON("i", i32)
+TO_PYTHON("I", ui32)
+#ifdef HAVE_LONG_LONG
+TO_PYTHON("L", i64)
+TO_PYTHON("K", ui64)
+#else
+TO_PYTHON("l", i64)
+TO_PYTHON("k", ui64)
+#endif
+
+TO_PYTHON_BYTES(TString)
+TO_PYTHON_BYTES(TStringBuf)
+TO_PYTHON_BYTES(NUdf::TStringRef)
+TO_PYTHON_UNICODE(TString)
+TO_PYTHON_UNICODE(TStringBuf)
+TO_PYTHON_UNICODE(NUdf::TStringRef)
+
+// Converts a Python 2-tuple (number, timezone name) into a timezone-aware
+// value. T is the underlying integer type, limit the exclusive upper bound of
+// the number and typeName the type name used in the range error message.
+// Throws when the object is not a 2-tuple, the number is not below limit or
+// the timezone name is unknown to the date builder.
+template <typename T>
+NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) {
+    PY_ENSURE(PyTuple_Check(value),
+        "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name);
+
+    const Py_ssize_t arity = PyTuple_GET_SIZE(value);
+    PY_ENSURE(arity == 2,
+        "Expected to get Tuple with 2 elements, but got "
+        << arity << " elements");
+
+    PyObject* const stampObj = PyTuple_GET_ITEM(value, 0);
+    PyObject* const zoneObj = PyTuple_GET_ITEM(value, 1);
+
+    const auto stamp = PyCast<T>(stampObj);
+    if (!(stamp < limit)) {
+        throw yexception() << "Python object " << PyObjectRepr(stampObj)
+                           << " is out of range for " << typeName;
+    }
+
+    const auto zoneName = PyCast<NUdf::TStringRef>(zoneObj);
+    auto converted = NUdf::TUnboxedValuePod(stamp);
+
+    ui32 zoneId;
+    const bool known = ctx->ValueBuilder->GetDateBuilder().FindTimezoneId(zoneName, zoneId);
+    if (!known) {
+        throw yexception() << "Unknown timezone: " << TStringBuf(zoneName);
+    }
+
+    converted.SetTimezoneId(zoneId);
+    return converted;
+}
+
+// C++ -> Python specializations for floating point values.
+TO_PYTHON("f", float)
+TO_PYTHON("d", double)
+
+namespace {
+
+// Converts a value of a Data (or Decimal) type into the matching Python
+// object, dispatching on the data type id:
+//  - numbers and bool use the PyCast specializations;
+//  - String becomes bytes, or unicode when the context asks for decoding;
+//  - Yson becomes bytes, optionally post-processed by ctx->YsonConverterIn;
+//  - Uuid becomes bytes, Json/Utf8 become unicode;
+//  - Date/Datetime/Timestamp/Interval become plain integers;
+//  - Tz* types become a 2-tuple (integer, timezone name).
+// Throws yexception for a type id that has no case.
+TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+ const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto typeId = inspector.GetTypeId();
+
+ switch (typeId) {
+ case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>());
+ case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>());
+ case NUdf::TDataType<i16>::Id: return PyCast<i16>(value.Get<i16>());
+ case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>());
+ case NUdf::TDataType<i32>::Id: return PyCast<i32>(value.Get<i32>());
+ case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>());
+ case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>());
+ case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>());
+ case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>());
+ case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>());
+ case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>());
+ case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
+ case NUdf::TDataType<const char*>::Id: {
+ // String: raw bytes unless the context requests decoding to unicode.
+ if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) {
+ return PyCast<NUdf::TStringRef>(value.AsStringRef());
+ } else {
+ auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
+ if (!pyObj) {
+ // Decoding failed: terminate the UDF with the Python error text.
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos <<
+ "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" <<
+ GetLastErrorAsString()).data()
+ );
+ }
+ return pyObj;
+ }
+ }
+ case NUdf::TDataType<NUdf::TYson>::Id: {
+ auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef());
+ if (ctx->YsonConverterIn) {
+ // Call the user supplied converter with the bytes as the only argument.
+ TPyObjectPtr pyArgs(PyTuple_New(1));
+ // PyTuple_SET_ITEM steals the reference, hence Release().
+ PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release());
+ pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get());
+ if (!pyObj) {
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+ }
+ }
+
+ return pyObj;
+ }
+ case NUdf::TDataType<NUdf::TUuid>::Id:
+ return PyCast<NUdf::TStringRef>(value.AsStringRef());
+ case NUdf::TDataType<NUdf::TJson>::Id:
+ case NUdf::TDataType<NUdf::TUtf8>::Id:
+ return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
+ case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>());
+ case NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>());
+ case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>());
+ case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>());
+ case NUdf::TDataType<NUdf::TTzDate>::Id: {
+ // Tz* values are exported as (integer, timezone name) tuples.
+ TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ case NUdf::TDataType<NUdf::TTzDatetime>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ case NUdf::TDataType<NUdf::TTzTimestamp>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ }
+
+ throw yexception()
+ << "Unsupported type " << typeId;
+}
+
+// Encodes a Python unicode object as UTF-8 and copies the bytes into a new
+// string value. Throws yexception when the encoder fails
+// (PyUnicode_AsUTF8String returned NULL) instead of handing the null pointer
+// on to PyCast, which would dereference it.
+NUdf::TUnboxedValue FromPyUnicodeAsUtf8(
+    const TPyCastContext::TPtr& ctx, PyObject* value)
+{
+    const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value));
+    if (!utf8.Get()) {
+        // Take the error text first, then drop the pending error so that the
+        // repr() call below runs with a clean error indicator.
+        const auto error = GetLastErrorAsString();
+        PyErr_Clear();
+        throw yexception() << "Python object " << PyObjectRepr(value)
+                           << " can't be encoded as UTF-8: " << error;
+    }
+    return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get()));
+}
+
+// Converts a Python object into a value of a Data (or Decimal) type,
+// dispatching on the data type id:
+//  - numbers and bool use the throwing PyCast specializations;
+//  - Yson is passed through ctx->YsonConverterOut when one is configured and
+//    is otherwise handled like String;
+//  - String/Utf8/Json/Uuid are copied into a new string value (Uuid must be
+//    exactly 16 bytes long);
+//  - Date/Datetime/Timestamp/Interval are range-checked integers;
+//  - Tz* types expect a 2-tuple (integer, timezone name).
+// Throws yexception on any conversion or range failure and for a type id that
+// has no case.
+NUdf::TUnboxedValue FromPyData(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type, PyObject* value)
+{
+    const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto typeId = inspector.GetTypeId();
+
+    switch (typeId) {
+    case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value));
+    case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value));
+    case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value));
+    case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value));
+    case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value));
+    case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value));
+    case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value));
+    case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value));
+    case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value));
+    case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value));
+    case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value));
+    case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
+    case NUdf::TDataType<NUdf::TYson>::Id: {
+        if (ctx->YsonConverterOut) {
+            TPyObjectPtr input(value, TPyObjectPtr::ADD_REF);
+            TPyObjectPtr pyArgs(PyTuple_New(1));
+            // PyTuple_SET_ITEM steals reference, so pass ownership to it
+            PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release());
+            input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get()));
+            if (!input) {
+                UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+            }
+            return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get()));
+        }
+        // No converter configured: control falls through to the String case
+        // below and the object is treated as a plain string.
+    }
+#if PY_MAJOR_VERSION >= 3
+    case NUdf::TDataType<const char*>::Id:
+        return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+    case NUdf::TDataType<NUdf::TUtf8>::Id:
+    case NUdf::TDataType<NUdf::TJson>::Id:
+        if (PyUnicode_Check(value)) {
+            return FromPyUnicodeAsUtf8(ctx, value);
+        }
+        throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode";
+#else
+    case NUdf::TDataType<const char*>::Id:
+    case NUdf::TDataType<NUdf::TJson>::Id:
+    case NUdf::TDataType<NUdf::TUtf8>::Id: {
+        if (PyUnicode_Check(value)) {
+            return FromPyUnicodeAsUtf8(ctx, value);
+        }
+
+        // Byte strings are accepted for Utf8/Json only when they are valid UTF-8.
+        if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) &&
+            PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) {
+            throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json";
+        }
+
+        return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+    }
+#endif
+    case NUdf::TDataType<NUdf::TUuid>::Id: {
+        const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+        if (ret.AsStringRef().Size() != 16) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " has invalid value for Uuid";
+        }
+
+        return ret;
+    }
+    case NUdf::TDataType<NUdf::TDate>::Id: {
+        auto num = PyCast<ui16>(value);
+        if (num >= NUdf::MAX_DATE) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Date";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TDatetime>::Id: {
+        auto num = PyCast<ui32>(value);
+        if (num >= NUdf::MAX_DATETIME) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Datetime";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TTimestamp>::Id: {
+        auto num = PyCast<ui64>(value);
+        if (num >= NUdf::MAX_TIMESTAMP) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Timestamp";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TInterval>::Id: {
+        auto num = PyCast<i64>(value);
+        // Intervals are limited to the open range (-MAX_TIMESTAMP, MAX_TIMESTAMP).
+        if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) {
+            throw yexception() << "Python object " << PyObjectRepr(value)
+                               << " is out of range for Interval";
+        }
+
+        return NUdf::TUnboxedValuePod(num);
+    }
+
+    case NUdf::TDataType<NUdf::TTzDate>::Id:
+        return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx);
+    case NUdf::TDataType<NUdf::TTzDatetime>::Id:
+        return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx);
+    case NUdf::TDataType<NUdf::TTzTimestamp>::Id:
+        return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx);
+    }
+
+    throw yexception()
+        << "Unsupported type " << typeId;
+}
+
+// Exports a list value to Python. With ctx->LazyInputObjects set the value is
+// wrapped by ToPyLazyList; otherwise every item is converted eagerly and
+// appended to a new Python list. Throws yexception when appending fails.
+TPyObjectPtr ToPyList(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto itemType = inspector.GetItemType();
+
+    if (ctx->LazyInputObjects) {
+        return ToPyLazyList(ctx, itemType, value);
+    }
+
+    TPyObjectPtr pyList(PyList_New(0));
+    const auto it = value.GetListIterator();
+    NUdf::TUnboxedValue current;
+    while (it.Next(current)) {
+        // PyList_Append takes its own reference; pyItem releases ours.
+        const auto pyItem = ToPyObject(ctx, itemType, current);
+        const int rc = PyList_Append(pyList.Get(), pyItem.Get());
+        if (rc < 0) {
+            throw yexception() << "Can't append item to list"
+                               << GetLastErrorAsString();
+        }
+    }
+
+    return pyList;
+}
+
+// Imports a Python object as a list value. The probes run in this order and
+// the first match wins:
+//  1. list and tuple objects are converted eagerly, item by item;
+//  2. generator objects are wrapped lazily through their iterator;
+//  3. iterators are wrapped lazily as they are;
+//  4. callables are wrapped by FromPyLazyGenerator;
+//  5. sequences and objects with __iter__ are wrapped by FromPyLazyIterable.
+// Anything else results in yexception.
+NUdf::TUnboxedValue FromPyList(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+
+ if (PyList_Check(value)) {
+ // eager list to list conversion
+ auto itemType = inspector.GetItemType();
+ Py_ssize_t cnt = PyList_GET_SIZE(value);
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto list = ctx->ValueBuilder->NewArray(cnt, items);
+ for (Py_ssize_t i = 0; i < cnt; ++i) {
+ PyObject *item = PyList_GET_ITEM(value, i);
+ *items++ = FromPyObject(ctx, itemType, item);
+ }
+ return list;
+ }
+
+ if (PyTuple_Check(value)) {
+ // eager tuple to list conversion
+ auto itemType = inspector.GetItemType();
+ Py_ssize_t cnt = PyTuple_GET_SIZE(value);
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto list = ctx->ValueBuilder->NewArray(cnt, items);
+ for (Py_ssize_t i = 0; i < cnt; ++i) {
+ PyObject *item = PyTuple_GET_ITEM(value, i);
+ *items++ = FromPyObject(ctx, itemType, item);
+ }
+ return list;
+ }
+
+ if (PyGen_Check(value)) {
+ // PyObject_GetIter returns a new reference that valuePtr takes over
+ TPyObjectPtr valuePtr(PyObject_GetIter(value));
+ return FromPyLazyIterator(ctx, type, std::move(valuePtr));
+ }
+
+ if (PyIter_Check(value)
+#if PY_MAJOR_VERSION < 3
+ // python 2 iterators must also implement "next" method
+ && 1 == PyObject_HasAttrString(value, "next")
+#endif
+ ) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyIterator(ctx, type, std::move(valuePtr));
+ }
+
+ // assume that calling this object returns a generator
+ if (PyCallable_Check(value)) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyGenerator(ctx, type, std::move(valuePtr));
+ }
+
+ if (PySequence_Check(value) || PyObject_HasAttrString(value, "__iter__")) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyIterable(ctx, type, std::move(valuePtr));
+ }
+
+ throw yexception() << "Expected list, tuple, generator, generator factory, "
+ "iterator or iterable object, but got: " << PyObjectRepr(value);
+}
+
+// Exports an optional value: an empty optional becomes Python None, otherwise
+// the payload is converted according to the optional's item type.
+TPyObjectPtr ToPyOptional(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    const bool isEmpty = !value;
+    if (isEmpty) {
+        // Py_None is borrowed, so take a reference of our own.
+        return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF);
+    }
+
+    const NUdf::TOptionalTypeInspector optionalInfo(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto payloadType = optionalInfo.GetItemType();
+    return ToPyObject(ctx, payloadType, value);
+}
+
+// Imports an optional value: Python None becomes an empty value, any other
+// object is converted according to the optional's item type and then wrapped
+// with MakeOptional().
+NUdf::TUnboxedValue FromPyOptional(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type, PyObject* value)
+{
+    if (Py_None == value) {
+        return NUdf::TUnboxedValue();
+    }
+
+    const NUdf::TOptionalTypeInspector optionalInfo(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto payloadType = optionalInfo.GetItemType();
+    return FromPyObject(ctx, payloadType, value).Release().MakeOptional();
+}
+
+// Exports a dict value to Python. A dict whose value type is Void is exported
+// as a set (lazy set or frozenset), any other dict as a mapping (lazy dict or
+// dict). The lazy wrappers are used when ctx->LazyInputObjects is set;
+// otherwise all entries are converted eagerly. Throws yexception when an entry
+// cannot be inserted.
+TPyObjectPtr ToPyDict(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto keyType = inspector.GetKeyType();
+    const auto valueType = inspector.GetValueType();
+
+    const bool isSet = NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType);
+
+    if (isSet) {
+        if (ctx->LazyInputObjects) { // TODO
+            return ToPyLazySet(ctx, keyType, value);
+        }
+
+        // Eager path: fill a fresh frozenset with the converted keys.
+        const TPyObjectPtr pySet(PyFrozenSet_New(nullptr));
+        const auto keys = value.GetKeysIterator();
+        NUdf::TUnboxedValue key;
+        while (keys.Next(key)) {
+            const auto pyKey = ToPyObject(ctx, keyType, key);
+            const int rc = PySet_Add(pySet.Get(), pyKey.Get());
+            if (rc < 0) {
+                throw yexception() << "Can't add item to set" << GetLastErrorAsString();
+            }
+        }
+
+        return pySet;
+    }
+
+    if (ctx->LazyInputObjects) {
+        return ToPyLazyDict(ctx, keyType, valueType, value);
+    }
+
+    // Eager path: fill a fresh dict with the converted pairs.
+    const TPyObjectPtr pyDict(PyDict_New());
+    const auto pairs = value.GetDictIterator();
+    NUdf::TUnboxedValue key;
+    NUdf::TUnboxedValue payload;
+    while (pairs.NextPair(key, payload)) {
+        const auto pyKey = ToPyObject(ctx, keyType, key);
+        const auto pyPayload = ToPyObject(ctx, valueType, payload);
+        const int rc = PyDict_SetItem(pyDict.Get(), pyKey.Get(), pyPayload.Get());
+        if (rc < 0) {
+            throw yexception() << "Can't add item to dict" << GetLastErrorAsString();
+        }
+    }
+
+    return pyDict;
+}
+
+// Imports a Python object as a dict value. The branches are mutually
+// exclusive and tried in this order:
+//  1. list/tuple/thin list/lazy list with a Data key type: treated as a
+//     sequence indexed by position, but only when the key type is integral;
+//  2. Void value type (a set): Python sets and objects that implement
+//     sq_contains;
+//  3. dict objects;
+//  4. other mapping objects.
+// Throws yexception when no branch produces a value.
+// NOTE(review): once branch 1 matches, a non-integral key type leads straight
+// to the exception, so e.g. a list of strings is not considered for branch 2 -
+// confirm that this is intended.
+NUdf::TUnboxedValue FromPyDict(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto keyType = inspector.GetKeyType();
+ const auto valueType = inspector.GetValueType();
+
+ if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType)
+ && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) {
+ const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType);
+ if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) {
+ return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value);
+ }
+ } else if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
+ if (PyAnySet_Check(value)) {
+ return FromPySet(ctx, keyType, value);
+ } else if (value->ob_type->tp_as_sequence && value->ob_type->tp_as_sequence->sq_contains) {
+ return FromPySequence(ctx, keyType, value);
+ }
+ } else if (PyDict_Check(value)) {
+ return FromPyDict(ctx, keyType, valueType, value);
+ } else if (PyMapping_Check(value)) {
+ return FromPyMapping(ctx, keyType, valueType, value);
+ }
+
+ throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict.";
+}
+
+} // namespace
+
+// Entry point of the value -> Python conversion: looks up the kind of the
+// given type and forwards to the converter for that kind. Throws yexception
+// with the printed type when the kind has no converter.
+TPyObjectPtr ToPyObject(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+    const auto kind = ctx->PyCtx->TypeInfoHelper->GetTypeKind(type);
+
+    switch (kind) {
+        case NUdf::ETypeKind::Data:
+            return ToPyData(ctx, type, value);
+        case NUdf::ETypeKind::Tuple:
+            return ToPyTuple(ctx, type, value);
+        case NUdf::ETypeKind::Struct:
+            return ToPyStruct(ctx, type, value);
+        case NUdf::ETypeKind::List:
+            return ToPyList(ctx, type, value);
+        case NUdf::ETypeKind::Optional:
+            return ToPyOptional(ctx, type, value);
+        case NUdf::ETypeKind::Dict:
+            return ToPyDict(ctx, type, value);
+        case NUdf::ETypeKind::Callable:
+            return ToPyCallable(ctx, type, value);
+        case NUdf::ETypeKind::Resource:
+            return ToPyResource(ctx, type, value);
+        case NUdf::ETypeKind::Void:
+            return ToPyVoid(ctx, type, value);
+        case NUdf::ETypeKind::Stream:
+            return ToPyStream(ctx, type, value);
+        case NUdf::ETypeKind::Variant:
+            return ToPyVariant(ctx, type, value);
+        default:
+            break;
+    }
+
+    // Unsupported kind: report it together with the printed type.
+    ::TStringBuilder message;
+    message << "Failed to export: ";
+    NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(message.Out);
+    throw yexception() << message;
+}
+
+// Entry point of the Python -> value conversion: looks up the kind of the
+// given type and forwards to the converter for that kind. Throws yexception
+// with the printed type when the kind has no converter.
+NUdf::TUnboxedValue FromPyObject(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type, PyObject* value)
+{
+    const auto kind = ctx->PyCtx->TypeInfoHelper->GetTypeKind(type);
+
+    switch (kind) {
+        case NUdf::ETypeKind::Data:
+            return FromPyData(ctx, type, value);
+        case NUdf::ETypeKind::Tuple:
+            return FromPyTuple(ctx, type, value);
+        case NUdf::ETypeKind::Struct:
+            return FromPyStruct(ctx, type, value);
+        case NUdf::ETypeKind::List:
+            return FromPyList(ctx, type, value);
+        case NUdf::ETypeKind::Optional:
+            return FromPyOptional(ctx, type, value);
+        case NUdf::ETypeKind::Dict:
+            return FromPyDict(ctx, type, value);
+        case NUdf::ETypeKind::Callable:
+            return FromPyCallable(ctx, type, value);
+        case NUdf::ETypeKind::Resource:
+            return FromPyResource(ctx, type, value);
+        case NUdf::ETypeKind::Void:
+            return FromPyVoid(ctx, type, value);
+        case NUdf::ETypeKind::Stream:
+            // The stream wrapper keeps the Python object, so it gets its own reference.
+            return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr);
+        case NUdf::ETypeKind::Variant:
+            return FromPyVariant(ctx, type, value);
+        default:
+            break;
+    }
+
+    // Unsupported kind: report it together with the printed type.
+    ::TStringBuilder message;
+    message << "Failed to import: ";
+    NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(message.Out);
+    throw yexception() << message;
+}
+
+// Builds the Python argument tuple for a callable: every entry of args is
+// converted according to the argument type reported by the inspector. The
+// type parameter is not used by this function.
+TPyObjectPtr ToPyArgs(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod* args,
+    const NUdf::TCallableTypeInspector& inspector)
+{
+    const auto total = inspector.GetArgsCount();
+    TPyObjectPtr pyArgs(PyTuple_New(total));
+
+    for (ui32 pos = 0; pos < total; ++pos) {
+        auto converted = ToPyObject(ctx, inspector.GetArgType(pos), args[pos]);
+        // PyTuple_SET_ITEM steals the reference, so give up ownership.
+        PyTuple_SET_ITEM(pyArgs.Get(), pos, converted.Release());
+    }
+
+    return pyArgs;
+}
+
+// Converts a Python argument tuple into cArgs. The tuple may omit trailing
+// optional arguments; the omitted slots are filled with empty values. Throws
+// when pyArgs is not a tuple or its size is outside the allowed range. The
+// type parameter is not used by this function.
+void FromPyArgs(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    PyObject* pyArgs,
+    NUdf::TUnboxedValue* cArgs,
+    const NUdf::TCallableTypeInspector& inspector)
+{
+    PY_ENSURE_TYPE(Tuple, pyArgs, "Expected");
+
+    const auto maxCount = inspector.GetArgsCount();
+    const auto minCount = maxCount - inspector.GetOptionalArgsCount();
+    const ui32 given = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs));
+
+    PY_ENSURE(minCount <= given && given <= maxCount,
+        "arguments count missmatch: "
+        "min " << minCount << ", max " << maxCount
+        << ", got " << given);
+
+    for (ui32 pos = 0; pos < maxCount; ++pos) {
+        if (pos < given) {
+            PyObject* const pyArg = PyTuple_GET_ITEM(pyArgs, pos);
+            cArgs[pos] = FromPyObject(ctx, inspector.GetArgType(pos), pyArg);
+        } else {
+            // Optional argument that was not passed.
+            cArgs[pos] = NUdf::TUnboxedValuePod();
+        }
+    }
+}
+
+// IMemoryLock implementation whose Acquire/Release do nothing; the
+// TPyCastContext constructor installs it when no lock is supplied.
+class TDummyMemoryLock : public IMemoryLock {
+public:
+ void Acquire() override {}
+ void Release() override {}
+};
+
+// Stores the value builder, the Python context and the memory lock. When no
+// lock is supplied a no-op TDummyMemoryLock is installed, so MemoryLock is
+// never empty after construction.
+TPyCastContext::TPyCastContext(
+    const NKikimr::NUdf::IValueBuilder* builder,
+    TPyContext::TPtr pyCtx,
+    THolder<IMemoryLock> memoryLock)
+    : ValueBuilder(builder)
+    , PyCtx(std::move(pyCtx))
+    , MemoryLock(std::move(memoryLock))
+{
+    const bool lockMissing = !MemoryLock;
+    if (lockMissing) {
+        MemoryLock = MakeHolder<TDummyMemoryLock>();
+    }
+}
+
+// Drops the cached Python-side state explicitly while a TPyGilLocker is alive
+// (presumably so the Python references are released with the GIL held -
+// confirm against py_gil.h) instead of leaving it to the implicit member
+// destructors that run after the locker is gone.
+TPyCastContext::~TPyCastContext() {
+ TPyGilLocker locker;
+ StructTypes.clear();
+ YsonConverterIn.Reset();
+ YsonConverterOut.Reset();
+ TimezoneNames.clear();
+}
+
+// Returns the cached Python object holding the name of the timezone with the
+// given id, creating and caching it on first use. Throws yexception when the
+// date builder does not know the id.
+const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) {
+    auto& cached = TimezoneNames[id];
+    if (!cached) {
+        NKikimr::NUdf::TStringRef zoneName;
+        const bool known = ValueBuilder->GetDateBuilder().FindTimezoneName(id, zoneName);
+        if (!known) {
+            throw yexception() << "Unknown timezone id: " << id;
+        }
+
+        cached = PyRepr(zoneName);
+    }
+
+    return cached;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.h b/yql/essentials/udfs/common/python/bindings/py_cast.h
new file mode 100644
index 0000000000..e6850c7404
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+#include <util/generic/typetraits.h>
+
+namespace NPython {
+
+// Converts a C++ value of type T into a Python object.
+template <typename T>
+TPyObjectPtr PyCast(typename TTypeTraits<T>::TFuncParam value);
+
+// Converts a Python object into a C++ value of type T.
+// NOTE(review): failure behaviour is not visible from this header - confirm
+// against the definition.
+template <typename T>
+T PyCast(PyObject* value);
+
+// Non-throwing-style variant: stores the converted value in `result` and
+// returns true on success (callers treat a false result as a failed cast).
+template <typename T>
+bool TryPyCast(PyObject* value, T& result);
+
+// Builds a Python unicode object from `value`.
+template <typename T>
+TPyObjectPtr ToPyUnicode(const T& value);
+
+// Converts an unboxed UDF value of the given UDF `type` into a Python object.
+TPyObjectPtr ToPyObject(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Converts a Python object into an unboxed UDF value of the given UDF `type`.
+NKikimr::NUdf::TUnboxedValue FromPyObject(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+// Packs the callable's UDF arguments `args` into a Python argument object.
+// NOTE(review): presumably returns a tuple, mirroring FromPyArgs - confirm.
+TPyObjectPtr ToPyArgs(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod* args,
+ const NKikimr::NUdf::TCallableTypeInspector& inspector);
+
+// Unpacks the Python tuple `pyArgs` into `cArgs`.
+// The tuple size must lie between (args count - optional args count) and
+// args count as reported by `inspector`; each present item is converted with
+// FromPyObject using the matching argument type, and the trailing optional
+// arguments that were not supplied are set to an empty value.
+void FromPyArgs(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* pyArgs,
+ NKikimr::NUdf::TUnboxedValue* cArgs,
+ const NKikimr::NUdf::TCallableTypeInspector& inspector);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp
new file mode 100644
index 0000000000..47f65ab6fa
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp
@@ -0,0 +1,90 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+// Negative tests for Python -> MiniKQL conversion: every case runs a small
+// Python `Test()` function through TPythonTestEngine::ToMiniKQL<T> and
+// expects a yexception whose message contains the given substring.
+Y_UNIT_TEST_SUITE(TPyCastTest) {
+ // A non-numeric str cannot become i32; the message must mention "str".
+ Y_UNIT_TEST(FromPyStrToInt) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<i32>(
+ "def Test():\n"
+ " return '123a'",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "str");
+ }
+
+ // A tuple cannot become ui64; the message must mention "tuple".
+ Y_UNIT_TEST(FromPyTupleToLong) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<ui64>(
+ "def Test():\n"
+ " return 1, 1",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "tuple");
+ }
+
+ // A function object cannot become a string; the message must mention
+ // "function".
+ Y_UNIT_TEST(FromPyFuncToString) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<char*>(
+ "def f():\n"
+ " return 42\n"
+ "def Test():\n"
+ " return f",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "function");
+ }
+
+ // None cannot become a (non-optional) string; the message must mention
+ // "None".
+ Y_UNIT_TEST(FromPyNoneToString) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<char*>(
+ "def Test():\n"
+ " return None",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ }),
+ yexception, "None");
+ }
+
+ // A malformed float literal must fail with the exact cast-error text;
+ // the value callback must never run.
+ Y_UNIT_TEST(BadFromPythonFloat) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<float>(
+ "def Test():\n"
+ " return '3 <dot> 1415926'",
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ Y_UNREACHABLE();
+ }),
+ yexception, "Cast error object '3 <dot> 1415926' to Float");
+ }
+
+// RETVAL is the out-of-range literal returned by the Python side; it differs
+// between Python 3 and Python 2 builds. The macro is not #undef'ed afterwards.
+#if PY_MAJOR_VERSION >= 3
+# define RETVAL "-1"
+#else
+# define RETVAL "-18446744073709551616L"
+#endif
+
+ // A value that does not fit ui64 must fail with the exact cast-error
+ // text; the value callback must never run.
+ Y_UNIT_TEST(BadFromPythonLong) {
+ TPythonTestEngine engine;
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ engine.ToMiniKQL<ui64>(
+ "def Test():\n"
+ " return " RETVAL,
+ [](const NUdf::TUnboxedValuePod& value) {
+ Y_UNUSED(value);
+ Y_UNREACHABLE();
+ }),
+ yexception, "Cast error object " RETVAL " to Long");
+ }
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_ctx.h b/yql/essentials/udfs/common/python/bindings/py_ctx.h
new file mode 100644
index 0000000000..9e86042908
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_ctx.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include "py_ptr.h"
+
+#include <yql/essentials/public/udf/udf_types.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_string.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/intrlist.h>
+
+#include <unordered_map>
+
+namespace NPython {
+
+// Policy selector stored in TPyCastContext::BytesDecodeMode.
+// NOTE(review): presumably controls whether byte strings are decoded to
+// unicode when converted to Python - confirm against the cast code.
+enum class EBytesDecodeMode {
+ Never,
+ Strict,
+};
+
+// Abstract lock interface with paired Acquire()/Release() operations.
+// TPyCastContext owns one instance through THolder<IMemoryLock>.
+class IMemoryLock {
+public:
+ virtual ~IMemoryLock() = default;
+ virtual void Acquire() = 0;
+ virtual void Release() = 0;
+};
+
+// Type-erased node of TPyContext::CleanupList.
+// Cleanup() is invoked on every linked node by TPyContext::Cleanup().
+struct TPyCleanupListItemBase: public TIntrusiveListItem<TPyCleanupListItemBase> {
+ virtual ~TPyCleanupListItemBase() = default;
+ virtual void Cleanup() = 0;
+};
+
+// Holder for a value that must be dropped when the owning Python context is
+// cleaned up. Set() stores the value and links this node into the context's
+// CleanupList; Cleanup() resets the value; Get() refuses to hand out a value
+// that has already been reset.
+template <typename TValueType>
+class TPyCleanupListItem: public TPyCleanupListItemBase {
+public:
+ TPyCleanupListItem() = default;
+ // Removes this node from whatever list it is linked into.
+ virtual ~TPyCleanupListItem() {
+ Unlink();
+ }
+
+ // Replaces the stored value with a default-constructed (empty) one.
+ void Cleanup() override {
+ Value = {};
+ }
+
+ // Takes ownership of `val` and registers this node at the back of
+ // ctx->CleanupList.
+ template <typename TCtx>
+ void Set(const TIntrusivePtr<TCtx>& ctx, TValueType val) {
+ Value = std::move(val);
+ ctx->CleanupList.PushBack(this);
+ }
+
+ // True while a non-empty value is stored.
+ bool IsSet() const {
+ return !!Value;
+ }
+
+ // Returns the stored value; throws yexception if it is empty (never set,
+ // or already reset by Cleanup()).
+ const TValueType& Get() const {
+ if (!Value) {
+ throw yexception() << "Trying to use python wrap object with destroyed yql value";
+ }
+ return Value;
+ }
+
+private:
+ TValueType Value;
+};
+
+// Ref-counted context shared by Python wrapper objects: type info helper,
+// resource tag, source position and the list of values to drop on Cleanup().
+struct TPyContext: public TSimpleRefCount<TPyContext> {
+ const NKikimr::NUdf::ITypeInfoHelper::TPtr TypeInfoHelper;
+ // NOTE(review): TStringRef looks like a non-owning view - confirm that the
+ // referenced tag outlives this context.
+ const NKikimr::NUdf::TStringRef ResourceTag;
+ const NKikimr::NUdf::TSourcePosition Pos;
+ // Nodes registered through TPyCleanupListItem::Set().
+ TIntrusiveList<TPyCleanupListItemBase> CleanupList;
+
+ TPyContext(NKikimr::NUdf::ITypeInfoHelper::TPtr helper, const NKikimr::NUdf::TStringRef& tag, const NKikimr::NUdf::TSourcePosition& pos)
+ : TypeInfoHelper(std::move(helper))
+ , ResourceTag(tag)
+ , Pos(pos)
+ {
+ }
+
+ // Calls Cleanup() on every registered node, then empties the list.
+ void Cleanup() {
+ for (auto& o: CleanupList) {
+ o.Cleanup();
+ }
+ CleanupList.Clear();
+ }
+
+ ~TPyContext() = default;
+
+ using TPtr = TIntrusivePtr<TPyContext>;
+};
+
+// Ref-counted state shared by the Python <-> UDF value conversion routines.
+// Several members cache Python objects (TPyObjectPtr); the destructor is
+// user-defined so they can be released explicitly.
+struct TPyCastContext: public TSimpleRefCount<TPyCastContext> {
+ const NKikimr::NUdf::IValueBuilder *const ValueBuilder;
+ const TPyContext::TPtr PyCtx;
+ // Python objects cached per UDF type.
+ // NOTE(review): presumably the Python classes generated for struct types.
+ std::unordered_map<const NKikimr::NUdf::TType*, TPyObjectPtr> StructTypes;
+ bool LazyInputObjects = true;
+ TPyObjectPtr YsonConverterIn;
+ TPyObjectPtr YsonConverterOut;
+ EBytesDecodeMode BytesDecodeMode = EBytesDecodeMode::Never;
+ // Cached `decimal.Decimal` class, filled lazily by GetDecimal().
+ TPyObjectPtr Decimal;
+ // Cache used by GetTimezoneName(): timezone id -> Python string.
+ std::unordered_map<ui32, TPyObjectPtr> TimezoneNames;
+ // Never null after construction: the constructor substitutes a no-op lock
+ // when none is given.
+ THolder<IMemoryLock> MemoryLock;
+
+ TPyCastContext(
+ const NKikimr::NUdf::IValueBuilder* builder,
+ TPyContext::TPtr pyCtx,
+ THolder<IMemoryLock> memoryLock = {});
+
+ ~TPyCastContext();
+
+ // Returns the cached Python name of timezone `id`; throws yexception for an
+ // unknown id.
+ const TPyObjectPtr& GetTimezoneName(ui32 id);
+ // Returns the `decimal.Decimal` class, importing the module on first use.
+ const TPyObjectPtr& GetDecimal();
+
+ using TPtr = TIntrusivePtr<TPyCastContext>;
+};
+
+using TPyCastContextPtr = TPyCastContext::TPtr;
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp
new file mode 100644
index 0000000000..0070e3420f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp
@@ -0,0 +1,59 @@
+#include "py_decimal.h"
+#include "py_errors.h"
+#include "py_utils.h"
+#include "py_cast.h"
+
+#include <util/stream/str.h>
+
+#include <yql/essentials/public/udf/udf_value.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+// Converts a UDF decimal (128-bit integer with the given precision/scale)
+// into a Python `decimal.Decimal` by formatting it as text and calling the
+// Decimal class with that text as its only argument.
+TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale)
+{
+    const auto raw = value.GetInt128();
+    const auto str = NYql::NDecimal::ToString(raw, precision, scale);
+    PY_ENSURE(str, "Bad decimal value.");
+
+    // One-element argument tuple holding the textual representation.
+    const TPyObjectPtr pyText(PyRepr(str));
+    const TPyObjectPtr args(PyTuple_Pack(1, pyText.Get()));
+    PY_ENSURE(args, "Can't pack args.");
+
+    const TPyObjectPtr& decimalClass = ctx->GetDecimal();
+    const TPyObjectPtr dec(PyObject_CallObject(decimalClass.Get(), args.Get()));
+    PY_ENSURE(dec, "Can't create Decimal.");
+
+    return dec;
+}
+
+// Converts a Python object into a UDF decimal by taking str(value) and
+// parsing it with the requested precision/scale. `ctx` is not used.
+NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& ctx, PyObject* value, ui8 precision, ui8 scale)
+{
+    const TPyObjectPtr print(PyObject_Str(value));
+    PY_ENSURE(print, "Can't print decimal.");
+
+    TString str;
+    PY_ENSURE(TryPyCast<TString>(print.Get(), str), "Can't get decimal string.");
+
+    // Python prints infinities as "Infinity"/"-Infinity"; cut the trailing
+    // "inity" so that the text handed to the parser ends with "Inf".
+    const size_t infinitySuffixLen = 5U;
+    if (str.EndsWith("Infinity")) {
+        const size_t shortened = str.size() - infinitySuffixLen;
+        str.resize(shortened);
+    }
+
+    const auto dec = NYql::NDecimal::FromStringEx(str.c_str(), precision, scale);
+    PY_ENSURE(!NYql::NDecimal::IsError(dec), "Can't make Decimal from string.");
+
+    return NKikimr::NUdf::TUnboxedValuePod(dec);
+}
+
+// Returns the Python `decimal.Decimal` class. The first call imports the
+// `decimal` module and caches its `Decimal` attribute in the context.
+const TPyObjectPtr& TPyCastContext::GetDecimal() {
+    const bool alreadyLoaded = !!Decimal;
+    if (alreadyLoaded) {
+        return Decimal;
+    }
+
+    const TPyObjectPtr module(PyImport_ImportModule("decimal"));
+    PY_ENSURE(module, "Can't import decimal.");
+
+    PyObject* const decimalClass = PyObject_GetAttrString(module.Get(), "Decimal");
+    Decimal.ResetSteal(decimalClass);
+    PY_ENSURE(Decimal, "Can't get Decimal.");
+
+    return Decimal;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.h b/yql/essentials/udfs/common/python/bindings/py_decimal.h
new file mode 100644
index 0000000000..5764fe4fa8
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_decimal.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Converts a UDF decimal value with the given precision and scale into a
+// Python Decimal object.
+TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& castCtx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale);
+
+// Converts a Python object into a UDF decimal value with the given precision
+// and scale.
+NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& castCtx, PyObject* value, ui8 precision, ui8 scale);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp
new file mode 100644
index 0000000000..8388c110f3
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp
@@ -0,0 +1,122 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+// Round-trip tests for decimal conversion. The FromPy* cases return a Python
+// Decimal and check the resulting 128-bit value; the ToPy* cases build a UDF
+// decimal and assert on it from Python code.
+Y_UNIT_TEST_SUITE(TPyDecimalTest) {
+ // Decimal() converts to a present value whose integer part is zero.
+ Y_UNIT_TEST(FromPyZero) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<12,5>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal()
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(!value.GetInt128());
+ });
+ }
+
+ // With scale 18 the 18 fractional digits land in the integer verbatim.
+ Y_UNIT_TEST(FromPyPi) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<28,18>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('3.141592653589793238')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL);
+ });
+ }
+
+ // Smallest negative value representable with scale 35 is stored as -1.
+ Y_UNIT_TEST(FromPyTini) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<35,35>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('-.00000000000000000000000000000000001')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == -1);
+ });
+ }
+
+ // Decimal('NaN') maps to NYql::NDecimal::Nan().
+ Y_UNIT_TEST(FromPyNan) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('NaN')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan());
+ });
+ }
+
+ // Decimal('-inf') maps to the negated NYql::NDecimal::Inf().
+ Y_UNIT_TEST(FromPyInf) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>(
+ R"(
+from decimal import Decimal
+def Test(): return Decimal('-inf')
+ )",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf());
+ });
+ }
+
+ // A zero UDF decimal is seen as a zero Decimal in Python.
+ Y_UNIT_TEST(ToPyZero) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<7,7>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod::Zero();
+ },
+ "def Test(value): assert value.is_zero()"
+ );
+ }
+
+ // Scale 18 places the decimal point after the first digit.
+ Y_UNIT_TEST(ToPyPi) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<20,18>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL));
+ },
+ "def Test(value): assert str(value) == '3.141592653589793238'"
+ );
+ }
+
+ // -1 with scale 35 is the smallest negative fraction.
+ Y_UNIT_TEST(ToPyTini) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<35,35>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1));
+ },
+ "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'"
+ );
+ }
+
+ // NYql::NDecimal::Nan() is seen as NaN in Python.
+ Y_UNIT_TEST(ToPyNan) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<2,2>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan());
+ },
+ "def Test(value): assert value.is_nan()"
+ );
+ }
+
+ // Negated NYql::NDecimal::Inf() is seen as a signed infinity in Python.
+ Y_UNIT_TEST(ToPyInf) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDecimalDataType<30,0>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf());
+ },
+ "def Test(value): assert value.is_infinite() and value.is_signed()"
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_dict.cpp
new file mode 100644
index 0000000000..f2bd0669ed
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_dict.cpp
@@ -0,0 +1,683 @@
+#include "py_dict.h"
+#include "py_iterator.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyDict interface
+//////////////////////////////////////////////////////////////////////////////
+// Python object (type `yql.TDict`) that wraps a boxed UDF dictionary value and
+// answers lookups lazily through NUdf::TBoxedValueAccessor.
+// PyObject_HEAD must stay first so the struct can be cast to/from PyObject*.
+struct TPyLazyDict
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyLazyDict, TPyPtrOps<TPyLazyDict>>;
+
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+ // May be null: the lookup methods then treat keys as non-negative integer
+ // indices instead of converting them with FromPyObject.
+ const NUdf::TType* KeyType;
+ const NUdf::TType* PayloadType;
+ // Wrapped value; reset when the owning TPyContext is cleaned up, after
+ // which Value.Get() throws.
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+
+ inline static TPyLazyDict* Cast(PyObject* o) {
+ return reinterpret_cast<TPyLazyDict*>(o);
+ }
+
+ // tp_dealloc: objects are created with `new` in New(), so they are
+ // destroyed with `delete`.
+ inline static void Dealloc(PyObject* self) {
+ delete Cast(self);
+ }
+
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* keyType,
+ const NUdf::TType* payloadType,
+ NUdf::IBoxedValuePtr&& value);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static PyObject* Subscript(PyObject* self, PyObject* key);
+ static int Contains(PyObject* self, PyObject* key);
+ static PyObject* Get(PyObject* self, PyObject* args);
+
+ // Iterating the object iterates its keys.
+ static PyObject* Iter(PyObject* self) { return Keys(self, nullptr); }
+ static PyObject* Keys(PyObject* self, PyObject* /* args */);
+ static PyObject* Items(PyObject* self, PyObject* /* args */);
+ static PyObject* Values(PyObject* self, PyObject* /* args */);
+};
+
+// Mapping protocol slots of yql.TDict: len() and d[key]; item assignment is
+// not supported.
+PyMappingMethods LazyDictMapping = {
+ INIT_MEMBER(mp_length, TPyLazyDict::Len),
+ INIT_MEMBER(mp_subscript, TPyLazyDict::Subscript),
+ INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+// Sequence protocol slots of yql.TDict: only len() and `key in d` are
+// provided. The slice slot names differ between Python 2 and Python 3.
+// NOTE(review): keep the entries in struct order - INIT_MEMBER may expand to
+// a positional initializer; confirm against its definition.
+PySequenceMethods LazyDictSequence = {
+ INIT_MEMBER(sq_length , TPyLazyDict::Len),
+ INIT_MEMBER(sq_concat , nullptr),
+ INIT_MEMBER(sq_repeat , nullptr),
+ INIT_MEMBER(sq_item , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(was_sq_slice , nullptr),
+#else
+ INIT_MEMBER(sq_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_ass_item , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(was_sq_ass_slice , nullptr),
+#else
+ INIT_MEMBER(sq_ass_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_contains , TPyLazyDict::Contains),
+ INIT_MEMBER(sq_inplace_concat , nullptr),
+ INIT_MEMBER(sq_inplace_repeat , nullptr),
+};
+
+// Number protocol slots of yql.TDict: only truth testing (nb_bool on
+// Python 3, nb_nonzero on Python 2) is provided; every other slot is null.
+// NOTE(review): keep the entries in struct order - INIT_MEMBER may expand to
+// a positional initializer; confirm against its definition.
+PyNumberMethods LazyDictNumbering = {
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_bool, TPyLazyDict::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+
+// On Python 3 these two type flags are defined here as 0 so that the tp_flags
+// expressions below compile unchanged for both major versions.
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0
+#define Py_TPFLAGS_HAVE_SEQUENCE_IN 0
+#endif
+
+// Docstrings for the yql.TDict methods registered in LazyDictMethods.
+PyDoc_STRVAR(get__doc__,
+ "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.");
+PyDoc_STRVAR(keys__doc__,
+ "D.keys() -> an iterator over the keys of D");
+PyDoc_STRVAR(values__doc__,
+ "D.values() -> an iterator over the values of D");
+PyDoc_STRVAR(items__doc__,
+ "D.items() -> an iterator over the (key, value) items of D");
+// Python 2 only: docstrings for the iter* aliases.
+#if PY_MAJOR_VERSION < 3
+PyDoc_STRVAR(iterkeys__doc__,
+ "D.iterkeys() -> an iterator over the keys of D");
+PyDoc_STRVAR(itervalues__doc__,
+ "D.itervalues() -> an iterator over the values of D");
+PyDoc_STRVAR(iteritems__doc__,
+ "D.iteritems() -> an iterator over the (key, value) items of D");
+#endif
+
+// Method table of yql.TDict. On Python 2 the iter* names are registered as
+// aliases of the same C functions as keys/items/values.
+static PyMethodDef LazyDictMethods[] = {
+ { "get", TPyLazyDict::Get, METH_VARARGS, get__doc__ },
+ { "keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__ },
+ { "items", TPyLazyDict::Items, METH_NOARGS, items__doc__ },
+ { "values", TPyLazyDict::Values, METH_NOARGS, values__doc__ },
+#if PY_MAJOR_VERSION < 3
+ { "iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__ },
+ { "iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__ },
+ { "itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__ },
+#endif
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+// Type object for yql.TDict. Instances are created only from C++ via
+// TPyLazyDict::New (tp_new/tp_init are null) and destroyed through
+// TPyLazyDict::Dealloc. The preprocessor branches follow the PyTypeObject
+// layout changes between Python versions (tp_print / tp_vectorcall_offset,
+// tp_compare / tp_as_async, tp_finalize, tp_vectorcall).
+// NOTE(review): keep the entries in struct order - INIT_MEMBER may expand to
+// a positional initializer; confirm against its definition.
+PyTypeObject PyLazyDictType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TDict"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazyDict)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazyDict::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazyDict::Repr),
+ INIT_MEMBER(tp_as_number , &LazyDictNumbering),
+ INIT_MEMBER(tp_as_sequence , &LazyDictSequence),
+ INIT_MEMBER(tp_as_mapping , &LazyDictMapping),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN),
+ INIT_MEMBER(tp_doc , "yql.TDict object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , &TPyLazyDict::Iter),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , LazyDictMethods),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazySet interface
+//////////////////////////////////////////////////////////////////////////////
+// Python object (type `yql.TSet`) that wraps a boxed UDF dictionary value
+// used as a set: only membership tests, length, truthiness and key iteration
+// are exposed. PyObject_HEAD must stay first so the struct can be cast
+// to/from PyObject*.
+struct TPyLazySet
+{
+    using TPtr = NUdf::TRefCountedPtr<TPyLazySet, TPyPtrOps<TPyLazySet>>;
+
+    PyObject_HEAD;
+    TPyCastContext::TPtr CastCtx;
+    const NUdf::TType* ItemType;
+    // Wrapped value; reset when the owning TPyContext is cleaned up, after
+    // which Value.Get() throws.
+    TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+
+    inline static TPyLazySet* Cast(PyObject* o) {
+        return reinterpret_cast<TPyLazySet*>(o);
+    }
+
+    // tp_dealloc: objects are created with `new` in New(), so they are
+    // destroyed with `delete`.
+    inline static void Dealloc(PyObject* self) {
+        delete Cast(self);
+    }
+
+    static PyObject* New(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* itemType,
+        NUdf::IBoxedValuePtr&& value);
+
+    // The former `Get` declaration was removed: it had no definition and no
+    // user (yql.TSet registers no method table).
+    static int Bool(PyObject* self);
+    static PyObject* Repr(PyObject* self);
+    static Py_ssize_t Len(PyObject* self);
+    static int Contains(PyObject* self, PyObject* key);
+
+    static PyObject* Iter(PyObject* self);
+};
+
+// Sequence protocol slots of yql.TSet: only len() and `item in s` are
+// provided. The slice slot names differ between Python 2 and Python 3.
+// NOTE(review): keep the entries in struct order - INIT_MEMBER may expand to
+// a positional initializer; confirm against its definition.
+PySequenceMethods LazySetSequence = {
+ INIT_MEMBER(sq_length , TPyLazySet::Len),
+ INIT_MEMBER(sq_concat , nullptr),
+ INIT_MEMBER(sq_repeat , nullptr),
+ INIT_MEMBER(sq_item , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(was_sq_slice , nullptr),
+#else
+ INIT_MEMBER(sq_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_ass_item , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(was_sq_ass_slice , nullptr),
+#else
+ INIT_MEMBER(sq_ass_slice , nullptr),
+#endif
+ INIT_MEMBER(sq_contains , TPyLazySet::Contains),
+ INIT_MEMBER(sq_inplace_concat , nullptr),
+ INIT_MEMBER(sq_inplace_repeat , nullptr),
+};
+
+// Number protocol slots of yql.TSet: only truth testing (nb_bool on
+// Python 3, nb_nonzero on Python 2) is provided; every other slot is null.
+// NOTE(review): keep the entries in struct order - INIT_MEMBER may expand to
+// a positional initializer; confirm against its definition.
+PyNumberMethods LazySetNumbering = {
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_bool, TPyLazySet::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyLazySet::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+// Type object for yql.TSet. Instances are created only from C++ via
+// TPyLazySet::New (tp_new/tp_init are null) and destroyed through
+// TPyLazySet::Dealloc. No mapping slots and no method table are registered.
+// The preprocessor branches follow the PyTypeObject layout changes between
+// Python versions.
+// NOTE(review): keep the entries in struct order - INIT_MEMBER may expand to
+// a positional initializer; confirm against its definition.
+PyTypeObject PyLazySetType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TSet"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazySet)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazySet::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazySet::Repr),
+ INIT_MEMBER(tp_as_number , &LazySetNumbering),
+ INIT_MEMBER(tp_as_sequence , &LazySetSequence),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN),
+ INIT_MEMBER(tp_doc , "yql.TSet object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , &TPyLazySet::Iter),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyDict implementation
+//////////////////////////////////////////////////////////////////////////////
+// Truth-value slot: 1 when the wrapped dict has items, 0 when it is empty,
+// -1 on error (via PY_CATCH).
+int TPyLazyDict::Bool(PyObject* self)
+{
+    PY_TRY {
+        const auto& boxed = Cast(self)->Value.Get();
+        if (NUdf::TBoxedValueAccessor::HasDictItems(*boxed)) {
+            return 1;
+        }
+        return 0;
+    } PY_CATCH(-1)
+}
+
+// tp_repr slot: every instance is rendered as the fixed text "<yql.TDict>".
+PyObject* TPyLazyDict::Repr(PyObject* /* self */)
+{
+    TPyObjectPtr text = PyRepr("<yql.TDict>");
+    return text.Release();
+}
+
+Py_ssize_t TPyLazyDict::Len(PyObject* self)
+{
+ PY_TRY {
+ return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get()));
+ } PY_CATCH(-1)
+}
+
+// mp_subscript slot: implements `d[key]`.
+//
+// With a KeyType the key is converted with FromPyObject and looked up; a miss
+// raises KeyError. Without a KeyType the key must support the index protocol
+// and is used as a non-negative integer index; a miss raises IndexError.
+// Returns a new reference, or nullptr with a Python exception set.
+PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key)
+{
+    PY_TRY {
+        TPyLazyDict* dict = Cast(self);
+
+        if (dict->KeyType) {
+            const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key);
+            if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+
+            const TPyObjectPtr repr = PyObject_Repr(key);
+            PyErr_SetObject(PyExc_KeyError, repr.Get());
+            return nullptr;
+        } else {
+            if (!PyIndex_Check(key)) {
+                const TPyObjectPtr type = PyObject_Type(key);
+                const TPyObjectPtr repr = PyObject_Repr(type.Get());
+                const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get());
+                PyErr_SetObject(PyExc_TypeError, error.Get());
+                return nullptr;
+            }
+
+            const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError);
+            if (index < 0) {
+                // A failed conversion returns -1 with an exception already
+                // set. A genuinely negative index arrives here with no
+                // exception, and returning nullptr in that state would make
+                // the interpreter raise SystemError - report IndexError.
+                if (!PyErr_Occurred()) {
+                    const TPyObjectPtr repr = PyObject_Repr(key);
+                    PyErr_SetObject(PyExc_IndexError, repr.Get());
+                }
+                return nullptr;
+            }
+
+            if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+
+            const TPyObjectPtr repr = PyObject_Repr(key);
+            PyErr_SetObject(PyExc_IndexError, repr.Get());
+            return nullptr;
+        }
+
+    } PY_CATCH(nullptr)
+}
+
+// sq_contains slot: implements `key in d`.
+// -1 error
+// 0 not found
+// 1 found
+//
+// With a KeyType the key is converted with FromPyObject. Without one the key
+// must support the index protocol; a negative index is never contained.
+int TPyLazyDict::Contains(PyObject* self, PyObject* key)
+{
+    PY_TRY {
+        TPyLazyDict* dict = Cast(self);
+        NUdf::TUnboxedValue mkqlKey;
+
+        if (dict->KeyType) {
+            mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key);
+        } else {
+            if (!PyIndex_Check(key)) {
+                const TPyObjectPtr type = PyObject_Type(key);
+                const TPyObjectPtr repr = PyObject_Repr(type.Get());
+                const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get());
+                PyErr_SetObject(PyExc_TypeError, error.Get());
+                return -1;
+            }
+
+            const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError);
+            if (index < 0) {
+                // -1 with an exception set means the conversion failed: that
+                // must be reported as an error, not as "not found" with the
+                // exception left pending.
+                return PyErr_Occurred() ? -1 : 0;
+            }
+            mkqlKey = NUdf::TUnboxedValuePod(ui64(index));
+        }
+
+        return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 1 : 0;
+    } PY_CATCH(-1)
+}
+
+// Implements `d.get(key[, default])`: returns the payload stored under `key`
+// or `default` (None when omitted) if the key is absent.
+//
+// With a KeyType the key is converted with FromPyObject. Without one the key
+// must support the index protocol; a negative index is treated as absent.
+// Returns a new reference, or nullptr with a Python exception set.
+PyObject* TPyLazyDict::Get(PyObject* self, PyObject* args)
+{
+    PY_TRY {
+        PyObject* key = nullptr;
+        PyObject* failobj = Py_None;
+
+        if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
+            return nullptr;
+
+        TPyLazyDict* dict = Cast(self);
+        if (dict->KeyType) {
+            const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key);
+            if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+        } else {
+            if (!PyIndex_Check(key)) {
+                const TPyObjectPtr type = PyObject_Type(key);
+                const TPyObjectPtr repr = PyObject_Repr(type.Get());
+                const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get());
+                PyErr_SetObject(PyExc_TypeError, error.Get());
+                return nullptr;
+            }
+
+            const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError);
+            if (index < 0) {
+                // Only a failed conversion (exception set) is an error.
+                // A genuinely negative index has no exception set, so
+                // returning nullptr would trigger SystemError; such an index
+                // cannot be present, so fall through to the default.
+                if (PyErr_Occurred()) {
+                    return nullptr;
+                }
+            } else if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) {
+                return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release();
+            }
+        }
+
+        Py_INCREF(failobj);
+        return failobj;
+    } PY_CATCH(nullptr)
+}
+
+// Returns a Python iterator over the keys of the wrapped dict
+// (also used as tp_iter through Iter()).
+PyObject* TPyLazyDict::Keys(PyObject* self, PyObject* /* args */)
+{
+    PY_TRY {
+        TPyLazyDict* const lazy = Cast(self);
+        const auto& boxed = lazy->Value.Get();
+        return ToPyIterator(
+            lazy->CastCtx,
+            lazy->KeyType,
+            NUdf::TBoxedValueAccessor::GetKeysIterator(*boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Returns a Python iterator over the (key, payload) pairs of the wrapped
+// dict.
+PyObject* TPyLazyDict::Items(PyObject* self, PyObject* /* args */)
+{
+    PY_TRY {
+        TPyLazyDict* const lazy = Cast(self);
+        const auto& boxed = lazy->Value.Get();
+        return ToPyIterator(
+            lazy->CastCtx,
+            lazy->KeyType,
+            lazy->PayloadType,
+            NUdf::TBoxedValueAccessor::GetDictIterator(*boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Returns a Python iterator over the payloads of the wrapped dict.
+PyObject* TPyLazyDict::Values(PyObject* self, PyObject* /* args */)
+{
+    PY_TRY {
+        TPyLazyDict* const lazy = Cast(self);
+        const auto& boxed = lazy->Value.Get();
+        return ToPyIterator(
+            lazy->CastCtx,
+            lazy->PayloadType,
+            NUdf::TBoxedValueAccessor::GetPayloadsIterator(*boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Allocates a yql.TDict wrapper with `new`, initialises its Python header
+// and stores the context, the key/payload types and the wrapped value.
+// The value is registered in castCtx->PyCtx's cleanup list by Value.Set().
+PyObject* TPyLazyDict::New(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* keyType,
+        const NUdf::TType* payloadType,
+        NUdf::IBoxedValuePtr&& value)
+{
+    TPyLazyDict* wrapper = new TPyLazyDict;
+    PyObject_INIT(wrapper, &PyLazyDictType);
+
+    wrapper->KeyType = keyType;
+    wrapper->PayloadType = payloadType;
+    wrapper->CastCtx = castCtx;
+    wrapper->Value.Set(castCtx->PyCtx, value);
+
+    return reinterpret_cast<PyObject*>(wrapper);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazySet implementation
+//////////////////////////////////////////////////////////////////////////////
+// Truth-value slot: 1 when the wrapped set has items, 0 when it is empty,
+// -1 on error (via PY_CATCH).
+int TPyLazySet::Bool(PyObject* self)
+{
+    PY_TRY {
+        const auto& boxed = Cast(self)->Value.Get();
+        if (NUdf::TBoxedValueAccessor::HasDictItems(*boxed)) {
+            return 1;
+        }
+        return 0;
+    } PY_CATCH(-1)
+}
+
+// tp_repr slot: every instance is rendered as the fixed text "<yql.TSet>".
+PyObject* TPyLazySet::Repr(PyObject* /* self */)
+{
+    TPyObjectPtr text = PyRepr("<yql.TSet>");
+    return text.Release();
+}
+
+Py_ssize_t TPyLazySet::Len(PyObject* self)
+{
+ PY_TRY {
+ return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get()));
+ } PY_CATCH(-1)
+}
+
+// sq_contains slot: implements `item in s`.
+// -1 error
+// 0 not found
+// 1 found
+int TPyLazySet::Contains(PyObject* self, PyObject* key)
+{
+    PY_TRY {
+        TPyLazySet* const lazy = Cast(self);
+        const auto mkqlKey = FromPyObject(lazy->CastCtx, lazy->ItemType, key);
+        const auto& boxed = lazy->Value.Get();
+        if (NUdf::TBoxedValueAccessor::Contains(*boxed, mkqlKey)) {
+            return 1;
+        }
+        return 0;
+    } PY_CATCH(-1)
+}
+
+// tp_iter slot: returns a Python iterator over the items of the wrapped set
+// (the keys of the underlying dict value).
+PyObject* TPyLazySet::Iter(PyObject* self)
+{
+    PY_TRY {
+        TPyLazySet* const lazy = Cast(self);
+        const auto& boxed = lazy->Value.Get();
+        return ToPyIterator(
+            lazy->CastCtx,
+            lazy->ItemType,
+            NUdf::TBoxedValueAccessor::GetKeysIterator(*boxed)).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Allocates a yql.TSet wrapper with `new`, initialises its Python header and
+// stores the context, the item type and the wrapped value.
+// The value is registered in castCtx->PyCtx's cleanup list by Value.Set().
+PyObject* TPyLazySet::New(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* itemType,
+        NUdf::IBoxedValuePtr&& value)
+{
+    TPyLazySet* wrapper = new TPyLazySet;
+    PyObject_INIT(wrapper, &PyLazySetType);
+
+    wrapper->ItemType = itemType;
+    wrapper->CastCtx = castCtx;
+    wrapper->Value.Set(castCtx->PyCtx, value);
+
+    return reinterpret_cast<PyObject*>(wrapper);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// Wraps the boxed dict held by `value` into a new yql.TDict Python object.
+TPyObjectPtr ToPyLazyDict(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* keyType,
+        const NUdf::TType* payloadType,
+        const NUdf::TUnboxedValuePod& value)
+{
+    PyObject* const wrapper = TPyLazyDict::New(castCtx, keyType, payloadType, value.AsBoxed());
+    return wrapper;
+}
+
+// Wraps the boxed dict held by `value` into a new yql.TSet Python object.
+TPyObjectPtr ToPyLazySet(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* itemType,
+        const NUdf::TUnboxedValuePod& value)
+{
+    PyObject* const wrapper = TPyLazySet::New(castCtx, itemType, value.AsBoxed());
+    return wrapper;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.h b/yql/essentials/udfs/common/python/bindings/py_dict.h
new file mode 100644
index 0000000000..538ca69a12
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_dict.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+extern PyTypeObject PyLazyDictType; // presumably the type object for TPyLazyDict instances - confirm in py_dict.cpp
+extern PyTypeObject PyLazySetType; // type object that TPyLazySet::New passes to PyObject_INIT
+
+TPyObjectPtr ToPyLazyDict( // builds a TPyLazyDict object around value.AsBoxed()
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payloadType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+TPyObjectPtr ToPyLazySet( // builds a TPyLazySet object around value.AsBoxed()
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+NKikimr::NUdf::TUnboxedValue FromPyMapping( // presumably converts an arbitrary Python mapping to a dict value - confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payType,
+ PyObject* map);
+
+NKikimr::NUdf::TUnboxedValue FromPyDict( // presumably converts a Python dict to a dict value - confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payType,
+ PyObject* dict);
+
+NKikimr::NUdf::TUnboxedValue FromPySet( // presumably converts a Python set to a dict value with Void payloads - confirm in py_dict.cpp
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ PyObject* set);
+
+NKikimr::NUdf::TUnboxedValue FromPySequence( // overload without a payload type; NOTE(review): the parameter is called keyType here but itemType in the overload below - confirm the intended meaning
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ PyObject* sequence);
+
+NKikimr::NUdf::TUnboxedValue FromPySequence( // overload taking the item type plus the data type id of the key
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TDataTypeId keyType,
+ PyObject* sequence);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp
new file mode 100644
index 0000000000..9ac9627ebb
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp
@@ -0,0 +1,722 @@
+#include "ut3/py_test_engine.h"
+
+#include <yql/essentials/public/udf/udf_ut_helpers.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyDictTest) {
+ Y_UNIT_TEST(FromPyEmptyDict) { // an empty Python dict must yield a boxed dict value without items
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(!value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 0);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Length) { // a three-entry Python dict must yield an unsorted dict value of length 3
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT(!value.IsSortedDict());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Lookup) { // Lookup must return the payload for present keys and an empty value for missing ones
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1)));
+ UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one");
+ const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2)));
+ UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two");
+ const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3)));
+ UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three");
+
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); // keys 0 and 4 are not in the dict
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Contains) { // Contains must be true exactly for the keys 1, 2 and 3
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Items) { // pair iteration must produce every (key, payload) entry
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::map<ui32, TString> items; // collected into a map, so iteration order is not asserted
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace(key.Get<ui32>(), payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[1], "one");
+ UNIT_ASSERT_EQUAL(items[2], "two");
+ UNIT_ASSERT_EQUAL(items[3], "three");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Keys) { // the keys iterator must produce exactly the keys 1, 2 and 3
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<ui32> items;
+ const auto it = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; it.Next(key);) {
+ items.emplace_back(key.Get<ui32>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+
+ std::sort(items.begin(), items.end()); // sorted first, so iteration order is not asserted
+ UNIT_ASSERT_EQUAL(items[0], 1U);
+ UNIT_ASSERT_EQUAL(items[1], 2U);
+ UNIT_ASSERT_EQUAL(items[2], 3U);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict_Values) { // the payloads iterator must produce exactly the three payload strings
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return {1: 'one', 3: 'three', 2: 'two'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<TString> items;
+ const auto it = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; it.Next(payload);) {
+ items.emplace_back(payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+
+ std::sort(items.begin(), items.end()); // sorted first, so the expected order below is lexicographic
+ UNIT_ASSERT_EQUAL(items[0], "one");
+ UNIT_ASSERT_EQUAL(items[1], "three");
+ UNIT_ASSERT_EQUAL(items[2], "two");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList_Length) { // a Python list returned for a dict type must be reported as a sorted dict of length 3
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "def Test(): return ['one', 'two', 'three']",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT(value.IsSortedDict());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTuple_Lookup) { // a tuple viewed as a dict is looked up by index; negative keys count from the end
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<i32, char*>>(
+ "def Test(): return ('one', 'two', 'three')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0)));
+ UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one");
+ const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1)));
+ UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two");
+ const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(i32(2)));
+ UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three");
+ const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1)));
+ UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three");
+ const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2)));
+ UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two");
+ const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3)));
+ UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one");
+
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); // one past either end must give an empty value
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList_Contains) { // Contains on a list viewed as a dict accepts indexes -3..2 and rejects 3 and -4
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<i16, char*>>(
+ "def Test(): return ['one', 'two', 'three']",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2))));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4))));
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTuple_Items) { // pair iteration over a tuple must yield (index, element) in positional order
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui16, char*>>(
+ "def Test(): return ('one', 'two', 'three')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<std::pair<ui16, TString>> items; // a vector keeps the iteration order so it can be asserted
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<ui16>(), payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3U);
+ UNIT_ASSERT_EQUAL(items[0].first, 0);
+ UNIT_ASSERT_EQUAL(items[1].first, 1);
+ UNIT_ASSERT_EQUAL(items[2].first, 2);
+ UNIT_ASSERT_EQUAL(items[0].second, "one");
+ UNIT_ASSERT_EQUAL(items[1].second, "two");
+ UNIT_ASSERT_EQUAL(items[2].second, "three");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyList_Keys) { // the keys of a list viewed as a dict must be the indexes 0, 1, 2 in order
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<i64, char*>>(
+ "def Test(): return ['one', 'two', 'three']",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<i64> items;
+ const auto it = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; it.Next(key);) {
+ items.emplace_back(key.Get<i64>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0], 0);
+ UNIT_ASSERT_EQUAL(items[1], 1);
+ UNIT_ASSERT_EQUAL(items[2], 2);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTuple_Values) { // the payloads of a tuple viewed as a dict must be its elements in positional order
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui64, char*>>(
+ "def Test(): return ('one', 'two', 'three')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ std::vector<TString> items;
+ const auto it = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; it.Next(payload);) {
+ items.emplace_back(payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0], "one");
+ UNIT_ASSERT_EQUAL(items[1], "two");
+ UNIT_ASSERT_EQUAL(items[2], "three");
+ });
+ }
+
+ // An empty hashed dict must be seen by the Python script as falsy and of length 0.
+ Y_UNIT_TEST(ToPyEmptyDict) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TDict<ui8, ui32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             // `type` is consumed by NewDict, so it is not marked Y_UNUSED.
+             return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build();
+         },
+         "def Test(value):\n"
+         " assert not value\n"
+         " assert len(value) == 0\n"
+     );
+ }
+
+ Y_UNIT_TEST(ToPyDict) { // a hashed {int: double} dict must behave like a mapping inside the Python script
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDict<int, double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value):\n"
+ " assert value\n"
+ " assert len(value) == 3\n"
+ " assert iter(value) is not None\n"
+ " assert 2 in value\n"
+ " assert 0 not in value\n"
+ " assert set(iter(value)) == set([1, 2, 3])\n"
+ " assert value[2] == 0.2\n"
+ " assert value.get(0, 0.7) == 0.7\n"
+ " assert value.get(3, 0.7) == 0.3\n"
+ " assert sorted(value.keys()) == [1, 2, 3]\n"
+ " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n"
+ " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n"
+#if PY_MAJOR_VERSION < 3 // the iter* methods are only exercised when built against Python 2
+ " assert all(isinstance(k, int) for k in value.iterkeys())\n"
+ " assert all(isinstance(v, float) for v in value.itervalues())\n"
+ " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n"
+#endif
+ );
+ }
+
+ Y_UNIT_TEST(ToPyDictWrongKey) { // subscripting with a missing key must raise KeyError in the Python script
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TDict<int, double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value):\n"
+ " try:\n"
+ " print(value[0])\n"
+ " except KeyError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ Y_UNIT_TEST(FromPyEmptySet) { // an empty Python set must yield a boxed dict value (void payload type) without items
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<ui32, void>>(
+ "def Test(): return set([])",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(!value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 0);
+ });
+
+ }
+
+ Y_UNIT_TEST(FromPySet) { // a Python set of three strings must yield an unsorted dict whose keys are those strings
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<char*, void>>(
+ "def Test(): return set(['one', 'two', 'three'])",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT(!value.IsSortedDict());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+
+ std::set<TString> set; // collected into a std::set, so iteration order is not asserted
+ const auto it = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; it.Next(key);) {
+ set.emplace(key.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(set.size(), 3);
+ UNIT_ASSERT(set.count("one"));
+ UNIT_ASSERT(set.count("two"));
+ UNIT_ASSERT(set.count("three"));
+ });
+
+ }
+
+ Y_UNIT_TEST(FromPySet_Contains) { // Contains on a set of byte strings must be true only for its members
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<char*, void>>(
+ "def Test(): return {b'one', b'two', b'three'}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one")));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two")));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three")));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero")));
+ });
+
+ }
+
+ // An empty hashed dict with void payload type must be seen by the Python script
+ // as falsy and of length 0.
+ Y_UNIT_TEST(ToPyEmptySet) {
+     TPythonTestEngine engine;
+
+     engine.ToPython<NUdf::TDict<ui8, void>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             // `type` is consumed by NewDict, so it is not marked Y_UNUSED.
+             return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build();
+         },
+         "def Test(value):\n"
+         " assert not value\n"
+         " assert len(value) == 0\n"
+     );
+
+ }
+
+ Y_UNIT_TEST(ToPySet) { // a hashed dict with Void payloads must support len, iteration and `in` inside the Python script
+ TPythonTestEngine engine;
+
+ engine.ToPython<NUdf::TDict<ui8, void>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((ui8) 1), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod((ui8) 2), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod((ui8) 3), NUdf::TUnboxedValuePod::Void())
+ .Build();
+
+ },
+ "def Test(value):\n"
+ " assert len(value) == 3\n"
+ " assert all(isinstance(k, int) for k in iter(value))\n"
+ " assert all(i in value for i in [1, 2, 3])\n");
+ }
+
+ Y_UNIT_TEST(FromPyMultiDict) { // a Python dict whose payloads are lists must yield a dict of lists with the same contents
+ TPythonTestEngine engine;
+
+ engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>(
+ "def Test(): return {1: ['one', 'two'], 3: ['three']}",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 2);
+
+ std::unordered_map<ui32, std::vector<TString>> map; // key -> list items, in list iteration order
+ const auto dictIt = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) {
+ auto& val = map[key.Get<ui32>()];
+ const auto listIt = payload.GetListIterator();
+ for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) {
+ val.emplace_back(listItem.AsStringRef());
+ }
+ }
+
+ UNIT_ASSERT_EQUAL(map.size(), 2);
+ auto it = map.find(1);
+ UNIT_ASSERT(it != map.end());
+ UNIT_ASSERT_EQUAL(it->second.size(), 2);
+ UNIT_ASSERT_EQUAL(it->second[0], "one");
+ UNIT_ASSERT_EQUAL(it->second[1], "two");
+ it = map.find(3);
+ UNIT_ASSERT(it != map.end());
+ UNIT_ASSERT_EQUAL(it->second.size(), 1);
+ UNIT_ASSERT_EQUAL(it->second[0], "three");
+ });
+
+ }
+
+ Y_UNIT_TEST(ToPyMultiDict) { // a multi dict with two payloads under key 1 must show two keys in Python, with value[1] of length 2
+ TPythonTestEngine engine;
+
+ engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi;
+ return vb.NewDict(type, flags)->
+ Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("one"))
+ .Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("two")) // second payload for the same key 1
+ .Add(NUdf::TUnboxedValuePod((ui8) 3), vb.NewString("three"))
+ .Build();
+
+ },
+ "def Test(value):\n"
+ " assert len(value) == 2\n"
+ " assert 1 in value\n"
+ " assert 3 in value\n"
+ " assert len(value[1]) == 2\n"
+ " assert 'one' in value[1]\n"
+ " assert 'two' in value[1]\n"
+ " assert list(value[3]) == ['three']\n");
+ }
+
+ Y_UNIT_TEST(ToPyAndBackDictAsIs) { // a sorted dict returned unchanged by the script must keep its contents and iteration order
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TDict<i32, double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Sorted)->
+ Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32) 0)));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32) 3)));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32) 2)).Get<double>(), 0.2);
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32) 4)));
+
+ std::vector<std::pair<i32, double>> items; // vectors keep the iteration order so it can be asserted below
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<i32>(), payload.Get<double>());
+ }
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0].first, 1);
+ UNIT_ASSERT_EQUAL(items[1].first, 2);
+ UNIT_ASSERT_EQUAL(items[2].first, 3);
+ UNIT_ASSERT_EQUAL(items[0].second, 0.1);
+ UNIT_ASSERT_EQUAL(items[1].second, 0.2);
+ UNIT_ASSERT_EQUAL(items[2].second, 0.3);
+
+ std::vector<i32> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<i32>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 3);
+ UNIT_ASSERT_EQUAL(keys[0], 1);
+ UNIT_ASSERT_EQUAL(keys[1], 2);
+ UNIT_ASSERT_EQUAL(keys[2], 3);
+
+ std::vector<double> values;
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.Get<double>());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 3);
+ UNIT_ASSERT_EQUAL(values[0], 0.1);
+ UNIT_ASSERT_EQUAL(values[1], 0.2);
+ UNIT_ASSERT_EQUAL(values[2], 0.3);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(PyInvertDict) { // the script swaps keys and payloads; the result must be readable as a {double: i32} dict
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Hashed)->
+ Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1))
+ .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2))
+ .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3))
+ .Build();
+ },
+ "def Test(value): return { v: k for k, v in value.items() }",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double) 0.1)));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double) 0.0)));
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double) 0.4)));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double) 0.2)).Get<i32>(), 2);
+
+ std::map<double, i32> items; // collected into a map, so iteration order is not asserted
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace(key.Get<double>(), payload.Get<i32>());
+ }
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0.1], 1);
+ UNIT_ASSERT_EQUAL(items[0.2], 2);
+ UNIT_ASSERT_EQUAL(items[0.3], 3);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(FromPyOrderedDict) { // an OrderedDict must convert like a dict; its insertion order is asserted only for Python 3 builds
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TDict<ui32, char*>>(
+ "from collections import OrderedDict\n"
+ "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0))));
+ const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1)));
+ UNIT_ASSERT_EQUAL(v.AsStringRef(), "one");
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4)))));
+
+#if PY_MAJOR_VERSION >= 3 // the order checks below are compiled only for Python 3
+ std::vector<std::pair<ui32, TString>> items;
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<ui32>(), payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 3);
+ UNIT_ASSERT_EQUAL(items[0].first, 2); // expected order is the insertion order 2, 1, 3
+ UNIT_ASSERT_EQUAL(items[1].first, 1);
+ UNIT_ASSERT_EQUAL(items[2].first, 3);
+ UNIT_ASSERT_EQUAL(items[0].second, "two");
+ UNIT_ASSERT_EQUAL(items[1].second, "one");
+ UNIT_ASSERT_EQUAL(items[2].second, "three");
+
+ std::vector<ui32> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<ui32>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 3);
+ UNIT_ASSERT_EQUAL(keys[0], 2);
+ UNIT_ASSERT_EQUAL(keys[1], 1);
+ UNIT_ASSERT_EQUAL(keys[2], 3);
+
+ std::vector<TString> values;
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.AsStringRef());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 3);
+ UNIT_ASSERT_EQUAL(values[0], "two");
+ UNIT_ASSERT_EQUAL(values[1], "one");
+ UNIT_ASSERT_EQUAL(values[2], "three");
+#endif
+ });
+ }
+
+ Y_UNIT_TEST(ToPyAndBackSetAsIs) { // a sorted set (dict with Void payloads) returned unchanged by the script must keep keys and order
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TDict<float, void>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ return vb.NewDict(type, NUdf::TDictFlags::Sorted)->
+ Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void())
+ .Add(NUdf::TUnboxedValuePod(0.3f), NUdf::TUnboxedValuePod::Void())
+ .Build();
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 3);
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f)));
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f)));
+ UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); // Lookup of a present key must give a non-empty value
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f)));
+
+ std::vector<float> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<float>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 3);
+ UNIT_ASSERT_EQUAL(keys[0], 0.1f);
+ UNIT_ASSERT_EQUAL(keys[1], 0.2f);
+ UNIT_ASSERT_EQUAL(keys[2], 0.3f);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { // an array-backed list passed through Python must be readable back as an index-keyed dict
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>(
+ [](const TType*, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto a = vb.NewArray(9U, items); // NewArray hands back the item storage through `items`
+ const float f[] = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f };
+ std::transform(f, f + 9U, items, [](float v){ return NUdf::TUnboxedValuePod(v); });
+ return a;
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U);
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10))));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); // index 5 holds the sixth element
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13))));
+
+ std::vector<std::pair<i8, float>> items;
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<i8>(), payload.Get<float>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 9U);
+ UNIT_ASSERT_EQUAL(items.front().first, 0);
+ UNIT_ASSERT_EQUAL(items.back().first, 8);
+ UNIT_ASSERT_EQUAL(items.front().second, 0.1f);
+ UNIT_ASSERT_EQUAL(items.back().second, 0.9f);
+
+ std::vector<i8> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<i8>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 9U);
+ UNIT_ASSERT_EQUAL(keys.front(), 0);
+ UNIT_ASSERT_EQUAL(keys.back(), 8);
+
+ std::vector<float> values;
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.Get<float>());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 9U);
+ UNIT_ASSERT_EQUAL(values.front(), 0.1f);
+ UNIT_ASSERT_EQUAL(values.back(), 0.9f);
+ }
+ );
+ }
+
+ Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { // a lazy list passed through Python must be readable back as an index-keyed dict
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>(
+ [](const TType*, const NUdf::IValueBuilder&) {
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); // the assertions below expect the 9 values 1..9
+ },
+ "def Test(value): return value",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value.HasDictItems());
+ UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U);
+ UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0))));
+ UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10))));
+ UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6);
+ UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13))));
+
+ std::vector<std::pair<ui8, i32>> items;
+ const auto it = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) {
+ items.emplace_back(key.Get<ui8>(), payload.Get<i32>());
+ }
+
+ UNIT_ASSERT_EQUAL(items.size(), 9U);
+ UNIT_ASSERT_EQUAL(items.front().first, 0);
+ UNIT_ASSERT_EQUAL(items.back().first, 8);
+ UNIT_ASSERT_EQUAL(items.front().second, 1);
+ UNIT_ASSERT_EQUAL(items.back().second, 9);
+
+ std::vector<ui8> keys;
+ const auto kit = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; kit.Next(key);) {
+ keys.emplace_back(key.Get<ui8>());
+ }
+
+ UNIT_ASSERT_EQUAL(keys.size(), 9U);
+ UNIT_ASSERT_EQUAL(keys.front(), 0);
+ UNIT_ASSERT_EQUAL(keys.back(), 8);
+
+ std::vector<i32> values;
+ const auto pit = value.GetPayloadsIterator();
+ for (NUdf::TUnboxedValue payload; pit.Next(payload);) {
+ values.emplace_back(payload.Get<i32>());
+ }
+
+ UNIT_ASSERT_EQUAL(values.size(), 9U);
+ UNIT_ASSERT_EQUAL(values.front(), 1);
+ UNIT_ASSERT_EQUAL(values.back(), 9);
+ }
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.cpp b/yql/essentials/udfs/common/python/bindings/py_errors.cpp
new file mode 100644
index 0000000000..5741978d54
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_errors.cpp
@@ -0,0 +1,72 @@
+#include "py_errors.h"
+#include "py_ptr.h"
+#include "py_cast.h"
+#include "py_utils.h"
+
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+
+namespace NPython {
+
+// this function in conjunction with code after Py_Initialize
+// does approximately the following:
+//
+// sys.stderr = StderrProxy(sys.stderr)
+//
+// ...
+//
+// sys.stderr._toggle_real_mode()
+// sys.excepthook(
+// sys.last_type,
+// sys.last_value,
+// sys.last_traceback)
+// sys.stderr._get_value()
+// sys.stderr._toggle_real_mode()
+//
+// where _toggle_real_mode, _get_value & all calls to stderr not in real mode
+// are handled in a thread-safe way
+//
+TString GetLastErrorAsString() // returns the text obtained from sys.stderr._get_value() after printing the pending Python error; empty when no error is pending or a step fails
+{
+ PyObject* etype;
+ PyObject* evalue;
+ PyObject* etraceback;
+
+ PyErr_Fetch(&etype, &evalue, &etraceback); // takes the pending error (if any) out of the interpreter
+
+ if (!etype) { // no error was pending
+ return {};
+ }
+
+ TPyObjectPtr etypePtr {etype, TPyObjectPtr::ADD_REF}; // NOTE(review): an extra reference is taken on top of the one PyErr_Fetch returned; PyErr_Restore below consumes one - confirm the balance on the early-return path
+ TPyObjectPtr evaluePtr {evalue, TPyObjectPtr::ADD_REF};
+ TPyObjectPtr etracebackPtr {etraceback, TPyObjectPtr::ADD_REF};
+
+ TPyObjectPtr stderrObject {PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF}; // PySys_GetObject gives a borrowed reference, hence ADD_REF
+ if (!stderrObject) {
+ return {};
+ }
+
+ TPyObjectPtr unused = PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr); // result is kept only to manage its reference; it is not inspected
+
+ PyErr_Restore(etypePtr.Get(), evaluePtr.Get(), etracebackPtr.Get()); // put the fetched error back so PyErr_Print can report it
+ // in unusual situations there may be low-level write to stderr
+ // (by direct C FILE* write), but that's OK
+ PyErr_Print();
+
+ TPyObjectPtr error = PyObject_CallMethod(stderrObject.Get(), "_get_value", nullptr);
+ if (!error) { // NOTE(review): this return skips the second _toggle_real_mode call below - confirm that is intended
+ return {};
+ }
+ unused.ResetSteal(
+ PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr)
+ );
+
+ TString errorValue;
+ if (!TryPyCast(error.Get(), errorValue)) { // fall back to a diagnostic built from the object's repr
+ errorValue = TString("can't get error string from: ") += PyObjectRepr(error.Get());
+ }
+ return errorValue;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.h b/yql/essentials/udfs/common/python/bindings/py_errors.h
new file mode 100644
index 0000000000..2306b47bb9
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_errors.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+namespace NPython {
+
+TString GetLastErrorAsString(); // defined in py_errors.cpp
+
+#define PY_TRY try // opens the try block that PY_CATCH closes
+
+#define PY_CATCH(ErrorValue) /* converts a yexception into a Python RuntimeError and returns ErrorValue; other exception types are not caught here */ \
+ catch (const yexception& e) { \
+ PyErr_SetString(PyExc_RuntimeError, e.what()); \
+ return ErrorValue; \
+ }
+
+#define PY_ENSURE(condition, message) /* throws a yexception carrying `message` when `condition` is false */ \
+ do { \
+ if (Y_UNLIKELY(!(condition))) { \
+ throw yexception() << message; \
+ } \
+ } while (0)
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_gil.h b/yql/essentials/udfs/common/python/bindings/py_gil.h
new file mode 100644
index 0000000000..70e9bf3e91
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_gil.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <Python.h>
+
+
+namespace NPython {
+
+// Scope guard for the GIL: the constructor calls PyGILState_Ensure() and the
+// destructor passes the returned state to PyGILState_Release().
+struct TPyGilLocker
+{
+    TPyGilLocker()
+        : Gil(PyGILState_Ensure())
+    {
+    }
+
+    // A copy would release the same GIL state a second time, so copying is forbidden.
+    TPyGilLocker(const TPyGilLocker&) = delete;
+    TPyGilLocker& operator=(const TPyGilLocker&) = delete;
+
+    ~TPyGilLocker() {
+        PyGILState_Release(Gil);
+    }
+
+private:
+    PyGILState_STATE Gil; // state token returned by PyGILState_Ensure()
+};
+
+// Scope guard that gives up the GIL: the constructor calls PyEval_SaveThread()
+// and the destructor passes the saved thread state to PyEval_RestoreThread().
+struct TPyGilUnlocker {
+    TPyGilUnlocker()
+        : ThreadState(PyEval_SaveThread())
+    {
+    }
+
+    // A copy would restore the same thread state a second time, so copying is forbidden.
+    TPyGilUnlocker(const TPyGilUnlocker&) = delete;
+    TPyGilUnlocker& operator=(const TPyGilUnlocker&) = delete;
+
+    ~TPyGilUnlocker() {
+        PyEval_RestoreThread(ThreadState);
+    }
+
+private:
+    PyThreadState* ThreadState; // thread state returned by PyEval_SaveThread()
+};
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp
new file mode 100644
index 0000000000..090211be2c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp
@@ -0,0 +1,280 @@
+#include "py_iterator.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyIterator interface
+//////////////////////////////////////////////////////////////////////////////
+ // Python object wrapping a boxed UDF iterator that yields single items.
+ // The object is created with `new` in New() and destroyed with `delete`
+ // in Dealloc().
+ struct TPyIterator
+ {
+     PyObject_HEAD;
+     // Context handed to ToPyObject() when converting items.
+     TPyCastContext::TPtr CastCtx;
+     // Type of the yielded items; when null, Next() reads items as ui64.
+     const NUdf::TType* ItemType;
+     // The wrapped boxed iterator.
+     TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator;
+
+     // Views a generic Python object pointer as TPyIterator (no type check).
+     static TPyIterator* Cast(PyObject* object) {
+         return reinterpret_cast<TPyIterator*>(object);
+     }
+
+     // tp_dealloc slot.
+     static void Dealloc(PyObject* self) {
+         TPyIterator* const wrapper = Cast(self);
+         delete wrapper;
+     }
+
+     // tp_repr slot: fixed textual representation, independent of the instance.
+     static PyObject* Repr(PyObject* self) {
+         Y_UNUSED(self);
+         auto repr = PyRepr("<yql.TDictKeysIterator>");
+         return repr.Release();
+     }
+
+     static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator);
+     static PyObject* Next(PyObject* self);
+ };
+
+ // For Python 3 the flag is defined as 0 so that the tp_flags initializers in
+ // this file can name it for both major versions (presumably the Python 3
+ // headers no longer provide it -- confirm).
+ #if PY_MAJOR_VERSION >= 3
+ #define Py_TPFLAGS_HAVE_ITER 0
+ #endif
+
+ // Type object for TPyIterator. Slots are listed positionally, so their order
+ // must follow the PyTypeObject layout; the #if blocks pick the slots that
+ // exist in the Python version being compiled against. INIT_MEMBER is defined
+ // outside this view.
+ // The object iterates over itself: tp_iter is PyObject_SelfIter and
+ // tp_iternext is TPyIterator::Next.
+ // NOTE(review): tp_name says "yql.TIterator" while tp_doc and
+ // TPyIterator::Repr say "TDictKeysIterator" -- confirm which name is intended.
+ PyTypeObject PyIteratorType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyIterator::Dealloc),
+ #if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+ #else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+ #endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+ #if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+ #else
+ INIT_MEMBER(tp_compare , nullptr),
+ #endif
+ INIT_MEMBER(tp_repr , TPyIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TDictKeysIterator object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+ #if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+ #endif
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyPairIterator interface
+//////////////////////////////////////////////////////////////////////////////
+ // Python object wrapping a boxed UDF iterator that yields (key, payload)
+ // pairs. The object is created with `new` in New() and destroyed with
+ // `delete` in Dealloc().
+ struct TPyPairIterator
+ {
+     PyObject_HEAD;
+     // Context handed to ToPyObject() when converting keys and payloads.
+     TPyCastContext::TPtr CastCtx;
+     // Type of the keys; when null, Next() reads keys as ui64.
+     const NUdf::TType* KeyType;
+     // Type of the payloads.
+     const NUdf::TType* PayloadType;
+     // The wrapped boxed iterator.
+     TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator;
+
+     // Views a generic Python object pointer as TPyPairIterator (no type check).
+     static TPyPairIterator* Cast(PyObject* object) {
+         return reinterpret_cast<TPyPairIterator*>(object);
+     }
+
+     // tp_dealloc slot.
+     static void Dealloc(PyObject* self) {
+         TPyPairIterator* const wrapper = Cast(self);
+         delete wrapper;
+     }
+
+     // tp_repr slot: fixed textual representation, independent of the instance.
+     static PyObject* Repr(PyObject* self) {
+         Y_UNUSED(self);
+         auto repr = PyRepr("<yql.TDictIterator>");
+         return repr.Release();
+     }
+
+     static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator);
+     static PyObject* Next(PyObject* self);
+ };
+
+ // Type object for TPyPairIterator. Slots are listed positionally, so their
+ // order must follow the PyTypeObject layout; the #if blocks pick the slots
+ // that exist in the Python version being compiled against. INIT_MEMBER is
+ // defined outside this view.
+ // The object iterates over itself: tp_iter is PyObject_SelfIter and
+ // tp_iternext is TPyPairIterator::Next.
+ // NOTE(review): tp_name says "yql.TDictIterator" while tp_doc says
+ // "yql.TPairIterator object" -- confirm which name is intended.
+ PyTypeObject PyPairIteratorType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TDictIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyPairIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyPairIterator::Dealloc),
+ #if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+ #else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+ #endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+ #if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+ #else
+ INIT_MEMBER(tp_compare , nullptr),
+ #endif
+ INIT_MEMBER(tp_repr , TPyPairIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TPairIterator object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyPairIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+ #if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+ #endif
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+ // Allocates a TPyIterator, initialises its Python object header with
+ // PyIteratorType and stores the cast context, the item type and the boxed
+ // iterator in it. Returns the new object as a PyObject*.
+ PyObject* TPyIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator)
+ {
+     auto* const wrapper = new TPyIterator;
+     PyObject_INIT(wrapper, &PyIteratorType);
+
+     wrapper->CastCtx = ctx;
+     wrapper->ItemType = itemType;
+     wrapper->Iterator.Set(ctx->PyCtx, iterator);
+
+     return reinterpret_cast<PyObject*>(wrapper);
+ }
+
+ // tp_iternext slot. Pulls the next value from the wrapped boxed iterator and
+ // converts it to a Python object: through ToPyObject() when ItemType is set,
+ // otherwise as a ui64. Returns nullptr without setting an error when the
+ // iterator is exhausted; a yexception is turned into a Python RuntimeError
+ // by PY_CATCH.
+ PyObject* TPyIterator::Next(PyObject* self)
+ {
+     PY_TRY {
+         TPyIterator* const wrapper = Cast(self);
+         NUdf::TUnboxedValue current;
+         if (!NUdf::TBoxedValueAccessor::Next(*wrapper->Iterator.Get(), current)) {
+             return nullptr;
+         }
+
+         if (wrapper->ItemType) {
+             return ToPyObject(wrapper->CastCtx, wrapper->ItemType, current).Release();
+         }
+         return PyCast<ui64>(current.Get<ui64>()).Release();
+     } PY_CATCH(nullptr)
+ }
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyPairIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+ // Allocates a TPyPairIterator, initialises its Python object header with
+ // PyPairIteratorType and stores the cast context, key/payload types and the
+ // boxed iterator in it. Returns the new object as a PyObject*.
+ PyObject* TPyPairIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator)
+ {
+     auto* const wrapper = new TPyPairIterator;
+     PyObject_INIT(wrapper, &PyPairIteratorType);
+
+     wrapper->CastCtx = ctx;
+     wrapper->KeyType = keyType;
+     wrapper->PayloadType = payloadType;
+     wrapper->Iterator.Set(ctx->PyCtx, iterator);
+
+     return reinterpret_cast<PyObject*>(wrapper);
+ }
+
+ // tp_iternext slot. Pulls the next (key, payload) pair from the wrapped boxed
+ // iterator and returns it as a 2-tuple. The key goes through ToPyObject()
+ // when KeyType is set and is read as a ui64 otherwise. Returns nullptr
+ // without setting an error when the iterator is exhausted; a yexception is
+ // turned into a Python RuntimeError by PY_CATCH.
+ PyObject* TPyPairIterator::Next(PyObject* self)
+ {
+     PY_TRY {
+         TPyPairIterator* const wrapper = Cast(self);
+         NUdf::TUnboxedValue rawKey, rawPayload;
+         if (!NUdf::TBoxedValueAccessor::NextPair(*wrapper->Iterator.Get(), rawKey, rawPayload)) {
+             return nullptr;
+         }
+
+         const TPyObjectPtr pyKey = wrapper->KeyType
+             ? ToPyObject(wrapper->CastCtx, wrapper->KeyType, rawKey)
+             : PyCast<ui64>(rawKey.Get<ui64>());
+         const TPyObjectPtr pyPayload = ToPyObject(wrapper->CastCtx, wrapper->PayloadType, rawPayload);
+         return PyTuple_Pack(2, pyKey.Get(), pyPayload.Get());
+     } PY_CATCH(nullptr)
+ }
+
+//////////////////////////////////////////////////////////////////////////////
+
+ // Wraps the boxed value stored in `value` into a new TPyIterator object
+ // whose items are converted using `itemType`.
+ TPyObjectPtr ToPyIterator(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* itemType,
+     const NUdf::TUnboxedValuePod& value)
+ {
+     PyObject* const wrapper = TPyIterator::New(castCtx, itemType, value.AsBoxed());
+     return wrapper;
+ }
+
+ // Wraps the boxed value stored in `value` into a new TPyPairIterator object
+ // whose keys and payloads are converted using `keyType` and `payloadType`.
+ TPyObjectPtr ToPyIterator(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* keyType,
+     const NUdf::TType* payloadType,
+     const NUdf::TUnboxedValuePod& value)
+ {
+     PyObject* const wrapper = TPyPairIterator::New(castCtx, keyType, payloadType, value.AsBoxed());
+     return wrapper;
+ }
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.h b/yql/essentials/udfs/common/python/bindings/py_iterator.h
new file mode 100644
index 0000000000..5c5de27b0b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_iterator.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+ // Python type objects of the two iterator wrappers; both are defined in
+ // py_iterator.cpp.
+ extern PyTypeObject PyIteratorType;
+ extern PyTypeObject PyPairIteratorType;
+
+ // Creates a Python iterator object over the boxed value held by `value`.
+ // Each item is converted with `itemType`; the implementation reads items as
+ // ui64 when `itemType` is null.
+ TPyObjectPtr ToPyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+ // Creates a Python iterator object yielding (key, payload) tuples from the
+ // boxed value held by `value`. Payloads are converted with `payloadType`,
+ // keys with `keyType`; the implementation reads keys as ui64 when `keyType`
+ // is null.
+ TPyObjectPtr ToPyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* keyType,
+ const NKikimr::NUdf::TType* payloadType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp
new file mode 100644
index 0000000000..ffaa2fe4ec
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp
@@ -0,0 +1,705 @@
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/generic/maybe.h>
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+//////////////////////////////////////////////////////////////////////////////
+// TLazyDictBase
+//////////////////////////////////////////////////////////////////////////////
+ // Common base of the lazy dict-like wrappers around a Python object.
+ // It keeps the cast context, the key/item type and a counted reference to
+ // the Python object, and supplies the two boxed iterator adapters used by
+ // the derived classes. Every call into Python is made under TPyGilLocker;
+ // Python errors and yexceptions are reported through UdfTerminate() with
+ // the source position from the cast context prepended.
+ class TLazyDictBase: public NUdf::TBoxedValue
+ {
+ protected:
+     // Boxed iterator over a Python iterator; each item is converted with
+     // ItemType_.
+     class TIterator: public NUdf::TBoxedValue {
+     public:
+         TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter)
+             : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter))
+         {}
+
+         ~TIterator() {
+             // Drop the Python reference while the GIL is held.
+             const TPyGilLocker lock;
+             PyIter_.Reset();
+         }
+
+     private:
+         // Advances by one item without converting it.
+         bool Skip() override try {
+             const TPyGilLocker lock;
+             const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+             if (next) {
+                 return true;
+             }
+
+             // A null result is either exhaustion or a Python error.
+             if (PyErr_Occurred()) {
+                 UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+             }
+
+             return false;
+         } catch (const yexception& e) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+         }
+
+         // Fetches and converts the next item; returns false on exhaustion.
+         bool Next(NUdf::TUnboxedValue& value) override try {
+             const TPyGilLocker lock;
+             const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+             if (next) {
+                 value = FromPyObject(CastCtx_, ItemType_, next.Get());
+                 return true;
+             }
+
+             if (PyErr_Occurred()) {
+                 UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+             }
+
+             return false;
+         } catch (const yexception& e) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+         }
+
+         // Pair view of a plain item iterator: the item is the key and the
+         // payload is Void.
+         bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+             payload = NUdf::TUnboxedValuePod::Void();
+             return Next(key);
+         }
+
+     private:
+         const TPyCastContext::TPtr CastCtx_;
+         const NUdf::TType* ItemType_;
+         TPyObjectPtr PyIter_;
+     };
+
+     // Boxed iterator over a Python iterator of (key, payload) tuples.
+     class TPairIterator: public NUdf::TBoxedValue {
+     public:
+         TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter)
+             : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter))
+         {}
+
+         ~TPairIterator() {
+             // Drop the Python reference while the GIL is held.
+             const TPyGilLocker lock;
+             PyIter_.Reset();
+         }
+
+     private:
+         // Advances by one item without converting it.
+         bool Skip() override try {
+             const TPyGilLocker lock;
+             const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+             if (next) {
+                 return true;
+             }
+
+             if (PyErr_Occurred()) {
+                 UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+             }
+
+             return false;
+         } catch (const yexception& e) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+         }
+
+         // Fetches the next item and splits it into key and payload.
+         bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try {
+             const TPyGilLocker lock;
+             const TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+             if (next) {
+                 // PyTuple_GET_ITEM performs no checking, and the items may come
+                 // from a user-defined mapping, so validate the shape first.
+                 PY_ENSURE(PyTuple_Check(next.Get()) && PyTuple_GET_SIZE(next.Get()) == 2,
+                     "Expected a (key, value) tuple from the items iterator");
+                 key = FromPyObject(CastCtx_, KeyType_, PyTuple_GET_ITEM(next.Get(), 0));
+                 pay = FromPyObject(CastCtx_, PayType_, PyTuple_GET_ITEM(next.Get(), 1));
+                 return true;
+             }
+
+             if (PyErr_Occurred()) {
+                 UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+             }
+
+             return false;
+         } catch (const yexception& e) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+         }
+
+     private:
+         const TPyCastContext::TPtr CastCtx_;
+         const NUdf::TType* KeyType_;
+         const NUdf::TType* PayType_;
+         TPyObjectPtr PyIter_;
+     };
+
+     // Takes an additional reference to `pyObject`.
+     TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject)
+         : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef())
+     {}
+
+     ~TLazyDictBase() {
+         // Drop the Python reference while the GIL is held.
+         const TPyGilLocker lock;
+         PyObject_.Reset();
+     }
+
+     // True when the wrapped Python object is truthy.
+     bool HasDictItems() const override try {
+         const TPyGilLocker lock;
+         const auto has = PyObject_IsTrue(PyObject_.Get());
+         if (has < 0) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return bool(has);
+     }
+     catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+     const TPyCastContext::TPtr CastCtx_;
+     const NUdf::TType* ItemType_;
+     TPyObjectPtr PyObject_;
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyMapping
+//////////////////////////////////////////////////////////////////////////////
+ // Lazy dict view over an arbitrary Python mapping, accessed through the
+ // generic PyMapping_* / PyObject_* protocol. Keys are converted with
+ // ItemType_ (from the base class) and payloads with PayType_.
+ class TLazyMapping: public TLazyDictBase
+ {
+ public:
+     TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict)
+         : TLazyDictBase(ctx, keyType, dict), PayType_(payType)
+     {}
+
+ private:
+     bool IsSortedDict() const override { return false; }
+
+     ui64 GetDictLength() const override try {
+         const TPyGilLocker lock;
+         const auto len = PyMapping_Size(PyObject_.Get());
+         if (len < 0) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return ui64(len);
+     } catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+     // Iterates over the list returned by PyMapping_Keys().
+     NUdf::TUnboxedValue GetKeysIterator() const override try {
+         const TPyGilLocker lock;
+         if (const TPyObjectPtr pyList = PyMapping_Keys(PyObject_.Get())) {
+             if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+                 return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+     // Iterates over the list returned by PyMapping_Values().
+     NUdf::TUnboxedValue GetPayloadsIterator() const override try {
+         const TPyGilLocker lock;
+         if (const TPyObjectPtr pyList = PyMapping_Values(PyObject_.Get())) {
+             if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+                 return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter)));
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+     // Iterates over the list returned by PyMapping_Items().
+     NUdf::TUnboxedValue GetDictIterator() const override try {
+         const TPyGilLocker lock;
+         if (const TPyObjectPtr pyList = PyMapping_Items(PyObject_.Get())) {
+             if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) {
+                 return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter)));
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+     // Returns the payload for `key` as an optional, or an empty value when
+     // the lookup fails.
+     NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try {
+         const TPyGilLocker lock;
+         if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+             // PyObject_GetItem() returns a new reference; hold it in a smart
+             // pointer so it is released once the payload has been converted.
+             if (const TPyObjectPtr item = PyObject_GetItem(PyObject_.Get(), pyKey.Get())) {
+                 return FromPyObject(CastCtx_, PayType_, item.Get()).Release().MakeOptional();
+             }
+
+             // Any Python error raised by the lookup is discarded and reported
+             // as "key absent".
+             if (PyErr_Occurred()) {
+                 PyErr_Clear();
+             }
+
+             return NUdf::TUnboxedValue();
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+     // Uses the type's sq_contains slot when it has one and falls back to
+     // PyMapping_HasKey() otherwise.
+     bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+         const TPyGilLocker lock;
+         if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+             const auto map = PyObject_.Get();
+             const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ?
+                 (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) :
+                 PyMapping_HasKey(map, pyKey.Get());
+
+             if (has >= 0) {
+                 return bool(has);
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& e) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+     }
+
+ private:
+     const NUdf::TType* PayType_;
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyDict
+//////////////////////////////////////////////////////////////////////////////
+ // Lazy dict view over a Python dict, accessed through the PyDict_* API.
+ // Keys are converted with ItemType_ (from the base class) and payloads with
+ // PayType_. Failures are reported through UdfTerminate().
+ class TLazyDict: public TLazyDictBase
+ {
+ public:
+     TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict)
+         : TLazyDictBase(ctx, keyType, dict)
+         , PayType_(payType)
+     {}
+
+ private:
+     bool IsSortedDict() const override {
+         return false;
+     }
+
+     ui64 GetDictLength() const override try {
+         const TPyGilLocker gil;
+         const auto size = PyDict_Size(PyObject_.Get());
+         if (size < 0) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return static_cast<ui64>(size);
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Iterates over the list returned by PyDict_Keys().
+     NUdf::TUnboxedValue GetKeysIterator() const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr keys = PyDict_Keys(PyObject_.Get())) {
+             if (TPyObjectPtr keysIter = PyObject_GetIter(keys.Get())) {
+                 return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(keysIter)));
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Iterates over the list returned by PyDict_Values().
+     NUdf::TUnboxedValue GetPayloadsIterator() const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr values = PyDict_Values(PyObject_.Get())) {
+             if (TPyObjectPtr valuesIter = PyObject_GetIter(values.Get())) {
+                 return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(valuesIter)));
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Iterates over the list returned by PyDict_Items().
+     NUdf::TUnboxedValue GetDictIterator() const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr items = PyDict_Items(PyObject_.Get())) {
+             if (TPyObjectPtr itemsIter = PyObject_GetIter(items.Get())) {
+                 return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(itemsIter)));
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Returns the payload for `key` as an optional, an empty value when the
+     // key is absent and no Python error is pending, and terminates otherwise.
+     NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+             PyObject* const found = PyDict_GetItem(PyObject_.Get(), pyKey.Get());
+             if (found) {
+                 return FromPyObject(CastCtx_, PayType_, found).Release().MakeOptional();
+             }
+             if (!PyErr_Occurred()) {
+                 return NUdf::TUnboxedValue();
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+             const auto found = PyDict_Contains(PyObject_.Get(), pyKey.Get());
+             if (found >= 0) {
+                 return found != 0;
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+ private:
+     const NUdf::TType* PayType_;
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazySet
+//////////////////////////////////////////////////////////////////////////////
+ // Lazy view over a Python set, usable both as a dict with Void payloads and
+ // as a list of its items. Items are converted with ItemType_ (from the base
+ // class). Failures are reported through UdfTerminate().
+ class TLazySet: public TLazyDictBase
+ {
+ public:
+     TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set)
+         : TLazyDictBase(ctx, itemType, set)
+     {}
+
+ private:
+     bool IsSortedDict() const override {
+         return false;
+     }
+
+     ui64 GetDictLength() const override try {
+         const TPyGilLocker gil;
+         const auto size = PySet_Size(PyObject_.Get());
+         if (size < 0) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return static_cast<ui64>(size);
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Void when the key is a member, an empty value otherwise.
+     NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+         if (Contains(key)) {
+             return NUdf::TUnboxedValuePod::Void();
+         }
+         return NUdf::TUnboxedValuePod();
+     }
+
+     bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+             const auto found = PySet_Contains(PyObject_.Get(), pyKey.Get());
+             if (found >= 0) {
+                 return found != 0;
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Iterates directly over the wrapped set.
+     NUdf::TUnboxedValue GetKeysIterator() const override try {
+         const TPyGilLocker gil;
+         if (TPyObjectPtr setIter = PyObject_GetIter(PyObject_.Get())) {
+             return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(setIter)));
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // The payload, dict and list iterators all reuse the key iterator.
+     NUdf::TUnboxedValue GetPayloadsIterator() const override {
+         return GetKeysIterator();
+     }
+
+     NUdf::TUnboxedValue GetDictIterator() const override {
+         return GetKeysIterator();
+     }
+
+     NUdf::TUnboxedValue GetListIterator() const override {
+         return GetKeysIterator();
+     }
+
+     // The list interface forwards to the dict interface.
+     ui64 GetListLength() const override {
+         return GetDictLength();
+     }
+
+     bool HasListItems() const override {
+         return HasDictItems();
+     }
+
+     bool HasFastListLength() const override {
+         return true;
+     }
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazySequenceAsSet
+//////////////////////////////////////////////////////////////////////////////
+ // Lazy view that presents a Python sequence as a set: usable both as a dict
+ // with Void payloads and as a list of its items. Items are converted with
+ // ItemType_ (from the base class). Failures are reported through
+ // UdfTerminate().
+ class TLazySequenceAsSet: public TLazyDictBase
+ {
+ public:
+     TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence)
+         : TLazyDictBase(ctx, keyType, sequence)
+     {}
+
+ private:
+     bool IsSortedDict() const override {
+         return false;
+     }
+
+     ui64 GetDictLength() const override try {
+         const TPyGilLocker gil;
+         const auto size = PySequence_Size(PyObject_.Get());
+         if (size < 0) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return static_cast<ui64>(size);
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Void when the key is contained in the sequence, an empty value otherwise.
+     NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+         if (Contains(key)) {
+             return NUdf::TUnboxedValuePod::Void();
+         }
+         return NUdf::TUnboxedValuePod();
+     }
+
+     bool Contains(const NUdf::TUnboxedValuePod& key) const override try {
+         const TPyGilLocker gil;
+         if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) {
+             const auto found = PySequence_Contains(PyObject_.Get(), pyKey.Get());
+             if (found >= 0) {
+                 return found != 0;
+             }
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // Iterates directly over the wrapped sequence.
+     NUdf::TUnboxedValue GetKeysIterator() const override try {
+         const TPyGilLocker gil;
+         if (TPyObjectPtr seqIter = PyObject_GetIter(PyObject_.Get())) {
+             return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(seqIter)));
+         }
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+     } catch (const yexception& ex) {
+         UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+     }
+
+     // The payload, dict and list iterators all reuse the key iterator.
+     NUdf::TUnboxedValue GetPayloadsIterator() const override {
+         return GetKeysIterator();
+     }
+
+     NUdf::TUnboxedValue GetDictIterator() const override {
+         return GetKeysIterator();
+     }
+
+     NUdf::TUnboxedValue GetListIterator() const override {
+         return GetKeysIterator();
+     }
+
+     // The list interface forwards to the dict interface.
+     ui64 GetListLength() const override {
+         return GetDictLength();
+     }
+
+     bool HasListItems() const override {
+         return HasDictItems();
+     }
+
+     bool HasFastListLength() const override {
+         return true;
+     }
+ };
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazySequenceAsDict
+//////////////////////////////////////////////////////////////////////////////
+ // Lazy dict view over a "fast" Python sequence: keys are the element
+ // positions (of integral type KeyType) and payloads are the elements,
+ // converted with ItemType_. The element count is captured at construction.
+ // Lookup() and Contains() also accept indices in [-Size, -1], which address
+ // elements from the end.
+ // NOTE(review): keys are converted to Py_ssize_t before the range check, so
+ // for unsigned KeyType very large keys may be seen as negative -- confirm.
+ template<typename KeyType>
+ class TLazySequenceAsDict: public NUdf::TBoxedValue
+ {
+ private:
+     // Yields the keys 0 .. Size-1 without touching Python.
+     class TKeyIterator: public NUdf::TBoxedValue {
+     public:
+         TKeyIterator(Py_ssize_t size)
+             : Size(size)
+             , Index(0)
+         {}
+
+     private:
+         bool Skip() override {
+             if (Index < Size) {
+                 ++Index;
+                 return true;
+             }
+             return false;
+         }
+
+         bool Next(NUdf::TUnboxedValue& value) override {
+             if (Index < Size) {
+                 value = NUdf::TUnboxedValuePod(KeyType(Index++));
+                 return true;
+             }
+             return false;
+         }
+
+     private:
+         const Py_ssize_t Size;
+         Py_ssize_t Index;
+     };
+
+     // Yields the elements (Next) or the (index, element) pairs (NextPair).
+     class TIterator: public NUdf::TBoxedValue {
+     public:
+         TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq)
+             : CastCtx_(ctx)
+             , ItemType_(itemType)
+             , PySeq_(pySeq)
+             , Size(size)
+             , Index(0)
+         {}
+
+         ~TIterator() {
+             // Drop the Python reference while the GIL is held.
+             const TPyGilLocker gil;
+             PySeq_.Reset();
+         }
+
+     private:
+         bool Skip() override {
+             if (Index < Size) {
+                 ++Index;
+                 return true;
+             }
+             return false;
+         }
+
+         bool Next(NUdf::TUnboxedValue& value) override try {
+             if (Index >= Size) {
+                 return false;
+             }
+
+             const TPyGilLocker gil;
+             value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++));
+             return true;
+         } catch (const yexception& ex) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+         }
+
+         bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try {
+             if (Index >= Size) {
+                 return false;
+             }
+
+             const TPyGilLocker gil;
+             key = NUdf::TUnboxedValuePod(KeyType(Index));
+             pay = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++));
+             return true;
+         } catch (const yexception& ex) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+         }
+
+     private:
+         const TPyCastContext::TPtr CastCtx_;
+         const NUdf::TType* ItemType_;
+         TPyObjectPtr PySeq_;
+         const Py_ssize_t Size;
+         Py_ssize_t Index;
+     };
+
+ public:
+     TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size)
+         : CastCtx_(ctx)
+         , ItemType_(itemType)
+         , Size(size)
+         , PySeq_(std::move(sequence))
+     {}
+
+     ~TLazySequenceAsDict()
+     {
+         // Drop the Python reference while the GIL is held.
+         const TPyGilLocker gil;
+         PySeq_.Reset();
+     }
+
+ private:
+     bool IsSortedDict() const override {
+         return true;
+     }
+
+     bool HasDictItems() const override {
+         return 0 < Size;
+     }
+
+     ui64 GetDictLength() const override {
+         return Size;
+     }
+
+     // Returns the element at `key` as an optional, or an empty value when
+     // the index is out of range.
+     NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+         const Py_ssize_t index = key.Get<KeyType>();
+         if (index < -Size || index >= Size) {
+             return NUdf::TUnboxedValue();
+         }
+
+         try {
+             const TPyGilLocker gil;
+             // Negative indices count from the end of the sequence.
+             PyObject* const element = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size + index);
+             if (element) {
+                 return FromPyObject(CastCtx_, ItemType_, element).Release().MakeOptional();
+             }
+             if (PyErr_Occurred()) {
+                 UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+             }
+         } catch (const yexception& ex) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << ex.what()).data());
+         }
+         return NUdf::TUnboxedValue();
+     }
+
+     bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+         const Py_ssize_t index = key.Get<KeyType>();
+         return -Size <= index && index < Size;
+     }
+
+     NUdf::TUnboxedValue GetKeysIterator() const override {
+         return NUdf::TUnboxedValuePod(new TKeyIterator(Size));
+     }
+
+     NUdf::TUnboxedValue GetPayloadsIterator() const override {
+         return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_));
+     }
+
+     NUdf::TUnboxedValue GetDictIterator() const override {
+         return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_));
+     }
+
+     const TPyCastContext::TPtr CastCtx_;
+     const NUdf::TType* ItemType_;
+     const Py_ssize_t Size;
+     TPyObjectPtr PySeq_;
+ };
+
+ } // namespace
+
+ // Wraps a Python dict into a boxed TLazyDict; nothing is converted until the
+ // resulting value is queried.
+ NUdf::TUnboxedValue FromPyDict(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* keyType,
+     const NUdf::TType* payType,
+     PyObject* dict)
+ {
+     auto* const lazy = new TLazyDict(castCtx, keyType, payType, dict);
+     return NUdf::TUnboxedValuePod(lazy);
+ }
+
+ // Wraps a Python mapping into a boxed TLazyMapping; nothing is converted
+ // until the resulting value is queried.
+ NUdf::TUnboxedValue FromPyMapping(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* keyType,
+     const NUdf::TType* payType,
+     PyObject* map)
+ {
+     auto* const lazy = new TLazyMapping(castCtx, keyType, payType, map);
+     return NUdf::TUnboxedValuePod(lazy);
+ }
+
+ // Wraps a Python set into a boxed TLazySet; nothing is converted until the
+ // resulting value is queried.
+ NUdf::TUnboxedValue FromPySet(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* itemType,
+     PyObject* set)
+ {
+     auto* const lazy = new TLazySet(castCtx, itemType, set);
+     return NUdf::TUnboxedValuePod(lazy);
+ }
+
+ // Wraps a Python sequence into a boxed TLazySequenceAsSet, i.e. presents
+ // the sequence as a set of its elements.
+ NUdf::TUnboxedValue FromPySequence(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* keyType,
+     PyObject* set)
+ {
+     auto* const lazy = new TLazySequenceAsSet(castCtx, keyType, set);
+     return NUdf::TUnboxedValuePod(lazy);
+ }
+
+ // Presents a Python sequence as a dict from element index to element.
+ // The sequence is first materialised with PySequence_Fast(); the integral
+ // key type selects the TLazySequenceAsDict instantiation. An unsupported
+ // key type aborts, and a failure to obtain the fast sequence is reported
+ // through UdfTerminate().
+ NUdf::TUnboxedValue FromPySequence(
+     const TPyCastContext::TPtr& castCtx,
+     const NUdf::TType* itemType,
+     const NUdf::TDataTypeId keyType,
+     PyObject* sequence)
+ {
+     if (TPyObjectPtr fastSeq = PySequence_Fast(sequence, "Can't get fast sequence.")) {
+         const auto length = PySequence_Fast_GET_SIZE(fastSeq.Get());
+         if (length >= 0) {
+             switch (keyType) {
+ #define MAKE_LAZY_SEQUENCE_AS_DICT(type) \
+                 case NUdf::TDataType<type>::Id: \
+                     return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fastSeq), length));
+                 INTEGRAL_VALUE_TYPES(MAKE_LAZY_SEQUENCE_AS_DICT)
+ #undef MAKE_LAZY_SEQUENCE_AS_DICT
+             }
+             Y_ABORT("Invalid key type.");
+         }
+     }
+     UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+ }
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp
new file mode 100644
index 0000000000..fe3b8892e6
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp
@@ -0,0 +1,382 @@
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/generic/maybe.h>
+#include <util/string/builder.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+// Exhausts the Python iterator `iter` and returns how many items it produced.
+// If a Python error is pending once iteration stops, the UDF is terminated
+// with castCtx->PyCtx->Pos followed by the error text.
+static ui64 CalculateIteratorLength(PyObject* iter, const TPyCastContext::TPtr& castCtx)
+{
+    ui64 count = 0;
+    for (PyObject* current = PyIter_Next(iter); current != nullptr; current = PyIter_Next(iter)) {
+        ++count;
+        // PyIter_Next() hands out a new reference; only the count is needed.
+        Py_DECREF(current);
+    }
+
+    if (PyErr_Occurred()) {
+        UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+    }
+
+    return count;
+}
+
+// Pulls a single item from `iter` and reports whether one was available.
+// The pulled item is discarded. A pending Python error terminates the UDF.
+static bool IsIteratorHasItems(PyObject* iter, const TPyCastContext::TPtr& castCtx)
+{
+    const TPyObjectPtr first = PyIter_Next(iter);
+    if (first) {
+        return true;
+    }
+
+    if (PyErr_Occurred()) {
+        UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+    }
+
+    return false;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TBaseLazyList
+//////////////////////////////////////////////////////////////////////////////
+// CRTP base for boxed list values that read their items from a Python object.
+//
+// TDerived supplies `TPyObjectPtr GetIteratorImpl() const`; iteration, length
+// caching (Length_) and the emptiness check are implemented here on top of
+// that iterator. Items are converted with FromPyObject() as they are pulled.
+// The Reverse/Skip/Take/ToIndexDict hooks all return nullptr.
+template<typename TDerived>
+class TBaseLazyList: public NUdf::TBoxedValue
+{
+    // Boxed iterator that forwards to a Python iterator object.
+    class TIterator: public NUdf::TBoxedValue {
+    public:
+        TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter)
+            : CastCtx_(ctx)
+            , PyIter_(std::move(pyIter))
+            , ItemType_(type)
+        {}
+
+        ~TIterator() {
+            // Drop the Python reference while TPyGilLocker is held.
+            const TPyGilLocker lock;
+            PyIter_.Reset();
+        }
+
+    private:
+        // Advances by one item without converting it.
+        bool Skip() override try {
+            const TPyGilLocker lock;
+            const TPyObjectPtr skipped(PyIter_Next(PyIter_.Get()));
+            if (!skipped) {
+                if (PyErr_Occurred()) {
+                    UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+                }
+                return false;
+            }
+            return true;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+        // Pulls the next item and converts it to ItemType_.
+        bool Next(NUdf::TUnboxedValue& value) override try {
+            const TPyGilLocker lock;
+            const TPyObjectPtr pulled(PyIter_Next(PyIter_.Get()));
+            if (!pulled) {
+                if (PyErr_Occurred()) {
+                    UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+                }
+                return false;
+            }
+            value = FromPyObject(CastCtx_, ItemType_, pulled.Get());
+            return true;
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+        }
+
+    private:
+        const TPyCastContext::TPtr CastCtx_;
+        TPyObjectPtr PyIter_;
+        const NUdf::TType* ItemType_;
+    };
+
+public:
+    TBaseLazyList(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : CastCtx_(castCtx)
+        , PyObject_(std::move(pyObject))
+        , ItemType_(NUdf::TListTypeInspector(*CastCtx_->PyCtx->TypeInfoHelper, type).GetItemType())
+    {
+    }
+
+    ~TBaseLazyList() {
+        // Drop the Python reference while TPyGilLocker is held.
+        TPyGilLocker lock;
+        PyObject_.Reset();
+    }
+
+private:
+    // Asks the derived class for an iterator; yexception becomes UdfTerminate.
+    TPyObjectPtr GetIterator() const try {
+        const auto* derived = static_cast<const TDerived*>(this);
+        return derived->GetIteratorImpl();
+    }
+    catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // The length is "fast" only once it has been cached.
+    bool HasFastListLength() const override {
+        return Length_.Defined();
+    }
+
+    ui64 GetEstimatedListLength() const override {
+        return GetListLength();
+    }
+
+    // Returns the cached length, computing it by exhausting an iterator
+    // on the first call.
+    ui64 GetListLength() const override try {
+        if (Length_.Defined()) {
+            return *Length_;
+        }
+
+        const TPyGilLocker lock;
+        TPyObjectPtr pyIter = GetIterator();
+        Length_ = CalculateIteratorLength(pyIter.Get(), CastCtx_);
+        return *Length_;
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Uses the cached length when available; otherwise pulls one item from a
+    // freshly obtained iterator. An empty result caches a length of zero.
+    bool HasListItems() const override try {
+        if (Length_.Defined()) {
+            return *Length_ > 0;
+        }
+
+        const TPyGilLocker lock;
+        TPyObjectPtr pyIter = GetIterator();
+        if (IsIteratorHasItems(pyIter.Get(), CastCtx_)) {
+            return true;
+        }
+        Length_ = 0;
+        return false;
+    }
+    catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    NUdf::TUnboxedValue GetListIterator() const override try {
+        const TPyGilLocker lock;
+        TPyObjectPtr pyIter = GetIterator();
+        return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter)));
+    } catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    const NUdf::TOpaqueListRepresentation* GetListRepresentation() const override {
+        return nullptr;
+    }
+
+    NUdf::IBoxedValuePtr ReverseListImpl(
+        const NUdf::IValueBuilder& /* builder */) const override
+    {
+        return nullptr;
+    }
+
+    NUdf::IBoxedValuePtr SkipListImpl(
+        const NUdf::IValueBuilder& /* builder */, ui64 /* count */) const override
+    {
+        return nullptr;
+    }
+
+    NUdf::IBoxedValuePtr TakeListImpl(
+        const NUdf::IValueBuilder& /* builder */, ui64 /* count */) const override
+    {
+        return nullptr;
+    }
+
+    NUdf::IBoxedValuePtr ToIndexDictImpl(
+        const NUdf::IValueBuilder& /* builder */) const override
+    {
+        return nullptr;
+    }
+
+protected:
+    const TPyCastContext::TPtr CastCtx_;
+    TPyObjectPtr PyObject_;
+    const NUdf::TType* ItemType_;
+    mutable TMaybe<ui64> Length_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyIterable
+//////////////////////////////////////////////////////////////////////////////
+// Lazy list over a Python iterable: every traversal obtains a fresh iterator
+// with PyObject_GetIter(). Length and emptiness are first asked from the
+// object itself (PyObject_Size / PyObject_IsTrue) and only fall back to
+// iteration when that query fails.
+class TLazyIterable: public TBaseLazyList<TLazyIterable>
+{
+    using TBase = TBaseLazyList<TLazyIterable>;
+public:
+    TLazyIterable(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : TBase(castCtx, std::move(pyObject), type)
+    {}
+
+    // Returns a new iterator over the wrapped object, or terminates the UDF
+    // with the object's repr and the Python error if none can be obtained.
+    TPyObjectPtr GetIteratorImpl() const {
+        if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) {
+            return ret;
+        }
+
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos
+            << "Cannot get iterator from object: "
+            << PyObjectRepr(PyObject_.Get()) << ", error: "
+            << GetLastErrorAsString()).data());
+    }
+
+private:
+    bool HasFastListLength() const override {
+        return Length_.Defined();
+    }
+
+    // Returns the cached length; on first use asks PyObject_Size() and, if
+    // that fails, counts the items by iterating.
+    ui64 GetListLength() const override try {
+        if (!Length_.Defined()) {
+            const TPyGilLocker lock;
+            const auto len = PyObject_Size(PyObject_.Get());
+            if (len >= 0) {
+                Length_ = len;
+            } else {
+                // PyObject_Size() leaves a Python exception set when it fails.
+                // Drop it before falling back: otherwise the stale error is
+                // still pending after iteration and CalculateIteratorLength()
+                // reports it as an iteration failure.
+                PyErr_Clear();
+                Length_ = CalculateIteratorLength(GetIteratorImpl().Get(), CastCtx_);
+            }
+        }
+        return *Length_;
+    }
+    catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+
+    // Asks PyObject_IsTrue() and, if that fails, probes a fresh iterator for a
+    // first item. An empty result caches a length of zero.
+    bool HasListItems() const override try {
+        const TPyGilLocker lock;
+        bool hasItems = false;
+        const auto isTrue = PyObject_IsTrue(PyObject_.Get());
+        if (isTrue != -1) {
+            hasItems = static_cast<bool>(isTrue);
+        } else {
+            // PyObject_IsTrue() returned -1 with a Python exception set.
+            // Clear it so the fallback does not run (or return) with a stale
+            // error pending.
+            PyErr_Clear();
+            TPyObjectPtr iter = GetIteratorImpl();
+            hasItems = IsIteratorHasItems(iter.Get(), CastCtx_);
+        }
+        if (!hasItems) {
+            Length_ = 0;
+        }
+        return hasItems;
+    }
+    catch (const yexception& e) {
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+    }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyIterator
+//////////////////////////////////////////////////////////////////////////////
+// Lazy list over a Python iterator object. The wrapped object itself is handed
+// out as the iterator, so it can be requested only once; any further request
+// terminates the UDF.
+class TLazyIterator: public TBaseLazyList<TLazyIterator>
+{
+    using TBase = TBaseLazyList<TLazyIterator>;
+public:
+    TLazyIterator(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : TBase(castCtx, std::move(pyObject), type)
+    {}
+
+    // First call: marks the iterator as used and returns the wrapped object.
+    // Later calls: terminate the UDF.
+    TPyObjectPtr GetIteratorImpl() const {
+        if (!IteratorDrained_) {
+            IteratorDrained_ = true;
+            return PyObject_;
+        }
+
+        UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos <<
+            "Lazy list was build under python iterator. "
+            "Iterator was already used.").data());
+    }
+
+private:
+    // Set once the wrapped iterator has been handed out.
+    mutable bool IteratorDrained_ = false;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TLazyGenerator
+//////////////////////////////////////////////////////////////////////////////
+// Lazy list over a Python callable that returns a generator. Every traversal
+// calls the callable (with no arguments) again to obtain a new generator.
+class TLazyGenerator: public TBaseLazyList<TLazyGenerator>
+{
+    using TBase = TBaseLazyList<TLazyGenerator>;
+public:
+    TLazyGenerator(
+        const TPyCastContext::TPtr& castCtx,
+        TPyObjectPtr&& pyObject,
+        const NUdf::TType* type)
+        : TBase(castCtx, std::move(pyObject), type)
+    {
+        // keep ownership of function closure if any
+        if (PyFunction_Check(PyObject_.Get())) {
+            PyObject* closure = PyFunction_GetClosure(PyObject_.Get());
+            if (closure) {
+                Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF);
+            }
+        }
+    }
+
+    ~TLazyGenerator() {
+        // Drop the Python reference while TPyGilLocker is held.
+        const TPyGilLocker lock;
+        Closure_.Reset();
+    }
+
+    // Calls the wrapped callable and returns an iterator over its result.
+    // Terminates the UDF if the call fails or does not produce a generator.
+    TPyObjectPtr GetIteratorImpl() const {
+        TPyObjectPtr generator = PyObject_CallObject(PyObject_.Get(), nullptr);
+        if (!generator) {
+            // The call itself raised: include the Python error in the message
+            // instead of silently dropping it.
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos
+                << "Expected generator as a result of function call, error: "
+                << GetLastErrorAsString()).data());
+        }
+        if (!PyGen_Check(generator.Get())) {
+            UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected generator as a result of function call").data());
+        }
+        return PyObject_GetIter(generator.Get());
+    }
+
+private:
+    TPyObjectPtr Closure_;
+};
+
+} // namespace
+
+
+// Boxes a new TLazyGenerator that takes ownership of callableObj and returns
+// it as an unboxed UDF value of the given list type.
+NUdf::TUnboxedValue FromPyLazyGenerator(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    TPyObjectPtr callableObj)
+{
+    auto* lazyList = new TLazyGenerator(castCtx, std::move(callableObj), type);
+    return NUdf::TUnboxedValuePod(lazyList);
+}
+
+// Boxes a new TLazyIterable that takes ownership of iterableObj and returns
+// it as an unboxed UDF value of the given list type.
+NUdf::TUnboxedValue FromPyLazyIterable(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    TPyObjectPtr iterableObj)
+{
+    auto* lazyList = new TLazyIterable(castCtx, std::move(iterableObj), type);
+    return NUdf::TUnboxedValuePod(lazyList);
+}
+
+// Boxes a new TLazyIterator that takes ownership of iteratorObj and returns
+// it as an unboxed UDF value of the given list type.
+NUdf::TUnboxedValue FromPyLazyIterator(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* type,
+    TPyObjectPtr iteratorObj)
+{
+    auto* lazyList = new TLazyIterator(castCtx, std::move(iteratorObj), type);
+    return NUdf::TUnboxedValuePod(lazyList);
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_list.cpp b/yql/essentials/udfs/common/python/bindings/py_list.cpp
new file mode 100644
index 0000000000..376a1ca124
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_list.cpp
@@ -0,0 +1,1116 @@
+#include "py_list.h"
+#include "py_dict.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+using namespace NKikimr;
+
+#if PY_MAJOR_VERSION >= 3
+#define SLICEOBJ(obj) obj
+#else
+#define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj))
+// See details about need for backports in ya.make
+#include "py27_backports.h"
+#endif
+
+namespace NPython {
+
+namespace {
+// Converts `key` to a Py_ssize_t for the method called `name`.
+// Returns the value when it is >= 0. Otherwise (not index-like, conversion
+// failure, or negative value) sets IndexError and returns the negative value.
+inline Py_ssize_t CastIndex(PyObject* key, const char* name)
+{
+    const Py_ssize_t index = PyIndex_Check(key)
+        ? PyNumber_AsSsize_t(key, PyExc_IndexError)
+        : -1;
+    if (index >= 0) {
+        return index;
+    }
+
+    const TPyObjectPtr message = PyUnicode_FromFormat("argument of %s must be positive integer or long", name);
+    PyErr_SetObject(PyExc_IndexError, message.Get());
+    return index;
+}
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyList interface
+//////////////////////////////////////////////////////////////////////////////
+// Python-side object ("yql.TList") wrapping a boxed UDF list value.
+// Instances are created with New() and destroyed with Dealloc().
+struct TPyLazyList
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyLazyList, TPyPtrOps<TPyLazyList>>;
+
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+ // Type used to convert individual items to Python objects.
+ const NUdf::TType* ItemType;
+ // The wrapped boxed list.
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ // Index dictionary built from Value on first use (Subscript / ToIndexDict).
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Dict;
+ // Stride over Value: Len() reports ceil(CachedLength / Step) and the
+ // iterator skips Step - 1 underlying items after each returned one.
+ Py_ssize_t Step;
+ // Length of the underlying Value, or -1 while it is not known yet.
+ Py_ssize_t CachedLength;
+
+ inline static TPyLazyList* Cast(PyObject* o) {
+ return reinterpret_cast<TPyLazyList*>(o);
+ }
+
+ inline static void Dealloc(PyObject* self) {
+ delete Cast(self);
+ }
+
+ // Allocates a new list object; size == -1 means "length unknown".
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ NUdf::IBoxedValuePtr value,
+ Py_ssize_t step = 1,
+ Py_ssize_t size = -1);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static PyObject* Iter(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static PyObject* Subscript(PyObject* self, PyObject* slice);
+ static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */);
+ static PyObject* Reversed(PyObject* self, PyObject* /* arg */);
+ static PyObject* Take(PyObject* self, PyObject* arg);
+ static PyObject* Skip(PyObject* self, PyObject* arg);
+ static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */);
+ static PyObject* HasItems(PyObject* self, PyObject* /* arg */);
+};
+
+// Mapping protocol for yql.TList: len() and read-only subscription
+// (item assignment is not provided).
+PyMappingMethods LazyListMapping = {
+ INIT_MEMBER(mp_length, TPyLazyList::Len),
+ INIT_MEMBER(mp_subscript, TPyLazyList::Subscript),
+ INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+// Number protocol for yql.TList. Only the truth-value slot is populated
+// (nb_bool on Python 3, nb_nonzero on Python 2) with TPyLazyList::Bool;
+// every other slot is nullptr. The #if blocks follow the differences in the
+// PyNumberMethods layout between the two major Python versions.
+PyNumberMethods LazyListNumbering = {
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_bool, TPyLazyList::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyLazyList::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+// Docstrings for the legacy list methods. They are referenced from the
+// TPyLazyListMethods and TPyThinListMethods tables.
+PyDoc_STRVAR(reversed__doc__, "DEPRECATED: use reversed(list) or list[::-1] instead.");
+PyDoc_STRVAR(take__doc__, "DEPRECATED: use slice list[:n] instead.");
+PyDoc_STRVAR(skip__doc__, "DEPRECATED: use slice list[n:] instead.");
+PyDoc_STRVAR(to_index_dict__doc__, "DEPRECATED: use list[n] instead.");
+PyDoc_STRVAR(has_fast_len__doc__, "DEPRECATED: do not use.");
+PyDoc_STRVAR(has_items__doc__, "DEPRECATED: test list as bool instead.");
+
+// Method table for yql.TList. "__reversed__" and "reversed" share the same
+// implementation; take/skip receive exactly one argument (METH_O), the rest
+// take none.
+static PyMethodDef TPyLazyListMethods[] = {
+ { "__reversed__", TPyLazyList::Reversed, METH_NOARGS, nullptr },
+ { "to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ },
+ { "reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__ },
+ { "take", TPyLazyList::Take, METH_O, take__doc__ },
+ { "skip", TPyLazyList::Skip, METH_O, skip__doc__ },
+ { "has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__ },
+ { "has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__ },
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0
+#endif
+
+// Type object for yql.TList. It wires up deallocation, repr, the number and
+// mapping protocols, iteration (tp_iter) and the method table; all remaining
+// slots are left empty. The #if blocks follow PyTypeObject layout changes
+// across Python versions (tp_print / tp_vectorcall_offset, tp_compare /
+// tp_as_async, tp_finalize, tp_vectorcall).
+PyTypeObject PyLazyListType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TList"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazyList)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazyList::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazyList::Repr),
+ INIT_MEMBER(tp_as_number , &LazyListNumbering),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , &LazyListMapping),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TList object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , TPyLazyList::Iter),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , TPyLazyListMethods),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyListIterator interface
+//////////////////////////////////////////////////////////////////////////////
+// Python-side iterator object ("yql.TListIterator") over a TPyLazyList.
+struct TPyLazyListIterator
+{
+ PyObject_HEAD;
+ // The list being iterated.
+ TPyLazyList::TPtr List;
+ // Boxed iterator obtained from the list's underlying value.
+ TPyCleanupListItem<NUdf::TUnboxedValue> Iterator;
+ // Number of underlying items consumed so far (returned and skipped).
+ Py_ssize_t Length;
+ TPyCastContext::TPtr CastCtx;
+
+ inline static TPyLazyListIterator* Cast(PyObject* o) {
+ return reinterpret_cast<TPyLazyListIterator*>(o);
+ }
+
+ // Destroys the iterator between MemoryLock->Acquire() and Release().
+ // The context pointer is copied first because `obj` owns the original.
+ inline static void Dealloc(PyObject* self) {
+ auto obj = Cast(self);
+ auto ctx = obj->CastCtx;
+ ctx->MemoryLock->Acquire();
+ delete obj;
+ ctx->MemoryLock->Release();
+ }
+
+ inline static PyObject* Repr(PyObject* self) {
+ Y_UNUSED(self);
+ return PyRepr("<yql.TListIterator>").Release();
+ }
+
+ static PyObject* New(TPyLazyList* list);
+ static PyObject* Next(PyObject* self);
+};
+
+// Type object for yql.TListIterator. It wires up deallocation, repr and the
+// iterator protocol (tp_iter returns the object itself, tp_iternext is
+// TPyLazyListIterator::Next); all remaining slots are left empty.
+// NOTE(review): tp_doc says "yql.ListIterator" while tp_name is
+// "yql.TListIterator" - confirm which spelling is intended.
+PyTypeObject PyLazyListIteratorType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TListIterator"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyLazyListIterator)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyLazyListIterator::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyLazyListIterator::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.ListIterator object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyLazyListIterator::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyList implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a yql.TList object over `value`.
+// `step` is the stride over the underlying list and `size` its known length
+// (-1 when unknown). Returns a new reference.
+PyObject* TPyLazyList::New(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* itemType,
+    NUdf::IBoxedValuePtr value,
+    Py_ssize_t step,
+    Py_ssize_t size)
+{
+    TPyLazyList* result = new TPyLazyList;
+    PyObject_INIT(result, &PyLazyListType);
+
+    result->CastCtx = castCtx;
+    result->ItemType = itemType;
+    result->Value.Set(castCtx->PyCtx, value);
+    result->Step = step;
+    result->CachedLength = size;
+
+    return reinterpret_cast<PyObject*>(result);
+}
+
+// repr(list): always the fixed string "<yql.TList>".
+PyObject* TPyLazyList::Repr(PyObject* /* self */)
+{
+    auto repr = PyRepr("<yql.TList>");
+    return repr.Release();
+}
+
+// iter(list): creates a new TPyLazyListIterator over this list.
+PyObject* TPyLazyList::Iter(PyObject* self)
+{
+    PY_TRY {
+        return TPyLazyListIterator::New(Cast(self));
+    } PY_CATCH(nullptr)
+}
+
+// len(list): number of items visible through the stride, i.e.
+// ceil(underlying length / Step). The underlying length is fetched from the
+// boxed list on first use and cached.
+Py_ssize_t TPyLazyList::Len(PyObject* self)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        Py_ssize_t& cached = list->CachedLength;
+        if (cached == -1) {
+            cached = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get()));
+        }
+        const Py_ssize_t stride = list->Step;
+        return (cached + stride - 1) / stride;
+    } PY_CATCH(-1)
+}
+
+// Implements list[key] for yql.TList.
+//
+// Index-like key: the item is looked up in the index dictionary (built on
+// first use). Negative indices count from the end and force the length to be
+// computed. Raises IndexError when the index is out of bounds.
+//
+// Slice key: returns a new yql.TList built from Skip/Take/Reverse of the
+// underlying list, with the slice step folded into the new list's Step. The
+// underlying length is only computed when the slice cannot be resolved
+// without it. Raises ValueError for a zero step.
+//
+// Any other key type raises TypeError.
+PyObject* TPyLazyList::Subscript(PyObject* self, PyObject* slice)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        const auto vb = list->CastCtx->ValueBuilder;
+
+        if (PyIndex_Check(slice)) {
+            Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError);
+            // -1 with an exception set means the conversion failed (e.g. the
+            // value does not fit into Py_ssize_t); it must not be mistaken
+            // for the valid index -1.
+            if (index == -1 && PyErr_Occurred()) {
+                return nullptr;
+            }
+
+            if (!list->Dict.IsSet()) {
+                list->Dict.Set(list->CastCtx->PyCtx, vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed());
+            }
+
+            if (index < 0) {
+                if (list->CachedLength == -1) {
+                    list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get()));
+                }
+
+                // Map a negative visible index to a position in the
+                // underlying list: (index + 1) * Step - 1 + CachedLength.
+                ++index *= list->Step;
+                --index += list->CachedLength;
+            } else {
+                index *= list->Step;
+            }
+
+            if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) {
+                const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength);
+                PyErr_SetObject(PyExc_IndexError, error.Get());
+                return nullptr;
+            }
+
+            if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) {
+                return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release();
+            }
+
+            const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index);
+            PyErr_SetObject(PyExc_IndexError, error.Get());
+            return nullptr;
+        }
+
+        if (PySlice_Check(slice)) {
+            Py_ssize_t start, stop, step, size;
+
+            if (list->CachedLength >= 0) {
+                if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) {
+                    return nullptr;
+                }
+            } else {
+                if (PySlice_Unpack(slice, &start, &stop, &step) < 0) {
+                    return nullptr;
+                }
+
+                // The length is needed only for non-unit steps or for bounds
+                // given relative to the end of the list.
+                if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) {
+                    list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get()));
+                    size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step);
+                } else {
+                    size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step);
+                }
+            }
+
+            if (!step) {
+                PyErr_SetString(PyExc_ValueError, "slice step cannot be zero");
+                return nullptr;
+            }
+
+            // Fold the slice step into the existing stride, saturating
+            // instead of overflowing.
+            const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step;
+            const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step;
+            step = step > lo && step < hi ? step * list->Step : (step > 0 ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN);
+
+            NUdf::TUnboxedValue newList;
+            if (size > 0) {
+                // Convert the visible item count into a span of underlying
+                // items: (size - 1) * |step| + 1, saturated.
+                size = step > 0 ?
+                    (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX):
+                    (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX);
+
+                start = start < hi ? start * list->Step : PY_SSIZE_T_MAX;
+                const Py_ssize_t skip = step > 0 ? start : start - size + 1;
+
+                newList = NUdf::TUnboxedValuePod(list->Value.Get().Get());
+                if (skip > 0) {
+                    newList = vb->SkipList(newList, skip);
+                }
+
+                if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) {
+                    newList = vb->TakeList(newList, size);
+                }
+
+                if (step < 0) {
+                    step = -step;
+                    newList = vb->ReverseList(newList);
+                }
+            } else {
+                newList = vb->NewEmptyList();
+            }
+
+            return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size);
+        }
+
+        const TPyObjectPtr type = PyObject_Type(slice);
+        const TPyObjectPtr repr = PyObject_Repr(type.Get());
+        const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get());
+        PyErr_SetObject(PyExc_TypeError, error.Get());
+        return nullptr;
+    } PY_CATCH(nullptr)
+}
+
+// list.to_index_dict(): returns the index dictionary of the underlying list
+// as a lazy Python dict, building and caching the dictionary on first use.
+PyObject* TPyLazyList::ToIndexDict(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        const auto& ctx = list->CastCtx;
+        auto& cachedDict = list->Dict;
+        if (!cachedDict.IsSet()) {
+            cachedDict.Set(ctx->PyCtx, ctx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed());
+        }
+
+        return ToPyLazyDict(ctx, nullptr, list->ItemType, NUdf::TUnboxedValuePod(cachedDict.Get().Get())).Release();
+    } PY_CATCH(nullptr)
+}
+
+// list.reversed() / reversed(list): returns a new yql.TList over the reversed
+// underlying list, keeping the current Step. The new list's length is unknown.
+PyObject* TPyLazyList::Reversed(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        const auto& ctx = list->CastCtx;
+        const auto reversedList = ctx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get()));
+        return New(ctx, list->ItemType, reversedList.AsBoxed(), list->Step);
+    } PY_CATCH(nullptr)
+}
+
+// list.take(n): returns a new yql.TList limited to the first n visible items
+// (n * Step underlying items), keeping the current Step.
+// Raises IndexError (via CastIndex) when n is not a non-negative integer.
+PyObject* TPyLazyList::Take(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        Py_ssize_t count = CastIndex(arg, "take");
+        if (count < 0) {
+            return nullptr;
+        }
+        // Scale by the stride, saturating at PY_SSIZE_T_MAX: the plain
+        // multiplication overflows a signed Py_ssize_t for large counts.
+        const Py_ssize_t stride = list->Step;
+        if (stride > 1 && count > PY_SSIZE_T_MAX / stride) {
+            count = PY_SSIZE_T_MAX;
+        } else {
+            count *= stride;
+        }
+
+        auto vb = list->CastCtx->ValueBuilder;
+        NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get()));
+        auto newList = vb->TakeList(value, static_cast<ui64>(count));
+        return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step);
+    } PY_CATCH(nullptr)
+}
+
+// list.skip(n): returns a new yql.TList without the first n visible items
+// (n * Step underlying items), keeping the current Step.
+// Raises IndexError (via CastIndex) when n is not a non-negative integer.
+PyObject* TPyLazyList::Skip(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        Py_ssize_t count = CastIndex(arg, "skip");
+        if (count < 0) {
+            return nullptr;
+        }
+        // Scale by the stride, saturating at PY_SSIZE_T_MAX: the plain
+        // multiplication overflows a signed Py_ssize_t for large counts.
+        const Py_ssize_t stride = list->Step;
+        if (stride > 1 && count > PY_SSIZE_T_MAX / stride) {
+            count = PY_SSIZE_T_MAX;
+        } else {
+            count *= stride;
+        }
+
+        NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get()));
+        const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count));
+        return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step);
+    } PY_CATCH(nullptr)
+}
+
+// list.has_fast_len(): True when the underlying boxed list reports a fast
+// length, False otherwise.
+PyObject* TPyLazyList::HasFastLen(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        const bool fastLength = NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get());
+        if (!fastLength) {
+            Py_RETURN_FALSE;
+        }
+        Py_RETURN_TRUE;
+    } PY_CATCH(nullptr)
+}
+
+// list.has_items(): True when the underlying boxed list reports at least one
+// item, False otherwise.
+PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyLazyList* list = Cast(self);
+        const bool nonEmpty = NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get());
+        if (!nonEmpty) {
+            Py_RETURN_FALSE;
+        }
+        Py_RETURN_TRUE;
+    } PY_CATCH(nullptr)
+}
+
+int TPyLazyList::Bool(PyObject* self)
+{
+ PY_TRY {
+ TPyLazyList* list = Cast(self);
+ if (list->CachedLength == -1) {
+ return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0;
+ } else {
+ return list->CachedLength > 0 ? 1 : 0;
+ }
+ } PY_CATCH(-1)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyListIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates an iterator over `list`: keeps a reference to the list, obtains a
+// boxed iterator from its underlying value and starts the consumed-item
+// counter at zero. Returns a new reference.
+PyObject* TPyLazyListIterator::New(TPyLazyList* list)
+{
+    TPyLazyListIterator* result = new TPyLazyListIterator;
+    PyObject_INIT(result, &PyLazyListIteratorType);
+
+    result->List.Reset(list);
+    result->Iterator.Set(list->CastCtx->PyCtx, NUdf::TBoxedValueAccessor::GetListIterator(*list->Value.Get()));
+    result->Length = 0;
+    result->CastCtx = list->CastCtx;
+
+    return reinterpret_cast<PyObject*>(result);
+}
+
+// next(iterator): returns the next visible item converted to a Python object,
+// or nullptr when the underlying iterator is exhausted. After each returned
+// item up to Step - 1 underlying items are skipped. On exhaustion the number
+// of consumed items is stored as the list's length if it was still unknown.
+PyObject* TPyLazyListIterator::Next(PyObject* self)
+{
+    PY_TRY {
+        TPyLazyListIterator* iter = Cast(self);
+        TPyLazyList* list = iter->List.Get();
+
+        NUdf::TUnboxedValue item;
+        if (!iter->Iterator.Get().Next(item)) {
+            // store calculated list length after traverse over whole list
+            if (list->CachedLength == -1) {
+                list->CachedLength = iter->Length;
+            }
+            return nullptr;
+        }
+
+        ++iter->Length;
+
+        // Step over the items hidden by the stride, counting each one.
+        auto remaining = list->Step;
+        while (--remaining && iter->Iterator.Get().Skip()) {
+            ++iter->Length;
+        }
+
+        return ToPyObject(list->CastCtx, list->ItemType, item).Release();
+    } PY_CATCH(nullptr)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinList interface
+//////////////////////////////////////////////////////////////////////////////
+// Python-side list object that carries a raw pointer to an array of unboxed
+// values (Elements) together with its Length and a Step.
+// NOTE(review): the method implementations are outside this excerpt; the
+// exact meaning of Elements/Length/Step should be confirmed against them.
+struct TPyThinList
+{
+ using TPtr = NUdf::TRefCountedPtr<TPyThinList, TPyPtrOps<TPyThinList>>;
+
+ PyObject_HEAD;
+ TPyCastContext::TPtr CastCtx;
+ const NUdf::TType* ItemType;
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ const NUdf::TUnboxedValue* Elements;
+ Py_ssize_t Length;
+ Py_ssize_t Step;
+
+ inline static TPyThinList* Cast(PyObject* o) {
+ return reinterpret_cast<TPyThinList*>(o);
+ }
+
+ inline static void Dealloc(PyObject* self) {
+ delete Cast(self);
+ }
+
+ // Defaults describe an empty list: no value, no elements, length 0, step 1.
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* itemType,
+ NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(),
+ const NUdf::TUnboxedValue* elements = nullptr,
+ Py_ssize_t length = 0,
+ Py_ssize_t step = 1);
+
+ static int Bool(PyObject* self);
+ static PyObject* Repr(PyObject* self);
+ static PyObject* Iter(PyObject* self);
+ static Py_ssize_t Len(PyObject* self);
+ static PyObject* Subscript(PyObject* self, PyObject* slice);
+ static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */);
+ static PyObject* Reversed(PyObject* self, PyObject* /* arg */);
+ static PyObject* Take(PyObject* self, PyObject* arg);
+ static PyObject* Skip(PyObject* self, PyObject* arg);
+ static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */);
+ static PyObject* HasItems(PyObject* self, PyObject* /* arg */);
+};
+
+// Mapping protocol for TPyThinList: len() and read-only subscription
+// (item assignment is not provided).
+PyMappingMethods ThinListMapping = {
+ INIT_MEMBER(mp_length, TPyThinList::Len),
+ INIT_MEMBER(mp_subscript, TPyThinList::Subscript),
+ INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+// Number protocol for TPyThinList. Only the truth-value slot is populated
+// (nb_bool on Python 3, nb_nonzero on Python 2) with TPyThinList::Bool;
+// every other slot is nullptr. The #if blocks follow the differences in the
+// PyNumberMethods layout between the two major Python versions.
+PyNumberMethods ThinListNumbering = {
+ INIT_MEMBER(nb_add, nullptr),
+ INIT_MEMBER(nb_subtract, nullptr),
+ INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_divide, nullptr),
+#endif
+ INIT_MEMBER(nb_remainder, nullptr),
+ INIT_MEMBER(nb_divmod, nullptr),
+ INIT_MEMBER(nb_power, nullptr),
+ INIT_MEMBER(nb_negative, nullptr),
+ INIT_MEMBER(nb_positive, nullptr),
+ INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_bool, TPyThinList::Bool),
+#else
+ INIT_MEMBER(nb_nonzero, TPyThinList::Bool),
+#endif
+ INIT_MEMBER(nb_invert, nullptr),
+ INIT_MEMBER(nb_lshift, nullptr),
+ INIT_MEMBER(nb_rshift, nullptr),
+ INIT_MEMBER(nb_and, nullptr),
+ INIT_MEMBER(nb_xor, nullptr),
+ INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_coerce, nullptr),
+#endif
+ INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_reserved, nullptr),
+#else
+ INIT_MEMBER(nb_long, nullptr),
+#endif
+ INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+ INIT_MEMBER(nb_oct, nullptr),
+ INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+ INIT_MEMBER(nb_inplace_add, nullptr),
+ INIT_MEMBER(nb_inplace_subtract, nullptr),
+ INIT_MEMBER(nb_inplace_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_remainder, nullptr),
+ INIT_MEMBER(nb_inplace_power, nullptr),
+ INIT_MEMBER(nb_inplace_lshift, nullptr),
+ INIT_MEMBER(nb_inplace_rshift, nullptr),
+ INIT_MEMBER(nb_inplace_and, nullptr),
+ INIT_MEMBER(nb_inplace_xor, nullptr),
+ INIT_MEMBER(nb_inplace_or, nullptr),
+
+ INIT_MEMBER(nb_floor_divide, nullptr),
+ INIT_MEMBER(nb_true_divide, nullptr),
+ INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+ INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+ INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(nb_matrix_multiply, nullptr),
+ INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+// Python-visible methods of the thin list type (installed through tp_methods
+// of PyThinListType). "__reversed__" and "reversed" share the same
+// implementation; "take" and "skip" receive exactly one argument (METH_O),
+// the remaining methods take none (METH_NOARGS).
+static PyMethodDef TPyThinListMethods[] = {
+ { "__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr },
+ { "to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ },
+ { "reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__ },
+ { "take", TPyThinList::Take, METH_O, take__doc__ },
+ { "skip", TPyThinList::Skip, METH_O, skip__doc__ },
+ { "has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__ },
+ { "has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__ },
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+// On Python 3 builds Py_TPFLAGS_HAVE_ITER is defined here as 0 so that the
+// tp_flags initializers below compile unchanged for both major versions.
+// NOTE(review): presumably Python 3 headers no longer provide this flag - confirm.
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0
+#endif
+
+// Type object of the thin list wrapper, exposed to Python as "yql.TList".
+// Instances are TPyThinList objects; behaviour comes from the number and
+// mapping slot tables, tp_iter and the tp_methods table. The entries follow
+// the field order of PyTypeObject, with version-dependent fields selected by
+// the preprocessor conditions.
+PyTypeObject PyThinListType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TList"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyThinList)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyThinList::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , TPyThinList::Repr),
+ INIT_MEMBER(tp_as_number , &ThinListNumbering),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , &ThinListMapping),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TList object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , TPyThinList::Iter),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , TPyThinListMethods),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinListIterator interface
+//////////////////////////////////////////////////////////////////////////////
+// Python iterator object over a TPyThinList.
+//   List     - owning reference to the list being iterated
+//   Elements - cursor into the list's element array; Next() advances it by
+//              the list's Step before reading
+//   Count    - number of items still to be produced
+struct TPyThinListIterator
+{
+    PyObject_HEAD;
+    TPyThinList::TPtr List;
+    const NUdf::TUnboxedValue* Elements;
+    Py_ssize_t Count;
+
+    // Creates an iterator positioned before the first item of `list`.
+    static PyObject* New(TPyThinList* list);
+
+    // tp_iternext slot: yields the next item, or nullptr when exhausted.
+    static PyObject* Next(PyObject* self);
+
+    // Reinterprets a generic Python object pointer as this iterator type.
+    inline static TPyThinListIterator* Cast(PyObject* o) {
+        return reinterpret_cast<TPyThinListIterator*>(o);
+    }
+
+    // tp_dealloc slot: the object was created with `new`, so release it with `delete`.
+    inline static void Dealloc(PyObject* self) {
+        TPyThinListIterator* const iter = Cast(self);
+        delete iter;
+    }
+
+    // tp_repr slot: fixed textual representation, independent of the instance.
+    inline static PyObject* Repr(PyObject* /* self */) {
+        return PyRepr("<yql.TListIterator>").Release();
+    }
+};
+
+// Type object of the thin list iterator, exposed to Python as
+// "yql.TListIterator". Instances are TPyThinListIterator objects: iter() on
+// an iterator returns the iterator itself and next() is served by
+// TPyThinListIterator::Next. The entries follow the field order of
+// PyTypeObject, with version-dependent fields selected by the preprocessor.
+PyTypeObject PyThinListIteratorType = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    INIT_MEMBER(tp_name, "yql.TListIterator"),
+    INIT_MEMBER(tp_basicsize, sizeof(TPyThinListIterator)),
+    INIT_MEMBER(tp_itemsize, 0),
+    INIT_MEMBER(tp_dealloc, TPyThinListIterator::Dealloc),
+#if PY_VERSION_HEX < 0x030800b4
+    INIT_MEMBER(tp_print, nullptr),
+#else
+    INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+    INIT_MEMBER(tp_getattr, nullptr),
+    INIT_MEMBER(tp_setattr, nullptr),
+#if PY_MAJOR_VERSION >= 3
+    INIT_MEMBER(tp_as_async, nullptr),
+#else
+    INIT_MEMBER(tp_compare, nullptr),
+#endif
+    INIT_MEMBER(tp_repr, TPyThinListIterator::Repr),
+    INIT_MEMBER(tp_as_number, nullptr),
+    INIT_MEMBER(tp_as_sequence, nullptr),
+    INIT_MEMBER(tp_as_mapping, nullptr),
+    INIT_MEMBER(tp_hash, nullptr),
+    INIT_MEMBER(tp_call, nullptr),
+    INIT_MEMBER(tp_str, nullptr),
+    INIT_MEMBER(tp_getattro, nullptr),
+    INIT_MEMBER(tp_setattro, nullptr),
+    INIT_MEMBER(tp_as_buffer, nullptr),
+    INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER),
+    // Keep the docstring in sync with tp_name (it used to say "yql.ListIterator").
+    INIT_MEMBER(tp_doc, "yql.TListIterator object"),
+    INIT_MEMBER(tp_traverse, nullptr),
+    INIT_MEMBER(tp_clear, nullptr),
+    INIT_MEMBER(tp_richcompare, nullptr),
+    INIT_MEMBER(tp_weaklistoffset, 0),
+    INIT_MEMBER(tp_iter, PyObject_SelfIter),
+    INIT_MEMBER(tp_iternext, TPyThinListIterator::Next),
+    INIT_MEMBER(tp_methods, nullptr),
+    INIT_MEMBER(tp_members, nullptr),
+    INIT_MEMBER(tp_getset, nullptr),
+    INIT_MEMBER(tp_base, nullptr),
+    INIT_MEMBER(tp_dict, nullptr),
+    INIT_MEMBER(tp_descr_get, nullptr),
+    INIT_MEMBER(tp_descr_set, nullptr),
+    INIT_MEMBER(tp_dictoffset, 0),
+    INIT_MEMBER(tp_init, nullptr),
+    INIT_MEMBER(tp_alloc, nullptr),
+    INIT_MEMBER(tp_new, nullptr),
+    INIT_MEMBER(tp_free, nullptr),
+    INIT_MEMBER(tp_is_gc, nullptr),
+    INIT_MEMBER(tp_bases, nullptr),
+    INIT_MEMBER(tp_mro, nullptr),
+    INIT_MEMBER(tp_cache, nullptr),
+    INIT_MEMBER(tp_subclasses, nullptr),
+    INIT_MEMBER(tp_weaklist, nullptr),
+    INIT_MEMBER(tp_del, nullptr),
+    INIT_MEMBER(tp_version_tag, 0),
+#if PY_MAJOR_VERSION >= 3
+    INIT_MEMBER(tp_finalize, nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+    INIT_MEMBER(tp_vectorcall, nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+    INIT_MEMBER(tp_print, nullptr),
+#endif
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinList implementation
+//////////////////////////////////////////////////////////////////////////////
+// Allocates a thin list object describing `length` items that are read from
+// `elements` with stride `step`. `value` is stored in the object's cleanup
+// slot (registered through castCtx->PyCtx).
+// Returns the new object as a generic PyObject pointer.
+PyObject* TPyThinList::New(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* itemType,
+    NUdf::IBoxedValuePtr value,
+    const NUdf::TUnboxedValue* elements,
+    Py_ssize_t length,
+    Py_ssize_t step)
+{
+    TPyThinList* const self = new TPyThinList;
+    PyObject_INIT(self, &PyThinListType);
+
+    // Geometry of the view.
+    self->Elements = elements;
+    self->Length = length;
+    self->Step = step;
+
+    // Conversion context, item type and the backing value.
+    self->CastCtx = castCtx;
+    self->ItemType = itemType;
+    self->Value.Set(castCtx->PyCtx, value);
+
+    return reinterpret_cast<PyObject*>(self);
+}
+
+// tp_repr slot: every thin list is rendered as the same fixed marker string.
+PyObject* TPyThinList::Repr(PyObject* /* self */)
+{
+    return PyRepr("<yql.TList>").Release();
+}
+
+// tp_iter slot: returns a new TPyThinListIterator over this list; failures
+// caught by PY_CATCH are reported to the caller as nullptr.
+PyObject* TPyThinList::Iter(PyObject* self)
+{
+    PY_TRY {
+        return TPyThinListIterator::New(Cast(self));
+    } PY_CATCH(nullptr)
+}
+
+// mp_length slot: the item count is stored in the object, so no iteration is needed.
+Py_ssize_t TPyThinList::Len(PyObject* self)
+{
+    const TPyThinList* const list = Cast(self);
+    return list->Length;
+}
+
+// mp_subscript slot: implements `list[i]` and `list[a:b:c]`.
+//
+// Layout of a thin list view: Elements always points at the lowest-addressed
+// element of the view and |Step| is the distance between neighbours.
+//   Step > 0: item i is Elements[i * Step]
+//   Step < 0: item i is Elements[(Length - 1 - i) * -Step]  (reversed view)
+//
+// self  - the TPyThinList being indexed
+// slice - an index-like object or a slice object
+// Returns a converted item (integer index) or a new thin list sharing the
+// same backing value (slice). On failure returns nullptr with a Python
+// exception set: IndexError for an out-of-range index, ValueError for a zero
+// slice step, TypeError for any other kind of subscript.
+PyObject* TPyThinList::Subscript(PyObject* self, PyObject* slice)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+
+        if (PyIndex_Check(slice)) {
+            Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError);
+
+            // -1 is also the failure marker of PyNumber_AsSsize_t; do not
+            // return an item while an exception is pending.
+            if (index == -1 && PyErr_Occurred()) {
+                return nullptr;
+            }
+
+            if (index < 0) {
+                index += list->Length;
+            }
+
+            if (index < 0 || index >= list->Length) {
+                const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length);
+                PyErr_SetObject(PyExc_IndexError, error.Get());
+                return nullptr;
+            }
+
+            // Translate the logical index into an offset from Elements.
+            const Py_ssize_t offset = list->Step > 0
+                ? index * list->Step
+                : (list->Length - 1 - index) * -list->Step;
+
+            return ToPyObject(list->CastCtx, list->ItemType, list->Elements[offset]).Release();
+        }
+
+        if (PySlice_Check(slice)) {
+            Py_ssize_t start, stop, step, size;
+
+            if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) {
+                return nullptr;
+            }
+
+            if (!step) {
+                PyErr_SetString(PyExc_ValueError, "slice step cannot be zero");
+                return nullptr;
+            }
+
+            if (size > 0) {
+                // Logical index (within this view) of the last selected item.
+                const Py_ssize_t last = start + step * (size - 1);
+
+                // The new view must start at the lowest-addressed selected
+                // item. In a forward view that is the smallest selected
+                // index, in a reversed view the greatest one. `stop` must not
+                // be used here: for |step| > 1 it need not be adjacent to the
+                // last selected item.
+                Py_ssize_t skip;
+                if (list->Step > 0) {
+                    skip = list->Step * (step > 0 ? start : last);
+                } else {
+                    const Py_ssize_t greatest = step > 0 ? last : start;
+                    skip = -list->Step * (list->Length - 1 - greatest);
+                }
+
+                return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step);
+            } else {
+                return New(list->CastCtx, list->ItemType, list->Value.Get());
+            }
+        }
+
+        const TPyObjectPtr type = PyObject_Type(slice);
+        const TPyObjectPtr repr = PyObject_Repr(type.Get());
+        const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get());
+        PyErr_SetObject(PyExc_TypeError, error.Get());
+        return nullptr;
+    } PY_CATCH(nullptr)
+}
+
+#undef SLICEOBJ
+
+// Python method "to_index_dict": asks the value builder for an index
+// dictionary of the backing list value and wraps it as a lazy dict whose
+// payload type is the list item type (the key type argument is passed as nullptr).
+// NOTE(review): the whole backing value is passed, not the Elements/Length/Step
+// view - confirm this is intended for sliced or reversed lists.
+PyObject* TPyThinList::ToIndexDict(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyThinList* thin = Cast(self);
+        const auto& ctx = thin->CastCtx;
+        const auto indexDict = ctx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(thin->Value.Get().Get()));
+        return ToPyLazyDict(ctx, nullptr, thin->ItemType, indexDict).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Python methods "reversed" / "__reversed__": returns a new thin list over
+// the same elements and length with the sign of Step flipped. No elements
+// are copied.
+PyObject* TPyThinList::Reversed(PyObject* self, PyObject* /* arg */)
+{
+    PY_TRY {
+        TPyThinList* src = Cast(self);
+        const Py_ssize_t flipped = -src->Step;
+        return New(src->CastCtx, src->ItemType, src->Value.Get(), src->Elements, src->Length, flipped);
+    } PY_CATCH(nullptr)
+}
+
+// Python method "take": returns a thin list with at most `arg` leading items
+// of this list, sharing the same backing value.
+//
+// arg - item count, converted with CastIndex; a negative result is treated
+//       as failure and nullptr is returned (CastIndex is expected to have
+//       set the Python error - not visible here).
+PyObject* TPyThinList::Take(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+        const Py_ssize_t count = CastIndex(arg, "take");
+        if (count < 0) {
+            return nullptr;
+        }
+
+        const Py_ssize_t size = std::min(count, list->Length);
+        if (!size) {
+            return New(list->CastCtx, list->ItemType, list->Value.Get());
+        }
+
+        const NUdf::TUnboxedValue* base = list->Elements;
+        if (list->Step < 0) {
+            // In a reversed view item i lives at Elements[(Length - 1 - i) * -Step],
+            // so the leading `size` items occupy the highest addresses. The
+            // new view starts at the lowest of them, i.e. (Length - size)
+            // strides above Elements. The stride must be applied to the whole
+            // difference, otherwise the result is wrong for |Step| > 1.
+            base += (list->Length - size) * -list->Step;
+        }
+
+        return New(list->CastCtx, list->ItemType, list->Value.Get(), base, size, list->Step);
+    } PY_CATCH(nullptr)
+}
+
+// Python method "skip": returns a thin list without the first `arg` items of
+// this list, sharing the same backing value.
+//
+// arg - item count, converted with CastIndex; a negative result is treated
+//       as failure and nullptr is returned (CastIndex is expected to have
+//       set the Python error - not visible here).
+PyObject* TPyThinList::Skip(PyObject* self, PyObject* arg)
+{
+    PY_TRY {
+        TPyThinList* list = Cast(self);
+        const Py_ssize_t count = CastIndex(arg, "skip");
+        if (count < 0) {
+            return nullptr;
+        }
+
+        const Py_ssize_t size = std::max(list->Length - count, Py_ssize_t(0));
+        if (!size) {
+            // Keep the backing value on the empty result as Take() and
+            // Subscript() do, so methods that read Value (to_index_dict)
+            // never see a null boxed pointer.
+            return New(list->CastCtx, list->ItemType, list->Value.Get());
+        }
+
+        // Forward view: drop the `count` lowest-addressed items. Reversed
+        // view: the skipped items are the highest-addressed ones, so the
+        // base pointer stays and only the length shrinks.
+        const NUdf::TUnboxedValue* base = list->Step > 0
+            ? list->Elements + count * list->Step
+            : list->Elements;
+
+        return New(list->CastCtx, list->ItemType, list->Value.Get(), base, size, list->Step);
+    } PY_CATCH(nullptr)
+}
+
+// Python method "has_fast_len": always True for a thin list, whose length is
+// stored in the object (see Len).
+PyObject* TPyThinList::HasFastLen(PyObject* /* self */, PyObject* /* arg */)
+{
+    Py_RETURN_TRUE;
+}
+
+// Python method "has_items": True when the list holds at least one item.
+PyObject* TPyThinList::HasItems(PyObject* self, PyObject* /* arg */)
+{
+    const bool nonEmpty = Cast(self)->Length > 0;
+    if (nonEmpty) {
+        Py_RETURN_TRUE;
+    }
+    Py_RETURN_FALSE;
+}
+
+// nb_bool / nb_nonzero slot: 1 for a non-empty list, 0 for an empty one.
+int TPyThinList::Bool(PyObject* self)
+{
+    const TPyThinList* const list = Cast(self);
+    if (list->Length > 0) {
+        return 1;
+    }
+    return 0;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyThinListIterator implementation
+//////////////////////////////////////////////////////////////////////////////
+// Creates an iterator over `list`. The cursor is parked one stride *before*
+// the first item to be produced, because Next() advances it by the list's
+// Step before dereferencing: for a forward list that is Elements - Step, for
+// a reversed list (Step < 0) it is Length strides above Elements.
+PyObject* TPyThinListIterator::New(TPyThinList* list)
+{
+    TPyThinListIterator* const iter = new TPyThinListIterator;
+    PyObject_INIT(iter, &PyThinListIteratorType);
+
+    iter->List.Reset(list);
+
+    const Py_ssize_t step = list->Step;
+    if (step > 0) {
+        iter->Elements = list->Elements - step;
+    } else {
+        iter->Elements = list->Elements - list->Length * step;
+    }
+
+    iter->Count = list->Length;
+    return reinterpret_cast<PyObject*>(iter);
+}
+
+// tp_iternext slot: moves the cursor by the list's Step and returns the item
+// it then points at, converted to a Python object. When no items remain,
+// nullptr is returned without setting an exception.
+PyObject* TPyThinListIterator::Next(PyObject* self)
+{
+    PY_TRY {
+        TPyThinListIterator* const iter = Cast(self);
+
+        if (!iter->Count) {
+            return nullptr;
+        }
+
+        --iter->Count;
+        TPyThinList* const list = iter->List.Get();
+        iter->Elements += list->Step;
+        return ToPyObject(list->CastCtx, list->ItemType, *iter->Elements).Release();
+    } PY_CATCH(nullptr)
+}
+
+// Wraps a list value for Python. When the value exposes a direct element
+// array (GetElements() is non-null) a thin list over that array is created;
+// otherwise the value is wrapped as a lazy list.
+TPyObjectPtr ToPyLazyList(
+    const TPyCastContext::TPtr& castCtx,
+    const NUdf::TType* itemType,
+    const NUdf::TUnboxedValuePod& value)
+{
+    const auto elements = value.GetElements();
+    if (!elements) {
+        return TPyLazyList::New(castCtx, itemType, value.AsBoxed());
+    }
+    return TPyThinList::New(castCtx, itemType, value.AsBoxed(), elements, value.GetListLength());
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_list.h b/yql/essentials/udfs/common/python/bindings/py_list.h
new file mode 100644
index 0000000000..9db170a795
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_list.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Type objects of the Python-side list wrappers and of their iterators.
+extern PyTypeObject PyLazyListIteratorType;
+extern PyTypeObject PyLazyListType;
+extern PyTypeObject PyThinListIteratorType;
+extern PyTypeObject PyThinListType;
+
+// Wraps the list `value` (with items of `itemType`) into a Python list
+// wrapper object.
+TPyObjectPtr ToPyLazyList(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* itemType,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// NOTE(review): presumably builds a lazy value of `type` backed by the Python
+// callable `callableObj` - the implementation is not part of this header.
+NKikimr::NUdf::TUnboxedValue FromPyLazyGenerator(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ TPyObjectPtr callableObj);
+
+// NOTE(review): presumably builds a lazy value of `type` backed by the Python
+// iterable `iterableObj` - the implementation is not part of this header.
+NKikimr::NUdf::TUnboxedValue FromPyLazyIterable(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ TPyObjectPtr iterableObj);
+
+// NOTE(review): presumably builds a lazy value of `type` backed by the Python
+// iterator `iteratorObj` - the implementation is not part of this header.
+NKikimr::NUdf::TUnboxedValue FromPyLazyIterator(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ TPyObjectPtr iteratorObj);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp
new file mode 100644
index 0000000000..f16165fc54
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp
@@ -0,0 +1,1025 @@
+#include "ut3/py_test_engine.h"
+
+#include <yql/essentials/public/udf/udf_ut_helpers.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyListTest) {
+ // An empty Python list must convert to a boxed list value of length 0.
+ Y_UNIT_TEST(FromPyEmptyList) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TListType<ui32>>(
+ "def Test(): return []",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetListLength(), 0);
+ });
+ }
+
+ // A Python list of four ints must convert to a boxed list of length 4
+ // whose iterator yields 1, 2, 3, 4 in order and then reports the end.
+ Y_UNIT_TEST(FromPyList) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TListType<ui32>>(
+ "def Test(): return [1, 2, 3, 4]",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetListLength(), 4);
+ const auto it = value.GetListIterator();
+ NUdf::TUnboxedValue item;
+
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1);
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2);
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3);
+ UNIT_ASSERT(it.Next(item));
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4);
+ UNIT_ASSERT(false == it.Next(item));
+ });
+ }
+
+ // An empty list built by the value builder must arrive in Python as an
+ // object that reports a fast length and len() == 0.
+ Y_UNIT_TEST(ToPyEmptyList) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<char*>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ return vb.NewEmptyList();
+ },
+ "def Test(value):\n"
+ " assert value.has_fast_len()\n"
+ " assert len(value) == 0\n");
+ }
+
+ // A three-element list of doubles must arrive in Python with a fast
+ // length of 3, float items and the original values in order.
+ Y_UNIT_TEST(ToPyList) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(value):\n"
+ " assert value.has_fast_len()\n"
+ " assert len(value) == 3\n"
+ " assert all(isinstance(v, float) for v in value)\n"
+ " assert list(value) == [0.1, 0.2, 0.3]\n");
+ }
+
+ // A Python tuple is accepted where a list type is expected: the result
+ // must be a boxed list of length 3 whose items are 1, 2, 3 in order.
+ Y_UNIT_TEST(FromPyTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TListType<ui32>>(
+ "def Test(): return (1, 2, 3)",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetListLength(), 3);
+
+ ui32 expected = 1;
+ auto it = value.GetListIterator();
+ for (NUdf::TUnboxedValue item; it.Next(item);) {
+ ui32 actual = item.Get<ui32>();
+ UNIT_ASSERT_EQUAL(actual, expected);
+ expected++;
+ }
+ });
+ }
+
+ // Iterating a list built with vb.NewList from Python: the object must
+ // expose __iter__, yield 0.1, 0.2, 0.3 and then raise StopIteration.
+ Y_UNIT_TEST(ThinListIteration) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(value):\n"
+ " assert '__iter__' in dir(value)\n"
+ " it = iter(value)\n"
+ " assert next(it) == 0.1\n"
+ " assert next(it) == 0.2\n"
+ " assert next(it) == 0.3\n"
+ " try:\n"
+ " next(it)\n"
+ " except StopIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // reversed() on a ten-element list built with vb.NewList must be
+ // supported (__reversed__ present) and yield the items back to front.
+ // NOTE(review): the zip()-based comparison stops at the shorter sequence,
+ // so it does not verify the length of the reversed result.
+ Y_UNIT_TEST(ThinListReversed) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__reversed__' in dir(v)\n"
+ " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n"
+ );
+ }
+
+ // reversed() on a TLazyList<false>(0, 3) value must be supported and,
+ // per the assertions, yield 2, 1, 0 followed by StopIteration.
+ Y_UNIT_TEST(LazyListReversed) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(v):\n"
+ " assert '__reversed__' in dir(v)\n"
+ " it = iter(reversed(v))\n"
+ " assert next(it) == 2\n"
+ " assert next(it) == 1\n"
+ " assert next(it) == 0\n"
+ " try:\n"
+ " next(it)\n"
+ " except StopIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Iterating a TLazyList<false>(0, 3) value from Python: the object must
+ // expose __iter__, yield 0, 1, 2 and then raise StopIteration.
+ Y_UNIT_TEST(LazyListIteration) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(value):\n"
+ " assert '__iter__' in dir(value)\n"
+ " it = iter(value)\n"
+ " assert next(it) == 0\n"
+ " assert next(it) == 1\n"
+ " assert next(it) == 2\n"
+ " try:\n"
+ " next(it)\n"
+ " except StopIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Subscripting a lazy list with a dict must raise TypeError.
+ Y_UNIT_TEST(LazyListInvalidIndexType) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[{}])\n"
+ " except TypeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Subscripting a list built with vb.NewList with a dict must raise TypeError.
+ Y_UNIT_TEST(ThinListInvalidIndexType) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[{}])\n"
+ " except TypeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Slicing a lazy list with a zero step must raise ValueError.
+ Y_UNIT_TEST(LazyListZeroSliceStep) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[::0])\n"
+ " except ValueError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Slicing a list built with vb.NewList with a zero step must raise ValueError.
+ Y_UNIT_TEST(ThinListZeroSliceStep) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<double>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0.1),
+ NUdf::TUnboxedValuePod(0.2),
+ NUdf::TUnboxedValuePod(0.3)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[::0])\n"
+ " except ValueError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Slices with step +1/-1 of a ten-element list built with vb.NewList are
+ // compared against the same slices of the Python list range(0, 10).
+ // NOTE(review): the `v[:-2:-1]` assertion appears twice in the script.
+ Y_UNIT_TEST(ThinListSlice) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__len__' in dir(v)\n"
+ " assert list(v[::1]) == e[::1]\n"
+ " assert list(v[::-1]) == e[::-1]\n"
+ " assert list(v[1::1]) == e[1::1]\n"
+ " assert list(v[2::1]) == e[2::1]\n"
+ " assert list(v[3::1]) == e[3::1]\n"
+ " assert list(v[:-1:1]) == e[:-1:1]\n"
+ " assert list(v[:-2:1]) == e[:-2:1]\n"
+ " assert list(v[:-3:1]) == e[:-3:1]\n"
+ " assert list(v[1::-1]) == e[1::-1]\n"
+ " assert list(v[2::-1]) == e[2::-1]\n"
+ " assert list(v[3::-1]) == e[3::-1]\n"
+ " assert list(v[:-1:-1]) == e[:-1:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[:-3:-1]) == e[:-3:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
+ " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
+ " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
+ " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
+ " assert list(v[:7:1]) == e[:7:1]\n"
+ " assert list(v[-1:4]) == e[-1:4]\n"
+ " assert list(v[5:11]) == e[5:11]\n"
+ " assert list(v[4:1]) == e[4:1]\n"
+ " assert list(v[5:-2]) == e[5:-2]\n"
+ );
+ }
+
+ // Same slice matrix as ThinListSlice, applied to reversed(x) and compared
+ // against the reversed Python list. Only steps of +1/-1 are exercised.
+ // NOTE(review): the `v[:-2:-1]` assertion appears twice in the script.
+ Y_UNIT_TEST(ThinListSliceOverReversed) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(x):\n"
+ " e = list(reversed(range(0, 10)))\n"
+ " v = reversed(x)\n"
+ " assert list(v[::1]) == e[::1]\n"
+ " assert list(v[::-1]) == e[::-1]\n"
+ " assert list(v[1::1]) == e[1::1]\n"
+ " assert list(v[2::1]) == e[2::1]\n"
+ " assert list(v[3::1]) == e[3::1]\n"
+ " assert list(v[:-1:1]) == e[:-1:1]\n"
+ " assert list(v[:-2:1]) == e[:-2:1]\n"
+ " assert list(v[:-3:1]) == e[:-3:1]\n"
+ " assert list(v[1::-1]) == e[1::-1]\n"
+ " assert list(v[2::-1]) == e[2::-1]\n"
+ " assert list(v[3::-1]) == e[3::-1]\n"
+ " assert list(v[:-1:-1]) == e[:-1:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[:-3:-1]) == e[:-3:-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
+ " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
+ " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
+ " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
+ " assert list(v[:7:1]) == e[:7:1]\n"
+ " assert list(v[-1:4]) == e[-1:4]\n"
+ " assert list(v[5:11]) == e[5:11]\n"
+ " assert list(v[4:1]) == e[4:1]\n"
+ " assert list(v[5:-2]) == e[5:-2]\n"
+ );
+ }
+
+ // Slices with step +1/-1 of a TLazyList<true>(0, 10) value are compared
+ // against the same slices of the Python list range(0, 10); len() must
+ // match as well.
+ Y_UNIT_TEST(LazyListSlice) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10));
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__len__' in dir(v)\n"
+ " assert len(v) == len(e)\n"
+ " assert list(v[::1]) == e[::1]\n"
+ " assert list(v[::-1]) == e[::-1]\n"
+ " assert list(v[3:]) == e[3:]\n"
+ " assert list(v[-2:]) == e[-2:]\n"
+ " assert list(v[2::-1]) == e[2::-1]\n"
+ " assert list(v[:-2:-1]) == e[:-2:-1]\n"
+ " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
+ " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
+ " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
+ " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
+ " assert list(v[:7:1]) == e[:7:1]\n"
+ " assert list(v[-1:4]) == e[-1:4]\n"
+ " assert list(v[5:11]) == e[5:11]\n"
+ " assert list(v[4:1]) == e[4:1]\n"
+ " assert list(v[5:-2]) == e[5:-2]\n"
+ );
+ }
+
+    // Iterates stepped slices (and slices of slices) of a twenty-element
+    // list built with vb.NewList and compares them with the same slices of
+    // the Python list range(0, 20). The results are materialized with
+    // list() and compared for equality: a zip()-based comparison stops at
+    // the shorter sequence and would accept a truncated or empty result.
+    Y_UNIT_TEST(ThinListIterateSliceWithStep) {
+        TPythonTestEngine engine;
+        engine.ToPython<NUdf::TListType<i32>>(
+                [](const TType* type, const NUdf::IValueBuilder& vb) {
+                    Y_UNUSED(type);
+                    std::array<NUdf::TUnboxedValue, 20U> list = {{
+                        NUdf::TUnboxedValuePod(0U),
+                        NUdf::TUnboxedValuePod(1U),
+                        NUdf::TUnboxedValuePod(2U),
+                        NUdf::TUnboxedValuePod(3U),
+                        NUdf::TUnboxedValuePod(4U),
+                        NUdf::TUnboxedValuePod(5U),
+                        NUdf::TUnboxedValuePod(6U),
+                        NUdf::TUnboxedValuePod(7U),
+                        NUdf::TUnboxedValuePod(8U),
+                        NUdf::TUnboxedValuePod(9U),
+                        NUdf::TUnboxedValuePod(10U),
+                        NUdf::TUnboxedValuePod(11U),
+                        NUdf::TUnboxedValuePod(12U),
+                        NUdf::TUnboxedValuePod(13U),
+                        NUdf::TUnboxedValuePod(14U),
+                        NUdf::TUnboxedValuePod(15U),
+                        NUdf::TUnboxedValuePod(16U),
+                        NUdf::TUnboxedValuePod(17U),
+                        NUdf::TUnboxedValuePod(18U),
+                        NUdf::TUnboxedValuePod(19U)
+                    }};
+                    return vb.NewList(list.data(), list.size());
+                },
+                "def Test(v):\n"
+                "    e = list(range(0, 20))\n"
+                "    assert list(v[::2]) == e[::2]\n"
+                "    assert list(v[3:8:2]) == e[3:8:2]\n"
+                "    assert list(v[::-2]) == e[::-2]\n"
+                "    assert list(v[::-3]) == e[::-3]\n"
+                "    assert list(v[:3:-3]) == e[:3:-3]\n"
+                "    assert list(v[-7::-3]) == e[-7::-3]\n"
+                "    assert list(v[-6::-3]) == e[-6::-3]\n"
+                "    assert list(v[-5::-3]) == e[-5::-3]\n"
+                "    assert list(v[:-2:-2]) == e[:-2:-2]\n"
+                "    assert list(v[-2:-6:-2]) == e[-2:-6:-2]\n"
+                "    assert list(v[2:-6:-2][::2]) == e[2:-6:-2][::2]\n"
+                "    assert list(v[2:6:-2][:-2:-2]) == e[2:6:-2][:-2:-2]\n"
+                "    assert list(v[:-2:-2][:2:3]) == e[:-2:-2][:2:3]\n"
+                "    assert list(v[:-2:-2][:2:-3]) == e[:-2:-2][:2:-3]\n"
+                "    assert list(v[:-2:2][:2:3]) == e[:-2:2][:2:3]\n"
+        );
+    }
+
+ // Iterates stepped slices (and slices of slices) of a
+ // TLazyList<true>(0, 20) value and compares them pairwise with the same
+ // slices of the Python list range(0, 20).
+ // NOTE(review): zip() stops at the shorter sequence, so these assertions
+ // also pass when the lazy slice yields fewer items than expected.
+ Y_UNIT_TEST(LazyListIterateSliceWithStep) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20));
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 20))\n"
+ " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n"
+ );
+ }
+
+ // Indexes (with positive and negative indices) into stepped slices of a
+ // twenty-element list built with vb.NewList and compares each item with
+ // the same expression on the Python list range(0, 20).
+ Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 20U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U),
+ NUdf::TUnboxedValuePod(10U),
+ NUdf::TUnboxedValuePod(11U),
+ NUdf::TUnboxedValuePod(12U),
+ NUdf::TUnboxedValuePod(13U),
+ NUdf::TUnboxedValuePod(14U),
+ NUdf::TUnboxedValuePod(15U),
+ NUdf::TUnboxedValuePod(16U),
+ NUdf::TUnboxedValuePod(17U),
+ NUdf::TUnboxedValuePod(18U),
+ NUdf::TUnboxedValuePod(19U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 20))\n"
+ " assert v[::2][3] == e[::2][3]\n"
+ " assert v[::2][5] == e[::2][5]\n"
+ " assert v[::2][-3] == e[::2][-3]\n"
+ " assert v[::2][-7] == e[::2][-7]\n"
+ " assert v[2::2][4] == e[2::2][4]\n"
+ " assert v[2::2][5] == e[2::2][5]\n"
+ " assert v[2::2][-7] == e[2::2][-7]\n"
+ " assert v[2::2][-2] == e[2::2][-2]\n"
+ " assert v[:-3:2][2] == e[:-3:2][2]\n"
+ " assert v[:-3:2][4] == e[:-3:2][4]\n"
+ " assert v[:-3:2][-1] == e[:-3:2][-1]\n"
+ " assert v[:-3:2][-2] == e[:-3:2][-2]\n"
+ " assert v[:-4:3][2] == e[:-4:3][2]\n"
+ " assert v[:-4:3][4] == e[:-4:3][4]\n"
+ " assert v[:-4:3][-3] == e[:-4:3][-3]\n"
+ " assert v[:-4:3][-2] == e[:-4:3][-2]\n"
+ " assert v[-6::-3][1] == e[-6::-3][1]\n"
+ " assert v[-6::-3][3] == e[-6::-3][3]\n"
+ " assert v[-6::-3][-4] == e[-6::-3][-4]\n"
+ " assert v[-6::-3][-1] == e[-6::-3][-1]\n"
+ );
+ }
+
+ // Indexes (with positive and negative indices) into stepped slices of a
+ // TLazyList<true>(0, 20) value and compares each item with the same
+ // expression on the Python list range(0, 20).
+ Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20));
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 20))\n"
+ " assert v[::2][3] == e[::2][3]\n"
+ " assert v[::2][5] == e[::2][5]\n"
+ " assert v[::2][-3] == e[::2][-3]\n"
+ " assert v[::2][-7] == e[::2][-7]\n"
+ " assert v[2::2][4] == e[2::2][4]\n"
+ " assert v[2::2][5] == e[2::2][5]\n"
+ " assert v[2::2][-7] == e[2::2][-7]\n"
+ " assert v[2::2][-2] == e[2::2][-2]\n"
+ " assert v[:-3:2][2] == e[:-3:2][2]\n"
+ " assert v[:-3:2][4] == e[:-3:2][4]\n"
+ " assert v[:-3:2][-1] == e[:-3:2][-1]\n"
+ " assert v[:-3:2][-2] == e[:-3:2][-2]\n"
+ " assert v[:-4:3][2] == e[:-4:3][2]\n"
+ " assert v[:-4:3][4] == e[:-4:3][4]\n"
+ " assert v[:-4:3][-3] == e[:-4:3][-3]\n"
+ " assert v[:-4:3][-2] == e[:-4:3][-2]\n"
+ " assert v[-6::-3][1] == e[-6::-3][1]\n"
+ " assert v[-6::-3][3] == e[-6::-3][3]\n"
+ " assert v[-6::-3][-4] == e[-6::-3][-4]\n"
+ " assert v[-6::-3][-1] == e[-6::-3][-1]\n"
+ );
+ }
+
+ // Checks positive and negative integer indexing on a list built with
+ // IValueBuilder::NewList from ten materialized items (0..9), compared with
+ // list(range(0, 10)).
+ Y_UNIT_TEST(ThinListByIndex) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__getitem__' in dir(v)\n"
+ " assert v[0] == e[0]\n"
+ " assert v[3] == e[3]\n"
+ " assert v[5] == e[5]\n"
+ " assert v[9] == e[9]\n"
+ " assert v[-1] == e[-1]\n"
+ " assert v[-4] == e[-4]\n"
+ " assert v[-9] == e[-9]\n"
+ " assert v[-10] == e[-10]\n"
+ );
+ }
+
+ // Same index checks as ThinListByIndex, but against a TLazyList<false>(0, 10)
+ // fixture instead of a materialized list.
+ Y_UNIT_TEST(LazyListByIndex) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10));
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert '__getitem__' in dir(v)\n"
+ " assert v[0] == e[0]\n"
+ " assert v[3] == e[3]\n"
+ " assert v[5] == e[5]\n"
+ " assert v[9] == e[9]\n"
+ " assert v[-1] == e[-1]\n"
+ " assert v[-4] == e[-4]\n"
+ " assert v[-9] == e[-9]\n"
+ " assert v[-10] == e[-10]\n"
+ );
+ }
+
+ // Indexing one past either end of a three-item materialized list must raise
+ // IndexError; any other outcome fails the embedded asserts.
+ Y_UNIT_TEST(ThinListIndexOutOfBounds) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 3U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[3])\n"
+ " except IndexError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ " try:\n"
+ " print(v[-4])\n"
+ " except IndexError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Indexing one past either end of a TLazyList<false>(0, 3) fixture must raise
+ // IndexError; any other outcome fails the embedded asserts.
+ Y_UNIT_TEST(LazyListIndexOutOfBounds) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3));
+ },
+ "def Test(v):\n"
+ " try:\n"
+ " print(v[3])\n"
+ " except IndexError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ " try:\n"
+ " print(v[-4])\n"
+ " except IndexError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Compares several slices of a TLazyList<false>(0, 10) fixture element by
+ // element with the same slices of range(0, 10).
+ // NOTE(review): zip() stops at the shorter input, so each check only compares
+ // the common prefix and passes trivially when either side is empty (e.g. the
+ // [5:1] slice). The v[1::-1] comparison also appears twice.
+ Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10));
+ },
+ "def Test(v):\n"
+ " e = range(0, 10)\n"
+ " assert '__len__' in dir(v)\n"
+ " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n"
+ " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n"
+ " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n"
+ " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n"
+ );
+ }
+
+ // Checks len(), skip(), take() and their composition on a ten-item
+ // materialized list, and expects IndexError when the result of skip(-1)
+ // is turned into a Python list.
+ Y_UNIT_TEST(ThinListTakeSkip) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ std::array<NUdf::TUnboxedValue, 10U> list = {{
+ NUdf::TUnboxedValuePod(0U),
+ NUdf::TUnboxedValuePod(1U),
+ NUdf::TUnboxedValuePod(2U),
+ NUdf::TUnboxedValuePod(3U),
+ NUdf::TUnboxedValuePod(4U),
+ NUdf::TUnboxedValuePod(5U),
+ NUdf::TUnboxedValuePod(6U),
+ NUdf::TUnboxedValuePod(7U),
+ NUdf::TUnboxedValuePod(8U),
+ NUdf::TUnboxedValuePod(9U)
+ }};
+ return vb.NewList(list.data(), list.size());
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert len(v) == len(e)\n"
+ " assert list(v.skip(5)) == e[5:]\n"
+ " assert list(v.take(5)) == e[0:5]\n"
+ " assert list(v.skip(4).take(5)) == e[4:][:5]\n"
+ " try:\n"
+ " print(list(v.skip(-1)))\n"
+ " except IndexError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // Checks skip(), take() and their composition on a TLazyList<true>(0, 10)
+ // fixture, and expects IndexError when the result of skip(-1) is turned
+ // into a Python list.
+ Y_UNIT_TEST(LazyListTakeSkip) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10));
+ },
+ "def Test(v):\n"
+ " e = list(range(0, 10))\n"
+ " assert list(v.skip(5)) == e[5:]\n"
+ " assert list(v.take(5)) == e[0:5]\n"
+ " assert list(v.skip(4).take(5)) == e[4:][:5]\n"
+ " try:\n"
+ " print(list(v.skip(-1)))\n"
+ " except IndexError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+
+ // to_index_dict() on a TLazyList<false>(3, 6) fixture must give a mapping of
+ // positions 0..2 to the values 3, 4, 5, with no entry for position 3.
+ Y_UNIT_TEST(LazyListToIndexDict) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " assert len(d) == 3\n"
+ " assert d[0] == 3\n"
+ " assert d[1] == 4\n"
+ " assert d[2] == 5\n"
+ " assert 3 not in d");
+ }
+
+ // A non-empty lazy list must be truthy in Python.
+ // The fixture is a TLazyList so the test really covers the lazy
+ // implementation; the previous body built a materialized array with
+ // vb.NewArray(), which is the thin-list case.
+ Y_UNIT_TEST(LazyListTrue) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             // Same non-empty lazy fixture as LazyListHasItems.
+             return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+         },
+         "def Test(value):\n"
+         " assert value\n"
+     );
+ }
+
+ // An empty lazy list (TLazyList<false>(0, 0)) must be falsy in Python.
+ Y_UNIT_TEST(LazyListFalse) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0));
+ },
+ "def Test(value):\n"
+ " assert not value\n"
+ );
+ }
+
+ // A non-empty materialized (thin) list must be truthy in Python.
+ // The fixture is built through IValueBuilder::NewList, like the other
+ // ThinList* tests; the previous body created a TLazyList, which is the
+ // lazy-list case.
+ Y_UNIT_TEST(ThinListTrue) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TListType<i32>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type);
+             // One element is enough to make the list non-empty.
+             std::array<NUdf::TUnboxedValue, 1U> list = {{
+                 NUdf::TUnboxedValuePod(0U)
+             }};
+             return vb.NewList(list.data(), list.size());
+         },
+         "def Test(value):\n"
+         " assert value\n"
+     );
+ }
+
+ // The list returned by IValueBuilder::NewEmptyList() must be falsy in Python.
+ Y_UNIT_TEST(ThinListFalse) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type);
+ return vb.NewEmptyList();
+ },
+ "def Test(value):\n"
+ " assert not value\n"
+ );
+ }
+
+ // has_items() must report a truthy value for a non-empty lazy list.
+ Y_UNIT_TEST(LazyListHasItems) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " b = value.has_items()\n"
+ " assert b\n");
+ }
+
+ // has_items() must report a falsy value for an empty lazy list.
+ Y_UNIT_TEST(LazyListEmptyHasItems) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0));
+ },
+ "def Test(value):\n"
+ " b = value.has_items()\n"
+ " assert not b\n");
+ }
+
+ // Membership checks on the index dict of a three-element lazy list: positions
+ // 0..2 are present, while 3 and -1 are not.
+ Y_UNIT_TEST(LazyIndexDictContains) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " assert 0 in d\n"
+ " assert 1 in d\n"
+ " assert 2 in d\n"
+ " assert 3 not in d\n"
+ " assert -1 not in d");
+ }
+
+ // Iterates items() of the index dict and expects keys 0, 1, 2 paired with the
+ // values 3, 4, 5 in that order.
+ // NOTE(review): nothing checks how many pairs were visited, so the test also
+ // passes if items() yields nothing.
+ Y_UNIT_TEST(LazyIndexDictIter) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " i, j = 0, 3\n"
+ " for k, v in d.items():\n"
+ " assert i == k\n"
+ " assert j == v\n"
+ " i, j = i+1, j+1");
+ }
+
+ // get() on the index dict: an existing position returns its value, a missing
+ // position returns None, or the supplied default when one is given.
+ Y_UNIT_TEST(LazyIndexDictGet) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TListType<i32>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5));
+ },
+ "def Test(value):\n"
+ " d = value.to_index_dict()\n"
+ " assert d.get(1) == 4\n"
+ " assert d.get(5) == None\n"
+ " assert d.get(5, 10) == 10\n");
+ }
+
+ // Returning a generator *function* from Python must produce a boxed list
+ // value that has no fast length up front, yields 0..9 in order, and reports
+ // a length of 10 afterwards.
+ Y_UNIT_TEST(FromPyGeneratorFactory) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def first_10():\n"
+         " num = 0\n"
+         " while num < 10:\n"
+         " yield num\n"
+         " num += 1\n"
+         "def Test():\n"
+         " return first_10\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(!value.HasFastListLength());
+             UNIT_ASSERT(value.HasListItems());
+
+             const auto iterator = value.GetListIterator();
+             ui32 expected = 0;
+             {
+                 // Scoped so the scratch value is gone before the length checks.
+                 NUdf::TUnboxedValue current;
+                 while (iterator.Next(current)) {
+                     const ui32 actual = current.Get<ui32>();
+                     UNIT_ASSERT_EQUAL(actual, expected);
+                     ++expected;
+                 }
+             }
+
+             UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10);
+             UNIT_ASSERT_EQUAL(value.GetListLength(), 10);
+         });
+ }
+
+ // A Python range (xrange on Python 2) must convert to a boxed list value
+ // that yields 0..9; the test expects a fast length to be reported only
+ // after GetListLength() has been called.
+ Y_UNIT_TEST(FromPyIterable) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def Test():\n"
+#if PY_MAJOR_VERSION >= 3
+         " return range(10)\n",
+#else
+         " return xrange(10)\n",
+#endif
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(!value.HasFastListLength());
+             UNIT_ASSERT(value.HasListItems());
+
+             const auto iterator = value.GetListIterator();
+             ui32 expected = 0U;
+             {
+                 // Scoped so the scratch value is gone before the length checks.
+                 NUdf::TUnboxedValue element;
+                 while (iterator.Next(element)) {
+                     UNIT_ASSERT_EQUAL(element.Get<ui32>(), expected++);
+                 }
+             }
+
+             UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10);
+             UNIT_ASSERT_EQUAL(value.GetListLength(), 10);
+             UNIT_ASSERT(value.HasFastListLength());
+         });
+ }
+
+ // A user-defined Python class that implements __len__ and __iter__ must
+ // convert to a list value of length 2 that yields 1 and 2 and then ends.
+ Y_UNIT_TEST(FromPyCustomIterable) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "class T:\n"
+         " def __init__(self, l):\n"
+         " self.l = l\n"
+         " def __len__(self):\n"
+         " return len(self.l)\n"
+         " def __nonzero__(self):\n"
+         " return bool(self.l)\n"
+         " def __iter__(self):\n"
+         " return iter(self.l)\n"
+         "\n"
+         "def Test():\n"
+         " return T([1, 2])\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(value.HasListItems());
+             UNIT_ASSERT_EQUAL(value.GetListLength(), 2);
+
+             auto it = value.GetListIterator();
+             {
+                 NUdf::TUnboxedValue item;
+                 // Fail here if the iterator ends early, instead of reading
+                 // an empty value below.
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 1);
+             }
+             {
+                 NUdf::TUnboxedValue item;
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 2);
+             }
+
+             // Both elements consumed: the iterator must be exhausted.
+             UNIT_ASSERT(false == it.Skip());
+         });
+ }
+
+ // A Python iterator object must convert to a boxed list value without a fast
+ // length that yields 0 and 1 and then ends.
+ Y_UNIT_TEST(FromPyIterator) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def Test():\n"
+         " return iter(range(2))\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(false == value.HasFastListLength());
+
+             auto it = value.GetListIterator();
+             {
+                 NUdf::TUnboxedValue item;
+                 // Fail here if the iterator ends early, instead of reading
+                 // an empty value below.
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 0);
+             }
+             {
+                 NUdf::TUnboxedValue item;
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 1);
+             }
+
+             // Both elements consumed: the iterator must be exhausted.
+             UNIT_ASSERT(false == it.Skip());
+         });
+ }
+
+ // When Test() is itself a generator, its result must convert to a boxed list
+ // value without a fast length that yields 0 and 1 and then ends.
+ Y_UNIT_TEST(FromPyGenerator) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TListType<ui32>>(
+         "def Test():\n"
+         " yield 0\n"
+         " yield 1\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT(false == value.HasFastListLength());
+
+             auto it = value.GetListIterator();
+             {
+                 NUdf::TUnboxedValue item;
+                 // Fail here if the generator ends early, instead of reading
+                 // an empty value below.
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 0);
+             }
+             {
+                 NUdf::TUnboxedValue item;
+                 UNIT_ASSERT(it.Next(item));
+                 ui32 actual = item.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(actual, 1);
+             }
+
+             // Both elements consumed: the iterator must be exhausted.
+             UNIT_ASSERT(false == it.Skip());
+         });
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp
new file mode 100644
index 0000000000..c55e25891d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp
@@ -0,0 +1,359 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Generates a functor TPy<Name>Checker. Given a PyObject and an expected C
+// value it asserts that:
+//   * the object passes Py<PyType>_Check(),
+//   * Py<PyType>_As<AsType>() converts it without raising a Python error
+//     (a result of -1 is accepted only when no error is pending),
+//   * the converted value equals the expected one.
+// Type must be the real return type of the conversion function; a narrower
+// type silently truncates both the converted and the expected value.
+#define PY_CHECKER(Name, PyType, AsType, Type) \
+    struct TPy##Name##Checker { \
+        void operator()(PyObject* pyVal, Type expected) { \
+            UNIT_ASSERT(Py##PyType##_Check(pyVal)); \
+            Type val = Py##PyType##_As##AsType(pyVal); \
+            UNIT_ASSERT(val != static_cast<Type>(-1) || !PyErr_Occurred()); \
+            UNIT_ASSERT_EQUAL(val, expected); \
+        } \
+    };
+
+#if PY_MAJOR_VERSION >= 3
+PY_CHECKER(Long, Long, Long, long)
+#else
+PY_CHECKER(Int, Int, Long, long)
+#endif
+
+#ifdef HAVE_LONG_LONG
+PY_CHECKER(LLong, Long, LongLong, long long)
+PY_CHECKER(Ulong, Long, UnsignedLongLong, unsigned long long)
+#else
+PY_CHECKER(LLong, Long, Long, long)
+PY_CHECKER(Ulong, Long, UnsignedLong, unsigned long)
+#endif
+
+// PyFloat_AsDouble() returns double. The checker previously used long, which
+// dropped the fractional part and made out-of-range conversions undefined.
+PY_CHECKER(Float, Float, Double, double)
+
+#undef PY_CHECKER
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyNumberTest) {
+ // Round-trips every value in [begin, end): converts it to a Python object,
+ // validates that object with TPyChecker, converts it back and expects the
+ // original value.
+ template <typename T, typename TPyChecker>
+ void TestCastsInRange(T begin, T end) {
+     for (T current = begin; current < end; current++) {
+         TPyObjectPtr asPython = PyCast<T>(current);
+         UNIT_ASSERT(asPython.Get() != nullptr);
+
+         TPyChecker verify;
+         verify(asPython.Get(), current);
+
+         T roundTripped = PyCast<T>(asPython.Get());
+         UNIT_ASSERT_EQUAL(roundTripped, current);
+     }
+ }
+
+ // Round-trip checks for a signed type over three windows: just above Min<T>(),
+ // around zero, and just below Max<T>(). The upper bound of each window is
+ // exclusive.
+ template <typename T, typename TPyChecker, int range = 10>
+ void TestSignedCasts() {
+     TPythonTestEngine engine;
+
+     // Lowest values.
+     TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range);
+
+     // Values on both sides of zero.
+     TestCastsInRange<T, TPyChecker>(-range, range);
+
+     // Highest values.
+     TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>());
+ }
+
+ // Round-trip checks for an unsigned type over two windows: just above
+ // Min<T>() (validated with TPyDownChecker) and just below Max<T>() (validated
+ // with TPyUpChecker). The upper bound of each window is exclusive.
+ template <typename T, typename TPyDownChecker,
+           typename TPyUpChecker = TPyDownChecker, int range = 10>
+ void TestUnsignedCasts() {
+     TPythonTestEngine engine;
+
+     // Lowest values.
+     TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range);
+
+     // Highest values.
+     TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>());
+ }
+
+ // bool conversion follows Python truthiness: True/False map directly, an
+ // empty list converts to false and a non-empty list to true, through both
+ // PyCast and TryPyCast.
+ Y_UNIT_TEST(Bool) {
+     TPythonTestEngine engine;
+     UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true);
+     UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false);
+
+     TPyObjectPtr pyList = PyList_New(0);
+
+     // Empty list.
+     UNIT_ASSERT_EQUAL(PyCast<bool>(pyList.Get()), false);
+     bool whenEmpty;
+     UNIT_ASSERT(TryPyCast<bool>(pyList.Get(), whenEmpty));
+     UNIT_ASSERT_EQUAL(whenEmpty, false);
+
+     // The same list after appending one element.
+     PyList_Append(pyList.Get(), Py_None);
+     UNIT_ASSERT_EQUAL(PyCast<bool>(pyList.Get()), true);
+     bool whenFilled;
+     UNIT_ASSERT(TryPyCast<bool>(pyList.Get(), whenFilled));
+     UNIT_ASSERT_EQUAL(whenFilled, true);
+ }
+
+ // Round-trips float values through Python float objects using the
+ // signed-type windows of TestSignedCasts.
+ Y_UNIT_TEST(Float) {
+ TestSignedCasts<float, TPyFloatChecker>();
+ }
+
+ // Round-trips double values through Python float objects.
+ // double is a signed type, so use the signed helper like the Float test:
+ // TestUnsignedCasts skips the [-range, range) window around zero.
+ Y_UNIT_TEST(Double) {
+     TestSignedCasts<double, TPyFloatChecker>();
+ }
+
+ // Round-trips i64 values near Min, zero and Max, validated with TPyLLongChecker.
+ Y_UNIT_TEST(I64) {
+ TestSignedCasts<i64, TPyLLongChecker>();
+ }
+
+ // Round-trips ui64 values near Min and Max, validated with TPyUlongChecker.
+ Y_UNIT_TEST(Ui64) {
+ TestUnsignedCasts<ui64, TPyUlongChecker>();
+ }
+
+#if PY_MAJOR_VERSION >= 3
+ // Python 3 build: round-trips i8 values, validated with TPyLongChecker.
+ Y_UNIT_TEST(I8) {
+ TestSignedCasts<i8, TPyLongChecker>();
+ }
+
+ // Python 3 build: round-trips ui8 values, validated with TPyLongChecker.
+ Y_UNIT_TEST(Ui8) {
+ TestUnsignedCasts<ui8, TPyLongChecker>();
+ }
+
+ // Python 3 build: round-trips i16 values, validated with TPyLongChecker.
+ Y_UNIT_TEST(I16) {
+ TestSignedCasts<i16, TPyLongChecker>();
+ }
+
+ // Python 3 build: round-trips ui16 values, validated with TPyLongChecker.
+ Y_UNIT_TEST(Ui16) {
+ TestUnsignedCasts<ui16, TPyLongChecker>();
+ }
+
+ // Python 3 build: round-trips i32 values, validated with TPyLongChecker.
+ Y_UNIT_TEST(I32) {
+ TestSignedCasts<i32, TPyLongChecker>();
+ }
+
+ // Python 3 build: round-trips ui32 values, validated with TPyLongChecker.
+ Y_UNIT_TEST(Ui32) {
+ TestUnsignedCasts<ui32, TPyLongChecker>();
+ }
+ // Python 3 build: a Python int created with PyLong_FromLong() must convert to
+ // i64, ui64, float and double through both PyCast and TryPyCast, while
+ // TryPyCast to the 32-bit types must fail.
+ Y_UNIT_TEST(ImplicitIntCasts) {
+ TPythonTestEngine engine;
+ // The value is Max<ui32>() + 10 masked to the width of a C long.
+ // NOTE(review): with a 4-byte long the masked value is 9, which fits in
+ // i32/ui32, so the overflow expectations at the end look platform-dependent - confirm.
+ const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>();
+ i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10);
+ TPyObjectPtr pyInt = PyLong_FromLong(expected);
+
+ { // signed
+ i64 actual = PyCast<i64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, expected);
+
+ bool isOk = TryPyCast<i64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, expected);
+ }
+
+ { // unsigned
+ ui64 actual = PyCast<ui64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+
+ bool isOk = TryPyCast<ui64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+ }
+
+ { // to float
+ float f = PyCast<float>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+ bool isOk = TryPyCast<float>(pyInt.Get(), f);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+ }
+
+ { // to double
+ double d = PyCast<double>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+ bool isOk = TryPyCast<double>(pyInt.Get(), d);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+ }
+
+ // expected overflow
+ i32 tmp;
+ UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp));
+ ui32 tmpu;
+ UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu));
+ }
+
+#else
+ // Python 2 build: round-trips i8 values, validated with TPyIntChecker.
+ Y_UNIT_TEST(I8) {
+ TestSignedCasts<i8, TPyIntChecker>();
+ }
+
+ // Python 2 build: round-trips ui8 values, validated with TPyIntChecker.
+ Y_UNIT_TEST(Ui8) {
+ TestUnsignedCasts<ui8, TPyIntChecker>();
+ }
+
+ // Python 2 build: round-trips i16 values, validated with TPyIntChecker.
+ Y_UNIT_TEST(I16) {
+ TestSignedCasts<i16, TPyIntChecker>();
+ }
+
+ // Python 2 build: round-trips ui16 values, validated with TPyIntChecker.
+ Y_UNIT_TEST(Ui16) {
+ TestUnsignedCasts<ui16, TPyIntChecker>();
+ }
+
+ // Python 2 build: round-trips i32 values, validated with TPyIntChecker.
+ Y_UNIT_TEST(I32) {
+ TestSignedCasts<i32, TPyIntChecker>();
+ }
+
+ // Python 2 build: round-trips ui32 values. With a 4-byte C long the values
+ // near Max<ui32>() are validated with TPyLLongChecker instead of TPyIntChecker.
+ Y_UNIT_TEST(Ui32) {
+ if (sizeof(long) == 4) {
+ TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>();
+ } else {
+ TestUnsignedCasts<ui32, TPyIntChecker>();
+ }
+ }
+
+ // Python 2 build: a Python int created with PyInt_FromLong() must convert to
+ // i64, ui64, float and double through both PyCast and TryPyCast, while
+ // TryPyCast to the 32-bit types must fail.
+ Y_UNIT_TEST(ImplicitIntCasts) {
+ TPythonTestEngine engine;
+ // The value is Max<ui32>() + 10 masked to the width of a C long.
+ // NOTE(review): with a 4-byte long the masked value is 9, which fits in
+ // i32/ui32, so the overflow expectations at the end look platform-dependent - confirm.
+ const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>();
+ i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10);
+ TPyObjectPtr pyInt = PyInt_FromLong(expected);
+
+ { // signed
+ i64 actual = PyCast<i64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, expected);
+
+ bool isOk = TryPyCast<i64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, expected);
+ }
+
+ { // unsigned
+ ui64 actual = PyCast<ui64>(pyInt.Get());
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+
+ bool isOk = TryPyCast<ui64>(pyInt.Get(), actual);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+ }
+
+ { // to float
+ float f = PyCast<float>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+ bool isOk = TryPyCast<float>(pyInt.Get(), f);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+ }
+
+ { // to double
+ double d = PyCast<double>(pyInt.Get());
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+ bool isOk = TryPyCast<double>(pyInt.Get(), d);
+ UNIT_ASSERT(isOk);
+ UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+ }
+
+ // expected overflow
+ i32 tmp;
+ UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp));
+ ui32 tmpu;
+ UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu));
+ }
+#endif
+
+
+ // A Python long holding Max<ui32>() + 10 must convert to i64, ui64, float and
+ // double through both PyCast and TryPyCast, while TryPyCast to i8 must fail.
+ Y_UNIT_TEST(ImplicitLongCasts) {
+     TPythonTestEngine engine;
+     i64 expected = static_cast<i64>(Max<ui32>()) + 10;
+     TPyObjectPtr pyLong;
+     #ifdef HAVE_LONG_LONG
+     pyLong = PyLong_FromLongLong(expected);
+     #else
+     // The terminating semicolon was missing here, which broke the build
+     // whenever HAVE_LONG_LONG was not defined.
+     pyLong = PyLong_FromLong(expected);
+     #endif
+
+     { // signed
+         i64 actual = PyCast<i64>(pyLong.Get());
+         UNIT_ASSERT_EQUAL(actual, expected);
+
+         bool isOk = TryPyCast<i64>(pyLong.Get(), actual);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_EQUAL(actual, expected);
+     }
+
+     { // unsigned
+         ui64 actual = PyCast<ui64>(pyLong.Get());
+         UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+
+         bool isOk = TryPyCast<ui64>(pyLong.Get(), actual);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected));
+     }
+
+     { // to float
+         float f = PyCast<float>(pyLong.Get());
+         UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+
+         bool isOk = TryPyCast<float>(pyLong.Get(), f);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001);
+     }
+
+     { // to double
+         double d = PyCast<double>(pyLong.Get());
+         UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+
+         bool isOk = TryPyCast<double>(pyLong.Get(), d);
+         UNIT_ASSERT(isOk);
+         UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001);
+     }
+
+     // expected overflow
+     i8 tmp;
+     UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp));
+ }
+
+ // A 68-bit Python long does not fit any supported fixed-width integer, so
+ // TryPyCast must return false for every one of them.
+ Y_UNIT_TEST(HugeLongOverflow) {
+     TPythonTestEngine engine;
+     TPyObjectPtr hugeValue = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0);
+     TPyObjectPtr bitCount = PyObject_CallMethod(hugeValue.Get(), (char*)"bit_length", (char*)"()");
+     UNIT_ASSERT_EQUAL(PyCast<ui32>(bitCount.Get()), 68); // 68 bits number
+
+     // 64-bit targets.
+     ui64 asUi64;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asUi64));
+     i64 asI64;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asI64));
+
+     // 32-bit targets.
+     ui32 asUi32;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asUi32));
+     i32 asI32;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asI32));
+
+     // 16-bit targets.
+     ui16 asUi16;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asUi16));
+     i16 asI16;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asI16));
+
+     // 8-bit targets.
+     ui8 asUi8;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asUi8));
+     i8 asI8;
+     UNIT_ASSERT(!TryPyCast(hugeValue.Get(), asI8));
+ }
+
+ // A Python float must convert to both float and double, through PyCast and
+ // TryPyCast, within a tolerance of 1e-6.
+ Y_UNIT_TEST(ImplicitFloatCasts) {
+     TPythonTestEngine engine;
+     double expected = 3.14159;
+     TPyObjectPtr pyFloat = PyFloat_FromDouble(expected);
+
+     // Narrowing conversion to float.
+     {
+         float asFloat = PyCast<float>(pyFloat.Get());
+         UNIT_ASSERT_DOUBLES_EQUAL(asFloat, expected, 0.000001);
+
+         bool converted = TryPyCast<float>(pyFloat.Get(), asFloat);
+         UNIT_ASSERT(converted);
+         UNIT_ASSERT_DOUBLES_EQUAL(asFloat, expected, 0.000001);
+     }
+
+     // Conversion to double.
+     {
+         double asDouble = PyCast<double>(pyFloat.Get());
+         UNIT_ASSERT_DOUBLES_EQUAL(asDouble, expected, 0.000001);
+
+         bool converted = TryPyCast<double>(pyFloat.Get(), asDouble);
+         UNIT_ASSERT(converted);
+         UNIT_ASSERT_DOUBLES_EQUAL(asDouble, expected, 0.000001);
+     }
+ }
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp
new file mode 100644
index 0000000000..d13ea65da6
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp
@@ -0,0 +1,56 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(FromPyNone) {
+ // Python None returned for an Optional<ui32> must convert to an empty value.
+ Y_UNIT_TEST(FromPyNone) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TOptional<ui32>>(
+ "def Test(): return None",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(!value);
+ });
+ }
+
+ // A Python int returned for an Optional<ui32> must convert to a non-empty
+ // value holding that number.
+ Y_UNIT_TEST(FromPyObject) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TOptional<ui32>>(
+ "def Test(): return 42",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42);
+ });
+ }
+
+ // A default-constructed (empty) value of an optional type must reach Python
+ // as None.
+ Y_UNIT_TEST(ToPyNone) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TOptional<char*>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ Y_UNUSED(type); Y_UNUSED(vb);
+ return NUdf::TUnboxedValuePod();
+ },
+ "def Test(value):\n"
+ " assert value == None\n");
+ }
+
+ // A filled Optional<Tuple<Utf8, bool>> must reach Python as a plain
+ // two-element tuple holding the wrapped values.
+ Y_UNIT_TEST(ToPyFilledOptional) {
+     TPythonTestEngine engine;
+     engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             // Take the element count from the tuple type wrapped by the optional.
+             const auto* optionalType = static_cast<const TOptionalType*>(type);
+             const auto* tupleType = static_cast<const TTupleType*>(optionalType->GetItemType());
+
+             NUdf::TUnboxedValue* elements = nullptr;
+             auto tuple = vb.NewArray(tupleType->GetElementsCount(), elements);
+             elements[0] = vb.NewString("test string");
+             elements[1] = NUdf::TUnboxedValuePod(false);
+             return NUdf::TUnboxedValue(tuple);
+         },
+         "def Test(value):\n"
+         " assert isinstance(value, tuple)\n"
+         " assert len(value) == 2\n"
+         " assert value == ('test string', False)\n");
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_ptr.h b/yql/essentials/udfs/common/python/bindings/py_ptr.h
new file mode 100644
index 0000000000..704629b86b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_ptr.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <Python.h> // PyObject
+
+#include <yql/essentials/public/udf/udf_ptr.h>
+
+namespace NPython {
+
+/**
+ * Reference-counting policy that lets NUdf::TRefCountedPtr manage CPython
+ * objects through Py_INCREF / Py_DECREF. Every operation asserts that the
+ * pointer is non-null.
+ */
+template <typename T>
+class TPyPtrOps
+{
+public:
+    /// Takes one more reference to the object.
+    static void Ref(T* obj) {
+        Y_ASSERT(obj);
+        Py_INCREF(obj);
+    }
+
+    /// Drops one reference to the object.
+    static void UnRef(T* obj) {
+        Y_ASSERT(obj);
+        Py_DECREF(obj);
+    }
+
+    /// Returns the object's current reference count.
+    static ui32 RefCount(const T* obj) {
+        Y_ASSERT(obj);
+        return obj->ob_refcnt;
+    }
+};
+
+/**
+ * Smart pointer to a PyObject.
+ *
+ * Constructing from a raw pointer steals the reference, i.e. the counter is
+ * not incremented. Pass the AddRef tag, or call ResetAddRef(), to take a new
+ * reference instead.
+ */
+class TPyObjectPtr:
+    public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>
+{
+    using TSelf = NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>;
+
+public:
+    /// Creates an empty pointer.
+    TPyObjectPtr()
+    {
+    }
+
+    /// Takes over the reference held by the caller.
+    TPyObjectPtr(PyObject* p)
+        : TSelf(p, STEAL_REF) // do not increment refcounter by default
+    {
+    }
+
+    /// Takes a new reference; the caller keeps its own.
+    TPyObjectPtr(PyObject* p, AddRef)
+        : TSelf(p)
+    {
+    }
+
+    /// Re-targets the pointer, taking over the caller's reference to p.
+    void ResetSteal(PyObject* p) {
+        TSelf::Reset(p, STEAL_REF);
+    }
+
+    /// Re-targets the pointer, taking a new reference to p.
+    void ResetAddRef(PyObject* p) {
+        TSelf::Reset(p);
+    }
+
+    /// Clears the pointer.
+    void Reset() {
+        TSelf::Reset();
+    }
+
+    /// Returns the stored pointer reinterpreted as T*.
+    template <class T>
+    T* GetAs() const {
+        return reinterpret_cast<T*>(Get());
+    }
+
+    // Deleted so that callers must choose ResetSteal() or ResetAddRef().
+    void Reset(PyObject* p) = delete;
+};
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.cpp b/yql/essentials/udfs/common/python/bindings/py_resource.cpp
new file mode 100644
index 0000000000..ebb096029a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_resource.cpp
@@ -0,0 +1,116 @@
+#include "py_resource.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+// Capsule destructor: frees the heap-allocated TUnboxedValue stored in a
+// resource capsule. Does nothing when the pointer cannot be retrieved.
+void DestroyResourceCapsule(PyObject* obj) {
+    void* payload = PyCapsule_GetPointer(obj, ResourceCapsuleName);
+    if (!payload) {
+        return;
+    }
+    delete reinterpret_cast<NUdf::TUnboxedValue*>(payload);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// TResource
+/////////////////////////////////////////////////////////////////////////////
+// Boxed value that exposes an arbitrary Python object as a resource with the
+// given tag. It holds its own reference to the object.
+class TResource final: public NUdf::TBoxedValue
+{
+public:
+ // Takes a new reference to `value` (ADD_REF) and stores `tag`.
+ TResource(PyObject* value, const NUdf::TStringRef& tag)
+ : Value_(value, TPyObjectPtr::ADD_REF), Tag_(tag)
+ {
+ }
+
+ // Drops the Python reference explicitly while a TPyGilLocker is alive.
+ ~TResource() {
+ TPyGilLocker lock;
+ Value_.Reset();
+ }
+
+private:
+ // Returns the tag passed to the constructor.
+ NUdf::TStringRef GetResourceTag() const override {
+ return Tag_;
+ }
+
+ // Returns the wrapped PyObject* as an opaque pointer.
+ void* GetResource() final {
+ return Value_.Get();
+ }
+
+ TPyObjectPtr Value_;
+ const NUdf::TStringRef Tag_;
+};
+
+} // namespace
+
+// Name given to every PyCapsule created by ToPyResource(); also used to
+// validate and unpack capsules in FromPyResource().
+const char ResourceCapsuleName[] = "YqlResourceCapsule";
+
+// Converts a resource value into a Python object.
+//
+// If the value carries the Python resource tag (ctx->PyCtx->ResourceTag), it
+// wraps a PyObject and that object is returned with a new reference.
+// Otherwise the value is copied to the heap and wrapped in a PyCapsule named
+// ResourceCapsuleName, whose destructor frees the copy.
+//
+// Returns an empty pointer if the capsule cannot be created; in that case the
+// heap copy is freed before returning.
+TPyObjectPtr ToPyResource(
+        const TPyCastContext::TPtr& ctx,
+        const NUdf::TType* type,
+        const NUdf::TUnboxedValuePod& value)
+{
+// TODO NILE-43
+#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15)
+    NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type);
+    auto tag = inpector.GetTag();
+    if (tag == ctx->PyCtx->ResourceTag) {
+        PyObject* p = reinterpret_cast<PyObject*>(value.GetResource());
+        return TPyObjectPtr(p, TPyObjectPtr::ADD_REF);
+    }
+#else
+    Y_UNUSED(type);
+    if (value.GetResourceTag() == ctx->PyCtx->ResourceTag) {
+        PyObject* p = reinterpret_cast<PyObject*>(value.GetResource());
+        return TPyObjectPtr(p, TPyObjectPtr::ADD_REF);
+    }
+#endif
+    auto resource = MakeHolder<NUdf::TUnboxedValue>(value);
+
+    // Keep ownership in the holder until the capsule exists. The capsule
+    // destructor is not called when PyCapsule_New() fails, so releasing the
+    // pointer beforehand would leak the copy.
+    TPyObjectPtr capsule = PyCapsule_New(resource.Get(), ResourceCapsuleName, &DestroyResourceCapsule);
+    if (capsule.Get() != nullptr) {
+        // The capsule now owns the copy and frees it in DestroyResourceCapsule.
+        Y_UNUSED(resource.Release());
+    }
+    return capsule;
+}
+
+// Converts a Python object into a resource value.
+//
+// Active (#else) branch: `type` is unused. If `value` is exactly a PyCapsule,
+// it must be a valid capsule named ResourceCapsuleName, otherwise yexception
+// is thrown; the TUnboxedValue stored in the capsule is returned by copy. Any
+// other Python object is wrapped in a TResource tagged with
+// ctx->PyCtx->ResourceTag.
+//
+// The first branch is compiled out by `#if false` (see TODO NILE-43); it
+// would additionally compare resource tags against the declared type.
+NUdf::TUnboxedValue FromPyResource(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+// TODO NILE-43
+#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15)
+ NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type);
+ auto tag = inpector.GetTag();
+ if (tag == ctx->PyCtx->ResourceTag) {
+ return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag));
+ }
+
+ if (PyCapsule_IsValid(value, ResourceCapsuleName)) {
+ auto* resource = reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName));
+ auto valueTag = resource->GetResourceTag();
+ if (valueTag != tag) {
+ throw yexception() << "Mismatch of resource tag, expected: "
+ << tag << ", got: " << valueTag;
+ }
+
+ return *resource;
+ }
+
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is not a valid resource with tag " << tag;
+#else
+ Y_UNUSED(type);
+ if (PyCapsule_CheckExact(value)) {
+ // A capsule with a different name is rejected.
+ if (!PyCapsule_IsValid(value, ResourceCapsuleName)) {
+ throw yexception() << "Python object " << PyObjectRepr(value) << " is not a valid resource capsule";
+ }
+ return *reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName));
+ }
+ return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag));
+#endif
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.h b/yql/essentials/udfs/common/python/bindings/py_resource.h
new file mode 100644
index 0000000000..b46b84c84b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_resource.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Name of the PyCapsule objects that carry non-Python resource values.
+extern const char ResourceCapsuleName[];
+
+// Converts a resource value to a Python object: a value tagged with the
+// Python resource tag yields the wrapped PyObject, any other value is copied
+// into a capsule named ResourceCapsuleName.
+TPyObjectPtr ToPyResource(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Converts a Python object to a resource value: a resource capsule yields the
+// value stored in it, any other object is wrapped as a Python-tagged resource.
+// Throws yexception for a capsule that is not a valid resource capsule.
+NKikimr::NUdf::TUnboxedValue FromPyResource(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp
new file mode 100644
index 0000000000..aaa9899c4f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp
@@ -0,0 +1,81 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+// Resource tags used as template arguments of NUdf::TResource<> and
+// NUdf::TBoxedResource<> in the tests below.
+extern const char SimpleDataTag[] = "SimpleData";
+extern const char PythonTestTag[] = PYTHON_TEST_TAG;
+
+// Plain payload used by the MkqlObject test: a name and an age.
+struct TSimpleData {
+ TString Name;
+ ui32 Age;
+
+ // Copies both fields from the arguments.
+ TSimpleData(const TString& name, ui32 age)
+ : Name(name)
+ , Age(age)
+ {}
+};
+
+// Boxed resource that carries a TSimpleData under the SimpleDataTag tag.
+using TSimpleDataResource = NUdf::TBoxedResource<TSimpleData, SimpleDataTag>;
+
+Y_UNIT_TEST_SUITE(TPyResourceTest) {
+ // A non-Python resource must reach Python as an opaque "YqlResourceCapsule"
+ // capsule and, when passed back, must arrive as the same resource with its
+ // tag and payload intact.
+ Y_UNIT_TEST(MkqlObject) {
+     TPythonTestEngine engine;
+
+     // Step 1: hand the resource to Python and keep the object it returns.
+     TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>(
+         [](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type);
+             Y_UNUSED(vb);
+             return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99));
+         },
+         "import yql\n"
+         "\n"
+         "def Test(value):\n"
+         " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n"
+         " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n"
+         " assert type(value).__name__ == 'PyCapsule'\n"
+         " return value\n");
+     UNIT_ASSERT(!!pyValue);
+
+     // Step 2: pass the capsule back and inspect the unpacked resource.
+     engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>(
+         pyValue.Get(),
+         "import yql\n"
+         "\n"
+         "def Test(value):\n"
+         " return value\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag);
+
+             auto payload = reinterpret_cast<TSimpleData*>(value.GetResource());
+             UNIT_ASSERT_EQUAL(payload->Age, 99);
+             UNIT_ASSERT_STRINGS_EQUAL(payload->Name, "Jamel");
+         });
+ }
+
+ // A Python object returned as a resource must be wrapped under the Python
+ // test tag and, when passed back to Python, must arrive as the original
+ // object with its attributes unchanged.
+ Y_UNIT_TEST(PythonObject) {
+     TPythonTestEngine engine;
+     NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>(
+         "class CustomStruct:\n"
+         " def __init__(self, name, age):\n"
+         " self.name = name\n"
+         " self.age = age\n"
+         "\n"
+         "def Test():\n"
+         " return CustomStruct('Jamel', 97)\n");
+     UNIT_ASSERT(mkqlValue);
+     UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag);
+
+     TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>(
+         [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) {
+             Y_UNUSED(type); Y_UNUSED(vb);
+             return mkqlValue;
+         },
+         // Compare with ==. In `assert x, y` the second operand is only the
+         // failure message, so the old checks tested truthiness and nothing else.
+         "def Test(value):\n"
+         " assert isinstance(value, CustomStruct)\n"
+         " assert value.age == 97\n"
+         " assert value.name == 'Jamel'\n");
+     UNIT_ASSERT(!!pyValue);
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.cpp b/yql/essentials/udfs/common/python/bindings/py_stream.cpp
new file mode 100644
index 0000000000..3d9aecdc00
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_stream.cpp
@@ -0,0 +1,343 @@
+#include "py_stream.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+ // Python exception object used to carry the "yield" stream status across the
+ // C++/Python boundary: TPyStream::Next sets it for EFetchStatus::Yield and
+ // TStreamOverPyIter::Fetch matches against it.
+ // will be initialized in InitYqlModule()
+ PyObject* PyYieldIterationException = nullptr;
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyStream
+//////////////////////////////////////////////////////////////////////////////
+ // Python-side wrapper object around a boxed MiniKQL stream value.
+ // Instances are allocated by New() and iterated through Next().
+ struct TPyStream {
+ // Cast() reinterprets a PyObject* as TPyStream*, so the Python object
+ // header has to stay the first member.
+ PyObject_HEAD;
+ // cast context handed to ToPyObject() when fetched items are converted
+ TPyCastContext::TPtr CastCtx;
+ // the wrapped stream value; bound to castCtx->PyCtx in New()
+ TPyCleanupListItem<NUdf::IBoxedValuePtr> Value;
+ // item type of the stream, obtained from TStreamTypeInspector in New()
+ const NUdf::TType* ItemType;
+
+ // Views a generic Python object pointer as a TPyStream (no type check).
+ inline static TPyStream* Cast(PyObject* o) {
+ return reinterpret_cast<TPyStream*>(o);
+ }
+
+ // Destroys the wrapper; pairs with the `new TPyStream` done in New().
+ inline static void Dealloc(PyObject* self) {
+ delete Cast(self);
+ }
+
+ // Returns the fixed textual representation of any stream object.
+ inline static PyObject* Repr(PyObject* self) {
+ Y_UNUSED(self);
+ return PyRepr("<yql.TStream>").Release();
+ }
+
+ // Creates a new wrapper for `value`, a boxed stream of the given stream
+ // `type`; the result is returned as a raw PyObject*.
+ static PyObject* New(
+ const TPyCastContext::TPtr& castCtx,
+ const NUdf::TType* type,
+ NUdf::IBoxedValuePtr value);
+
+ // Fetches the next item of the wrapped stream for Python iteration.
+ static PyObject* Next(PyObject* self);
+ };
+
+ // For Python 3 builds the flag is defined as 0 so that the tp_flags
+ // initializer below compiles unchanged (presumably because Python 3 headers
+ // no longer provide Py_TPFLAGS_HAVE_ITER - confirm against the CPython docs).
+ #if PY_MAJOR_VERSION >= 3
+ #define Py_TPFLAGS_HAVE_ITER 0
+ #endif
+
+ // Type object of yql.TStream. Only deallocation, repr and the iterator
+ // protocol are provided: tp_iter returns the object itself and tp_iternext
+ // is TPyStream::Next; every other slot is left empty.
+ PyTypeObject PyStreamType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.TStream"),
+ INIT_MEMBER(tp_basicsize , sizeof(TPyStream)),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , TPyStream::Dealloc),
+ // this slot is tp_print before 3.8.0b4 and tp_vectorcall_offset afterwards
+ #if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+ #else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+ #endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+ // this slot is tp_as_async on Python 3 and tp_compare on Python 2
+ #if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+ #else
+ INIT_MEMBER(tp_compare , nullptr),
+ #endif
+ INIT_MEMBER(tp_repr , TPyStream::Repr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER),
+ INIT_MEMBER(tp_doc , "yql.TStream object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ // a stream is its own iterator
+ INIT_MEMBER(tp_iter , PyObject_SelfIter),
+ INIT_MEMBER(tp_iternext , TPyStream::Next),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+ #if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+ #endif
+ #if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+ #endif
+ // a trailing tp_print member is initialized only for 3.8.0b4 up to (not
+ // including) 3.9
+ #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+ #endif
+ };
+
+ // Allocates a yql.TStream wrapper around the boxed stream `value`.
+ // The wrapper keeps the cast context, registers the value against
+ // castCtx->PyCtx and remembers the item type of the stream `type`.
+ PyObject* TPyStream::New(
+         const TPyCastContext::TPtr& castCtx,
+         const NUdf::TType* type,
+         NUdf::IBoxedValuePtr value)
+ {
+     TPyStream* const wrapper = new TPyStream;
+     PyObject_INIT(wrapper, &PyStreamType);
+
+     wrapper->CastCtx = castCtx;
+     wrapper->Value.Set(castCtx->PyCtx, value);
+
+     // the item type is resolved once here and reused by every Next() call
+     const NUdf::TStreamTypeInspector streamInspector(*castCtx->PyCtx->TypeInfoHelper, type);
+     wrapper->ItemType = streamInspector.GetItemType();
+
+     return reinterpret_cast<PyObject*>(wrapper);
+ }
+
+ // tp_iternext implementation: fetches one item from the wrapped stream.
+ // Returns a new Python object for EFetchStatus::Ok, nullptr without an
+ // exception for Finish, and nullptr with PyYieldIterationException set for
+ // Yield. Any other status aborts the process.
+ PyObject* TPyStream::Next(PyObject* self) {
+     PY_TRY {
+         TPyStream* const pyStream = Cast(self);
+
+         NUdf::TUnboxedValue fetched;
+         const auto fetchStatus = NUdf::TBoxedValueAccessor::Fetch(*pyStream->Value.Get(), fetched);
+
+         if (fetchStatus == NUdf::EFetchStatus::Ok) {
+             auto converted = ToPyObject(pyStream->CastCtx, pyStream->ItemType, fetched);
+             return converted.Release();
+         }
+         if (fetchStatus == NUdf::EFetchStatus::Finish) {
+             return nullptr;
+         }
+         if (fetchStatus == NUdf::EFetchStatus::Yield) {
+             PyErr_SetNone(PyYieldIterationException);
+             return nullptr;
+         }
+         Y_ABORT("Unknown stream status");
+     } PY_CATCH(nullptr)
+ }
+
+//////////////////////////////////////////////////////////////////////////////
+// TStreamOverPyIter
+//////////////////////////////////////////////////////////////////////////////
+ // Boxed MiniKQL stream whose items are pulled from a Python iterator.
+ //
+ // Besides the iterator itself the stream may hold the iterable it was made
+ // from, or a callable (with its arguments) that produced it. These are used
+ // by Fetch() to build a fresh iterator after the current one raised
+ // PyYieldIterationException.
+ class TStreamOverPyIter final: public NUdf::TBoxedValue {
+ public:
+     // castCtx                    - context used to convert items
+     // itemType                   - type every produced item is converted to
+     // pyIter                     - iterator items are read from
+     // pyIterable                 - optional iterable to re-iterate after a yield
+     // pyGeneratorCallable        - optional callable to re-invoke after a yield
+     // pyGeneratorCallableClosure - stored only to hold a reference; this
+     //                              class never reads it
+     // pyGeneratorCallableArgs    - arguments passed when re-invoking the callable
+     TStreamOverPyIter(
+             TPyCastContext::TPtr castCtx,
+             const NUdf::TType* itemType,
+             TPyObjectPtr pyIter,
+             TPyObjectPtr pyIterable,
+             TPyObjectPtr pyGeneratorCallable,
+             TPyObjectPtr pyGeneratorCallableClosure,
+             TPyObjectPtr pyGeneratorCallableArgs)
+         : CastCtx_(std::move(castCtx))
+         , ItemType_(itemType)
+         , PyIter_(std::move(pyIter))
+         , PyIterable_(std::move(pyIterable))
+         , PyGeneratorCallable_(std::move(pyGeneratorCallable))
+         , PyGeneratorCallableClosure_(std::move(pyGeneratorCallableClosure))
+         , PyGeneratorCallableArgs_(std::move(pyGeneratorCallableArgs))
+     {
+     }
+
+     // Releases every held Python reference while the GIL is taken.
+     ~TStreamOverPyIter() {
+         TPyGilLocker lock;
+         PyIter_.Reset();
+         PyIterable_.Reset();
+         PyGeneratorCallableArgs_.Reset();
+         PyGeneratorCallableClosure_.Reset();
+         PyGeneratorCallable_.Reset();
+     }
+
+ private:
+     // Produces the next stream item.
+     //  - Ok:     the iterator returned an object; it is converted into `result`.
+     //  - Yield:  the iterator returned the PyYieldIterationException object,
+     //            or raised it (in which case a new iterator is prepared from
+     //            the stored iterable or callable when one is available).
+     //  - Finish: the iterator is exhausted and no Python error is pending.
+     // Any other Python error, and any yexception, terminates via UdfTerminate.
+     NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override {
+         try {
+             TPyGilLocker lock;
+             TPyObjectPtr next(PyIter_Next(PyIter_.Get()));
+             if (next) {
+                 if (PyErr_GivenExceptionMatches(next.Get(), PyYieldIterationException)) {
+                     return NUdf::EFetchStatus::Yield;
+                 }
+
+                 result = FromPyObject(CastCtx_, ItemType_, next.Get());
+                 return NUdf::EFetchStatus::Ok;
+             }
+
+             if (PyObject* ex = PyErr_Occurred()) {
+                 if (PyErr_GivenExceptionMatches(ex, PyYieldIterationException)) {
+                     PyErr_Clear();
+                     TPyObjectPtr iterable;
+                     TPyObjectPtr iter;
+                     if (PyIterable_) {
+                         PyIter_.Reset();
+                         iterable = PyIterable_;
+                     } else if (PyGeneratorCallable_) {
+                         PyIter_.Reset();
+                         // named differently from the `result` out-parameter,
+                         // which the original local used to shadow
+                         TPyObjectPtr produced(PyObject_CallObject(PyGeneratorCallable_.Get(), PyGeneratorCallableArgs_.Get()));
+                         if (!produced) {
+                             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+                         }
+
+                         if (PyGen_Check(produced.Get())) {
+                             iterable = std::move(produced);
+                         } else if (PyIter_Check(produced.Get())) {
+                             iter = std::move(produced);
+                         } else {
+                             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected iterator or generator, but got " << PyObjectRepr(produced.Get())).data());
+                         }
+                     } else {
+                         // nothing to restart from: keep the current iterator
+                         return NUdf::EFetchStatus::Yield;
+                     }
+
+                     if (!iter) {
+                         iter.ResetSteal(PyObject_GetIter(iterable.Get()));
+                         if (!iter) {
+                             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+                         }
+                     }
+
+                     PyIter_.ResetAddRef(iter.Get());
+                     return NUdf::EFetchStatus::Yield;
+                 }
+
+                 UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data());
+             }
+
+             return NUdf::EFetchStatus::Finish;
+         }
+         catch (const yexception& e) {
+             UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data());
+         }
+     }
+
+ private:
+     TPyCastContext::TPtr CastCtx_;
+     const NUdf::TType* ItemType_;
+     TPyObjectPtr PyIter_;
+     TPyObjectPtr PyIterable_;
+     TPyObjectPtr PyGeneratorCallable_;
+     TPyObjectPtr PyGeneratorCallableClosure_;
+     TPyObjectPtr PyGeneratorCallableArgs_;
+ };
+
+
+//////////////////////////////////////////////////////////////////////////////
+// public functions
+//////////////////////////////////////////////////////////////////////////////
+ // Wraps the boxed stream held by `value` into a yql.TStream Python object.
+ TPyObjectPtr ToPyStream(
+         const TPyCastContext::TPtr& castCtx,
+         const NKikimr::NUdf::TType* type,
+         const NKikimr::NUdf::TUnboxedValuePod& value)
+ {
+     PyObject* const pyStream = TPyStream::New(castCtx, type, value.AsBoxed());
+     return pyStream;
+ }
+
+ // Builds a MiniKQL stream over a Python object. `value` is examined in this
+ // order:
+ //   1. generator object            - iterated directly;
+ //   2. iterator                    - used as is;
+ //   3. callable                    - called without arguments, must return a
+ //                                    generator;
+ //   4. sequence / has `__iter__`   - iterated through PyObject_GetIter.
+ // Anything else terminates via UdfTerminate.
+ //
+ // For cases 1 and 2 the originalCallable* arguments are stored in the stream
+ // unchanged. For cases 3 and 4 `originalCallable` only acts as a flag: when
+ // it is set, `value` itself (plus, in case 3, its closure if it is a Python
+ // function) is stored in the stream.
+ NKikimr::NUdf::TUnboxedValue FromPyStream(
+         const TPyCastContext::TPtr& castCtx,
+         const NKikimr::NUdf::TType* type,
+         const TPyObjectPtr& value,
+         const TPyObjectPtr& originalCallable,
+         const TPyObjectPtr& originalCallableClosure,
+         const TPyObjectPtr& originalCallableArgs
+ )
+ {
+     const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type);
+     const NUdf::TType* itemType = inspector.GetItemType();
+
+     if (PyGen_Check(value.Get())) {
+         TPyObjectPtr iter(PyObject_GetIter(value.Get()));
+         if (!iter) {
+             UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr,
+             originalCallable, originalCallableClosure, originalCallableArgs));
+     }
+
+     if (PyIter_Check(value.Get())
+ #if PY_MAJOR_VERSION < 3
+         // python 2 iterators must also implement "next" method
+         && 1 == PyObject_HasAttrString(value.Get(), "next")
+ #endif
+     ) {
+         TPyObjectPtr iter(value.Get(), TPyObjectPtr::ADD_REF);
+         return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr,
+             originalCallable, originalCallableClosure, originalCallableArgs));
+     }
+
+     // assume that this function will return a generator
+     if (PyCallable_Check(value.Get())) {
+         TPyObjectPtr generator(PyObject_CallObject(value.Get(), nullptr));
+         if (!generator) {
+             // the call itself raised: report the Python error instead of
+             // claiming that a non-generator was returned
+             UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+         }
+         if (!PyGen_Check(generator.Get())) {
+             UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected generator as a result of function call").data());
+         }
+         TPyObjectPtr iter(PyObject_GetIter(generator.Get()));
+         if (!iter) {
+             UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+
+         TPyObjectPtr callableClosure;
+         if (PyFunction_Check(value.Get())) {
+             PyObject* closure = PyFunction_GetClosure(value.Get());
+             if (closure) {
+                 callableClosure = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF);
+             }
+         }
+
+         return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr,
+             originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr));
+     }
+
+     // must be after checking for callable
+     if (PySequence_Check(value.Get()) || PyObject_HasAttrString(value.Get(), "__iter__")) {
+         TPyObjectPtr iter(PyObject_GetIter(value.Get()));
+         if (!iter) {
+             UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data());
+         }
+         return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), originalCallable ? value : nullptr, nullptr, nullptr, nullptr));
+     }
+
+     UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected iterator, generator, generator factory, "
+         "or iterable object, but got " << PyObjectRepr(value.Get())).data());
+ }
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.h b/yql/essentials/udfs/common/python/bindings/py_stream.h
new file mode 100644
index 0000000000..f677e23930
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_stream.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+ namespace NPython {
+
+ // Python type object of yql.TStream (defined in py_stream.cpp).
+ extern PyTypeObject PyStreamType;
+ // Exception object used by the stream code to signal and detect a "yield"
+ // status; defined as nullptr in py_stream.cpp and set up elsewhere.
+ extern PyObject* PyYieldIterationException;
+
+ // Wraps a boxed MiniKQL stream `value` of stream type `type` into a Python
+ // yql.TStream object.
+ TPyObjectPtr ToPyStream(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+ // Builds a MiniKQL stream of stream type `type` from a Python generator,
+ // iterator, generator-returning callable or iterable passed as `value`.
+ // The originalCallable* arguments describe the Python call that produced
+ // `value`; the stream may keep them to obtain a fresh iterator later.
+ NKikimr::NUdf::TUnboxedValue FromPyStream(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const TPyObjectPtr& value,
+ const TPyObjectPtr& originalCallable,
+ const TPyObjectPtr& originalCallableClosure,
+ const TPyObjectPtr& originalCallableArgs);
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp
new file mode 100644
index 0000000000..4a24dd1a13
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp
@@ -0,0 +1,208 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyStreamTest) {
+ // Drains the stream in `value` and checks that it produces exactly the
+ // ui32 values 0..9 in order and then reports Finish.
+ void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) {
+     UNIT_ASSERT(value);
+     UNIT_ASSERT(value.IsBoxed());
+
+     ui32 nextExpected = 0;
+     NUdf::TUnboxedValue current;
+     NUdf::EFetchStatus lastStatus = value.Fetch(current);
+
+     for (; lastStatus == NUdf::EFetchStatus::Ok; lastStatus = value.Fetch(current)) {
+         const ui32 got = current.Get<ui32>();
+         UNIT_ASSERT_EQUAL(got, nextExpected);
+         ++nextExpected;
+     }
+
+     // the loop must have stopped because the stream ended, after ten items
+     UNIT_ASSERT_EQUAL(lastStatus, NUdf::EFetchStatus::Finish);
+     UNIT_ASSERT_EQUAL(nextExpected, 10);
+ }
+
+ // Test stream producing 0, 1, 2, ... up to (not including) maxValue.
+ // When the counter equals yieldOn, Fetch reports Yield instead and does
+ // not advance, so every later Fetch reports Yield as well.
+ struct TTestStream final: NUdf::TBoxedValue {
+     TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>())
+         : Current_(0)
+         , YieldOn_(yieldOn)
+         , MaxValue_(maxValue)
+     {
+     }
+
+ private:
+     NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override {
+         // the yield check takes priority over the end-of-stream check
+         if (Current_ == YieldOn_) {
+             return NUdf::EFetchStatus::Yield;
+         }
+         if (Current_ >= MaxValue_) {
+             return NUdf::EFetchStatus::Finish;
+         }
+
+         result = NUdf::TUnboxedValuePod(Current_);
+         ++Current_;
+         return NUdf::EFetchStatus::Ok;
+     }
+
+     ui32 Current_;
+     ui32 YieldOn_;
+     ui32 MaxValue_;
+ };
+
+ // A Python function containing `yield` is converted to a ui32 stream; the
+ // result is checked by Ui32StreamValidator.
+ Y_UNIT_TEST(FromGenerator) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ " num = 0\n"
+ " while num < 10:\n"
+ " yield num\n"
+ " num += 1\n",
+ Ui32StreamValidator);
+ }
+
+ // Test() returns the generator function itself (not a generator object);
+ // the conversion to a ui32 stream is checked by Ui32StreamValidator.
+ Y_UNIT_TEST(FromGeneratorFactory) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def first_10():\n"
+ " num = 0\n"
+ " while num < 10:\n"
+ " yield num\n"
+ " num += 1\n"
+ "def Test():\n"
+ " return first_10\n",
+ Ui32StreamValidator);
+ }
+
+ // Test() returns an explicit iterator over range(10); the resulting ui32
+ // stream is checked by Ui32StreamValidator.
+ Y_UNIT_TEST(FromIterator) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ " return iter(range(10))\n",
+ Ui32StreamValidator);
+ }
+
+ // Test() returns a range object (xrange when built for Python 2) without
+ // calling iter() on it; the resulting ui32 stream is checked by
+ // Ui32StreamValidator.
+ Y_UNIT_TEST(FromIterable) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ #if PY_MAJOR_VERSION >= 3
+ " return range(10)\n",
+ #else
+ " return xrange(10)\n",
+ #endif
+ Ui32StreamValidator);
+ }
+
+ // Test() returns an instance of a user-defined class that wraps a list and
+ // defines __len__, __nonzero__ and __iter__; the resulting ui32 stream is
+ // checked by Ui32StreamValidator.
+ Y_UNIT_TEST(FromCustomIterable) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "class T:\n"
+ " def __init__(self, l):\n"
+ " self.l = l\n"
+ " def __len__(self):\n"
+ " return len(self.l)\n"
+ " def __nonzero__(self):\n"
+ " return bool(self.l)\n"
+ " def __iter__(self):\n"
+ " return iter(self.l)\n"
+ "\n"
+ "def Test():\n"
+ " return T(list(range(10)))\n",
+ Ui32StreamValidator);
+ }
+
+ // Test() returns a plain Python list of 0..9; the resulting ui32 stream is
+ // checked by Ui32StreamValidator.
+ Y_UNIT_TEST(FromList) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TStream<ui32>>(
+ "def Test():\n"
+ " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
+ Ui32StreamValidator);
+ }
+
+ // Passes a C++ TTestStream(10) to Python and checks, on the Python side,
+ // its repr, its type name and the list of items it yields.
+ Y_UNIT_TEST(ToPython) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TStream<ui32>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ return NUdf::TUnboxedValuePod(new TTestStream(10));
+ },
+ "def Test(value):\n"
+ " import yql\n"
+ " assert repr(value) == '<yql.TStream>'\n"
+ " assert type(value).__name__ == 'TStream'\n"
+ " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n");
+ }
+
+ // Passes a C++ TTestStream(10) to a Python function that returns its
+ // argument untouched; the value coming back is checked by
+ // Ui32StreamValidator.
+ Y_UNIT_TEST(ToPythonAndBackAsIs) {
+ TPythonTestEngine engine;
+ engine.ToPythonAndBack<NUdf::TStream<ui32>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ return NUdf::TUnboxedValuePod(new TTestStream(10));
+ },
+ "def Test(value): return value",
+ Ui32StreamValidator
+ );
+ }
+
+ // A Python generator yields 0, 1 and then yql.TYieldIteration; the C++
+ // side must read two items and then observe EFetchStatus::Yield.
+ Y_UNIT_TEST(YieldingStreamFromPython) {
+     TPythonTestEngine engine;
+     engine.ToMiniKQL<NUdf::TStream<ui32>>(
+         "import yql\n"
+         "def Test():\n"
+         " yield 0\n"
+         " yield 1\n"
+         " yield yql.TYieldIteration\n"
+         " yield 2\n",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+
+             ui32 nextExpected = 0;
+             NUdf::TUnboxedValue current;
+             NUdf::EFetchStatus lastStatus = value.Fetch(current);
+
+             for (; lastStatus == NUdf::EFetchStatus::Ok; lastStatus = value.Fetch(current)) {
+                 const ui32 got = current.Get<ui32>();
+                 UNIT_ASSERT_EQUAL(got, nextExpected);
+                 ++nextExpected;
+             }
+
+             // reading stops on the yield marker, before the trailing `2`
+             UNIT_ASSERT_EQUAL(lastStatus, NUdf::EFetchStatus::Yield);
+             UNIT_ASSERT_EQUAL(nextExpected, 2);
+         });
+ }
+
+ // Passes a C++ TTestStream(5, 2), which reports Yield once two items were
+ // produced, to Python; the script expects two values from next() and then
+ // a yql.TYieldIteration exception.
+ Y_UNIT_TEST(YieldingStreamFromCpp) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TStream<ui32>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ return NUdf::TUnboxedValuePod(new TTestStream(5, 2));
+ },
+ "import yql\n"
+ "def Test(value):\n"
+ " assert repr(value) == '<yql.TStream>'\n"
+ " assert type(value).__name__ == 'TStream'\n"
+ " assert next(value) == 0\n"
+ " assert next(value) == 1\n"
+ " try:\n"
+ " next(value)\n"
+ " except yql.TYieldIteration:\n"
+ " pass\n"
+ " else:\n"
+ " assert False, 'Expected yql.TYieldIteration'\n");
+ }
+
+ // Passes a C++ list of 0..9 to Python, which returns iter(value); the
+ // iterator is converted back into a ui32 stream and checked by
+ // Ui32StreamValidator.
+ Y_UNIT_TEST(FromCppListIterator) {
+     TPythonTestEngine engine;
+     engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>(
+         [](const TType*, const NUdf::IValueBuilder& vb) {
+             NUdf::TUnboxedValue* slots = nullptr;
+             const auto list = vb.NewArray(10U, slots);
+             // fill the freshly allocated array with its own indices
+             for (ui32 idx = 0U; idx < 10U; ++idx) {
+                 slots[idx] = NUdf::TUnboxedValuePod(idx);
+             }
+             return list;
+         },
+         "def Test(value): return iter(value)",
+         Ui32StreamValidator
+     );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp
new file mode 100644
index 0000000000..444b7b0c5b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp
@@ -0,0 +1,98 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyStringTest) {
+ // Checks PyCast in both directions for an ASCII string of type TStringType:
+ // Python bytes -> TStringType, and TStringType -> Python bytes.
+ template <typename TStringType>
+ void TestStringCasts() {
+     // Python -> C++
+     TStringType source(TStringBuf("test string"));
+     TStringBuf sourceView = source;
+     TPyObjectPtr pyBytes = PyBytes_FromString(sourceView.data());
+     const auto converted = PyCast<TStringType>(pyBytes.Get());
+
+     UNIT_ASSERT_STRINGS_EQUAL(converted, source);
+
+     // C++ -> Python
+     TStringType other(TStringBuf("another test string"));
+     TStringBuf otherView = other;
+     TPyObjectPtr pyOther = PyCast<TStringType>(other);
+
+     char* rawData = nullptr;
+     Py_ssize_t rawSize = 0;
+     const auto status = PyBytes_AsStringAndSize(pyOther.Get(), &rawData, &rawSize);
+     UNIT_ASSERT(status >= 0);
+     UNIT_ASSERT(rawData != nullptr);
+     UNIT_ASSERT_EQUAL(static_cast<size_t>(rawSize), otherView.size());
+     UNIT_ASSERT_STRINGS_EQUAL(rawData, other);
+ }
+
+ // Same round trip as the plain string test, but with byte sequences
+ // outside the ASCII range.
+ template <typename TStringType>
+ void TestBinaryStringCasts() {
+     // Python -> C++
+     TStringType source(TStringBuf("\xa0\xa1"sv));
+     TStringBuf sourceView = source;
+     TPyObjectPtr pyBytes = PyBytes_FromString(sourceView.data());
+     const auto converted = PyCast<TStringType>(pyBytes.Get());
+
+     UNIT_ASSERT_STRINGS_EQUAL(converted, source);
+
+     // C++ -> Python
+     TStringType other(TStringBuf("\xf0\x90\x28\xbc"sv));
+     TStringBuf otherView = other;
+     TPyObjectPtr pyOther = PyCast<TStringType>(other);
+
+     char* rawData = nullptr;
+     Py_ssize_t rawSize = 0;
+     const auto status = PyBytes_AsStringAndSize(pyOther.Get(), &rawData, &rawSize);
+     UNIT_ASSERT(status >= 0);
+     UNIT_ASSERT(rawData != nullptr);
+     UNIT_ASSERT_EQUAL(static_cast<size_t>(rawSize), otherView.size());
+     UNIT_ASSERT_STRINGS_EQUAL(rawData, other);
+ }
+
+ // Checks UTF-8 text conversions for TStringType: the UTF-8 bytes of a
+ // Python unicode object are cast with PyCast, and ToPyUnicode must produce
+ // a unicode object whose UTF-8 form equals the source string.
+ template <typename TStringType>
+ void TestUtf8StringCasts() {
+     // Python -> C++
+     const TStringType source(TStringBuf("тестовая строка"));
+     TStringBuf sourceView = source;
+     const TPyObjectPtr pyText = PyUnicode_FromString(sourceView.data());
+     const TPyObjectPtr pyEncoded = PyUnicode_AsUTF8String(pyText.Get());
+     const auto converted = PyCast<TStringType>(pyEncoded.Get());
+     UNIT_ASSERT_STRINGS_EQUAL(converted, source);
+
+     // C++ -> Python
+     const TStringType other(TStringBuf("еще одна тестовая строка"));
+     TStringBuf otherView = other;
+     const auto pyOther = ToPyUnicode<TStringType>(other);
+
+     UNIT_ASSERT(PyUnicode_Check(pyOther.Get()));
+
+     Py_ssize_t rawSize = 0;
+ #if PY_MAJOR_VERSION >= 3
+     const auto rawData = PyUnicode_AsUTF8AndSize(pyOther.Get(), &rawSize);
+ #else
+     // Python 2: encode to a bytes object first, then read its buffer
+     char* rawData = nullptr;
+     const TPyObjectPtr pyOtherEncoded = PyUnicode_AsUTF8String(pyOther.Get());
+     const auto status = PyBytes_AsStringAndSize(pyOtherEncoded.Get(), &rawData, &rawSize);
+     UNIT_ASSERT(status >= 0);
+ #endif
+     UNIT_ASSERT(rawData != nullptr);
+     UNIT_ASSERT_EQUAL(static_cast<size_t>(rawSize), otherView.size());
+     UNIT_ASSERT_STRINGS_EQUAL(rawData, other);
+ }
+
+ // Runs the plain string round trip for each supported string type.
+ Y_UNIT_TEST(Simple) {
+ TestStringCasts<TString>();
+ TestStringCasts<TStringBuf>();
+ TestStringCasts<NUdf::TStringRef>();
+ }
+
+ // Runs the UTF-8 text round trip for each supported string type.
+ Y_UNIT_TEST(Utf8) {
+ TestUtf8StringCasts<TString>();
+ TestUtf8StringCasts<TStringBuf>();
+ TestUtf8StringCasts<NUdf::TStringRef>();
+ }
+
+ // Runs the non-ASCII byte string round trip for each supported string type.
+ Y_UNIT_TEST(Binary) {
+ TestBinaryStringCasts<TString>();
+ TestBinaryStringCasts<TStringBuf>();
+ TestBinaryStringCasts<NUdf::TStringRef>();
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.cpp b/yql/essentials/udfs/common/python/bindings/py_struct.cpp
new file mode 100644
index 0000000000..a4ab99ee32
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_struct.cpp
@@ -0,0 +1,188 @@
+#include "py_struct.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+#include <util/string/cast.h>
+#include <util/string/join.h>
+#include <util/string/builder.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+ namespace {
+
+ // Creates an empty Python object that will hold the members of a struct of
+ // the given `type`.
+ //
+ // The Python type used for the object is created on first use and cached
+ // in ctx->StructTypes under `type`: a struct-sequence type named
+ // "yql.Struct" on Python 3, an old-style class named "yql.Struct_<N>" on
+ // Python 2.
+ //
+ // Throws yexception when the Python type or the instance cannot be created.
+ // If type creation fails, the cache entry is removed again so that a later
+ // call retries instead of using an empty entry.
+ TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TType* type, const NUdf::TStructTypeInspector& inspector)
+ {
+     const auto it = ctx->StructTypes.emplace(type, TPyObjectPtr());
+     if (it.second) {
+ #if PY_MAJOR_VERSION >= 3
+         // one descriptor per member plus the terminating {nullptr, nullptr}
+         std::vector<PyStructSequence_Field> fields(inspector.GetMembersCount() + 1U);
+         for (ui32 i = 0U; i < inspector.GetMembersCount(); ++i) {
+             fields[i] = {const_cast<char*>(inspector.GetMemberName(i).Data()), nullptr};
+         }
+         fields.back() = {nullptr, nullptr};
+
+         PyStructSequence_Desc desc = {
+             INIT_MEMBER(name, "yql.Struct"),
+             INIT_MEMBER(doc, nullptr),
+             INIT_MEMBER(fields, fields.data()),
+             INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount()))
+         };
+
+         const auto typeObject = new PyTypeObject();
+         if (0 > PyStructSequence_InitType2(typeObject, &desc)) {
+             // do not leave an empty entry behind: the next call would skip
+             // type creation and use a null type object
+             // NOTE(review): typeObject is deliberately not freed here, as it
+             // may be partially initialized - confirm whether it can be
+             // released safely.
+             ctx->StructTypes.erase(it.first);
+             throw yexception() << "can't create struct type: " << GetLastErrorAsString();
+         }
+
+         it.first->second.ResetSteal(reinterpret_cast<PyObject*>(typeObject));
+     }
+
+     const TPyObjectPtr object = PyStructSequence_New(it.first->second.GetAs<PyTypeObject>());
+ #else
+         const auto className = TString("yql.Struct_") += ToString(ctx->StructTypes.size());
+         PyObject* metaclass = (PyObject *) &PyClass_Type;
+         const TPyObjectPtr name = PyRepr(TStringBuf(className));
+         const TPyObjectPtr bases = PyTuple_New(0);
+         const TPyObjectPtr dict = PyDict_New();
+
+         TPyObjectPtr newClass = PyObject_CallFunctionObjArgs(
+             metaclass, name.Get(), bases.Get(), dict.Get(),
+             nullptr);
+         if (!newClass) {
+             // do not leave an empty entry behind: the next call would skip
+             // class creation and instantiate a null class
+             ctx->StructTypes.erase(it.first);
+             throw yexception() << "can't create new type: " << GetLastErrorAsString();
+         }
+
+         it.first->second = std::move(newClass);
+     }
+
+     Y_UNUSED(inspector);
+     const TPyObjectPtr object = PyInstance_New(it.first->second.Get(), nullptr, nullptr);
+ #endif
+     if (!object) {
+         throw yexception() << "can't create struct instance: " << GetLastErrorAsString();
+     }
+     return object;
+ }
+
+ }
+
+ // Converts a MiniKQL struct `value` of the given `type` into a Python
+ // object created by CreateNewStrucInstance. Members are stored by position
+ // on Python 3 and as named attributes on Python 2.
+ // Throws yexception when an attribute cannot be set (Python 2 only).
+ TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+ {
+     const NUdf::TStructTypeInspector structInspector(*ctx->PyCtx->TypeInfoHelper, type);
+     const TPyObjectPtr pyStruct = CreateNewStrucInstance(ctx, type, structInspector);
+     const auto fieldCount = structInspector.GetMembersCount();
+
+     // converts one member and stores it in pyStruct at position `index`
+     const auto storeMember = [&](Py_ssize_t index, const auto& member) {
+ #if PY_MAJOR_VERSION >= 3
+         auto pyMember = ToPyObject(ctx, structInspector.GetMemberType(index), member);
+         PyStructSequence_SetItem(pyStruct.Get(), index, pyMember.Release());
+ #else
+         const TStringBuf fieldName = structInspector.GetMemberName(index);
+         const auto pyMember = ToPyObject(ctx, structInspector.GetMemberType(index), member);
+         if (0 > PyObject_SetAttrString(pyStruct.Get(), fieldName.data(), pyMember.Get())) {
+             throw yexception()
+                 << "Can't set attr '" << fieldName << "' to python object: "
+                 << GetLastErrorAsString();
+         }
+ #endif
+     };
+
+     if (auto elements = value.GetElements()) {
+         // direct access to the element array is available
+         for (Py_ssize_t i = 0; i < fieldCount; ++i) {
+             storeMember(i, elements[i]);
+         }
+     } else {
+         // otherwise every member is requested individually
+         for (Py_ssize_t i = 0; i < fieldCount; ++i) {
+             storeMember(i, value.GetElement(i));
+         }
+     }
+
+     return pyStruct;
+ }
+
+ // Converts a Python dict or an arbitrary Python object into a MiniKQL
+ // struct of the given `type`.
+ //
+ // For a dict, each member is looked up by name as a str key and then as a
+ // bytes key; for any other object, members are read as attributes. A
+ // missing member of Optional kind becomes an empty value. All other
+ // failures are collected and reported together in a single yexception.
+ NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value)
+ {
+     const NUdf::TStructTypeInspector structInspector(*ctx->PyCtx->TypeInfoHelper, type);
+     const auto fieldCount = structInspector.GetMembersCount();
+     NUdf::TUnboxedValue* slots = nullptr;
+     auto structValue = ctx->ValueBuilder->NewArray(fieldCount, slots);
+
+     TVector<TString> problems;
+     if (PyDict_Check(value)) {
+         for (ui32 idx = 0; idx < fieldCount; idx++) {
+             TStringBuf fieldName = structInspector.GetMemberName(idx);
+             auto fieldType = structInspector.GetMemberType(idx);
+             // borrowed reference - no need to manage ownership
+             PyObject* entry = PyDict_GetItemString(value, fieldName.data());
+             if (!entry) {
+                 // retry with the member name as a bytes key
+                 TPyObjectPtr bytesKey = PyBytes_FromStringAndSize(fieldName.data(), fieldName.size());
+                 entry = PyDict_GetItem(value, bytesKey.Get());
+             }
+
+             if (entry) {
+                 try {
+                     slots[idx] = FromPyObject(ctx, structInspector.GetMemberType(idx), entry);
+                 } catch (const yexception& e) {
+                     problems.push_back(TStringBuilder() << "Failed to convert dict item '" << fieldName << "' - " << e.what());
+                 }
+             } else if (ctx->PyCtx->TypeInfoHelper->GetTypeKind(fieldType) == NUdf::ETypeKind::Optional) {
+                 slots[idx] = NUdf::TUnboxedValue();
+             } else {
+                 problems.push_back(TStringBuilder() << "Dict has no item '" << fieldName << "'");
+             }
+         }
+
+         if (!problems.empty()) {
+             throw yexception() << "Failed to convert dict to struct\n" << JoinSeq("\n", problems) << "\nDict repr: " << PyObjectRepr(value);
+         }
+     } else {
+         for (ui32 idx = 0; idx < fieldCount; idx++) {
+             TStringBuf fieldName = structInspector.GetMemberName(idx);
+             auto fieldType = structInspector.GetMemberType(idx);
+             TPyObjectPtr attr = PyObject_GetAttrString(value, fieldName.data());
+
+             if (attr) {
+                 try {
+                     slots[idx] = FromPyObject(ctx, fieldType, attr.Get());
+                 } catch (const yexception& e) {
+                     problems.push_back(TStringBuilder() << "Failed to convert object attr '" << fieldName << "' - " << e.what());
+                 }
+                 continue;
+             }
+
+             // only a plain AttributeError counts as "member is absent"
+             const bool optionalField =
+                 ctx->PyCtx->TypeInfoHelper->GetTypeKind(fieldType) == NUdf::ETypeKind::Optional;
+             if (optionalField && PyErr_ExceptionMatches(PyExc_AttributeError)) {
+                 PyErr_Clear();
+                 slots[idx] = NUdf::TUnboxedValue();
+                 continue;
+             }
+
+             problems.push_back(TStringBuilder() << "Object has no attr '" << fieldName << "' , error: " << GetLastErrorAsString());
+         }
+
+         if (!problems.empty()) {
+             throw yexception() << "Failed to convert object to struct\n" << JoinSeq("\n", problems) << "\nObject repr: " << PyObjectRepr(value);
+         }
+     }
+
+     return structValue;
+ }
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.h b/yql/essentials/udfs/common/python/bindings/py_struct.h
new file mode 100644
index 0000000000..79a380283f
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_struct.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+ namespace NPython {
+
+ // Converts a MiniKQL struct `value` of struct type `type` into a Python
+ // object holding one entry per struct member.
+ TPyObjectPtr ToPyStruct(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+ // Converts a Python dict or object `value` into a MiniKQL struct of struct
+ // type `type`, reading members by name.
+ NKikimr::NUdf::TUnboxedValue FromPyStruct(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type, PyObject* value);
+
+ } // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp
new file mode 100644
index 0000000000..a97507f549
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp
@@ -0,0 +1,307 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyStructTest) {
+ Y_UNIT_TEST(FromPyObject) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "class Person:\n"
+ " def __init__(self, age, name):\n"
+ " self.age = age\n"
+ " self.name = name\n"
+ "\n"
+ "def Test():\n"
+ " return Person(99, 'Jamel')\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel");
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyObjectMissingOptionalField) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build();
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField("name", optionalStringType, &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "class Person:\n"
+ " def __init__(self, age):\n"
+ " self.age = age\n"
+ "\n"
+ "def Test():\n"
+ " return Person(99)\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT(!name);
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDict) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "def Test():\n"
+ " return { 'name': 'Jamel', 'age': 99 }\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel");
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDictMissingOptionalField) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build();
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField("name", optionalStringType, &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "def Test():\n"
+ " return { 'age': 99 }\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT(!name);
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("a\0ge", &ageIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "def Test():\n"
+ " return { b'a\\0ge': 99 }\n",
+ [ageIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyNamedTuple) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "from collections import namedtuple\n"
+ "def Test():\n"
+ " Person = namedtuple('Person', 'name age')\n"
+ " return Person(age=13, name='Tony')\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony");
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build();
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<int>("age", &ageIdx)
+ .AddField("name", optionalStringType, &nameIdx)
+ .Build();
+
+ engine.ToMiniKQL(personType,
+ "from collections import namedtuple\n"
+ "def Test():\n"
+ " Pers = namedtuple('Person', 'name age')\n"
+ " return Pers(name=None, age=15)\n",
+ [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ auto name = value.GetElement(nameIdx);
+ UNIT_ASSERT(!name);
+ auto age = value.GetElement(ageIdx);
+ UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15);
+ });
+ }
+
+ Y_UNIT_TEST(FromPyEmptyStruct) {
+ TPythonTestEngine engine;
+ auto emptyStruct = engine.GetTypeBuilder().Struct()->Build();
+
+ engine.ToMiniKQL(emptyStruct,
+ "class Empty: pass\n"
+ "\n"
+ "def Test():\n"
+ " return Empty()\n",
+ [](const NUdf::TUnboxedValuePod&) {});
+ }
+
+ Y_UNIT_TEST(ToPyObject) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0;
+ auto addressType = engine.GetTypeBuilder().Struct()->
+ AddField<NUdf::TUtf8>("city", &cityIdx)
+ .AddField<NUdf::TUtf8>("street", &streetIdx)
+ .AddField<ui16>("building", &buildingIdx)
+ .Build();
+
+ auto personType = engine.GetTypeBuilder().Struct()->
+ AddField<ui16>("age", &ageIdx)
+ .AddField<NUdf::TUtf8>("name", &nameIdx)
+ .AddField("address", addressType, &addressIdx)
+ .Build();
+
+
+ engine.ToPython(personType,
+ [=](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items);
+ items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97));
+ items[nameIdx] = vb.NewString("Jamel");
+ NUdf::TUnboxedValue* items2 = nullptr;
+ items[addressIdx] = vb.NewArray(static_cast<const TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2);
+ items2[cityIdx] = vb.NewString("Moscow");;
+ items2[streetIdx] = vb.NewString("L'va Tolstogo");
+ items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16));
+ return new_struct;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, object)\n"
+ " assert value.name == 'Jamel'\n"
+ " assert value.age == 97\n"
+ " assert value.address.city == 'Moscow'\n"
+ " assert value.address.building == 16\n"
+ );
+ }
+
+ Y_UNIT_TEST(ToPyObjectKeywordsAsFields) {
+ TPythonTestEngine engine;
+
+ ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0;
+ auto structType = engine.GetTypeBuilder().Struct()->
+ AddField<NUdf::TUtf8>("pass", &passIdx)
+ .AddField<NUdf::TUtf8>("while", &whileIdx)
+ .AddField<NUdf::TUtf8>("if", &ifIdx)
+ .AddField<NUdf::TUtf8>("not", &notIdx)
+ .Build();
+
+ engine.ToPython(structType,
+ [=](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items);
+ items[ifIdx] = vb.NewString("You");
+ items[whileIdx] = vb.NewString("Shall");
+ items[notIdx] = vb.NewString("Not");
+ items[passIdx] = vb.NewString("Pass");
+ return new_struct;
+ },
+ "def Test(value):\n"
+ " assert getattr(value, 'if') == 'You'\n"
+ " assert getattr(value, 'while') == 'Shall'\n"
+ " assert getattr(value, 'not') == 'Not'\n"
+ " assert getattr(value, 'pass') == 'Pass'\n"
+ );
+ }
+
+#if PY_MAJOR_VERSION >= 3 // TODO: Fix for python 2
+ Y_UNIT_TEST(ToPyObjectTryModify) {
+ TPythonTestEngine engine;
+
+ ui32 field1Idx = 0, field2Idx = 0;
+ auto structType = engine.GetTypeBuilder().Struct()->
+ AddField<NUdf::TUtf8>("field1", &field1Idx)
+ .AddField<NUdf::TUtf8>("field2", &field2Idx)
+ .Build();
+
+ engine.ToPython(structType,
+ [=](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items);
+ items[field1Idx] = NUdf::TUnboxedValuePod::Zero();
+ items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty");
+ return new_struct;
+ },
+ "def Test(value):\n"
+ " try:\n"
+ " setattr(value, 'field1', 17)\n"
+ " except AttributeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ " try:\n"
+ " value.field2 = 18\n"
+ " except AttributeError:\n"
+ " pass\n"
+ " else:\n"
+ " assert False\n"
+ );
+ }
+#endif
+
+ Y_UNIT_TEST(ToPyObjectEmptyStruct) {
+ TPythonTestEngine engine;
+
+ auto personType = engine.GetTypeBuilder().Struct()->Build();
+
+ engine.ToPython(personType,
+ [](const TType*, const NUdf::IValueBuilder& vb) {
+ return vb.NewEmptyList();
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, object)\n"
+#if PY_MAJOR_VERSION >= 3
+ " assert len(value) == 0\n"
+#endif
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp
new file mode 100644
index 0000000000..6cef25ea47
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp
@@ -0,0 +1,61 @@
+#include "py_tuple.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+
+// Converts a MiniKQL tuple value into a new Python tuple.
+//
+// ctx   - cast context; its TypeInfoHelper is used to inspect `type`.
+// type  - tuple type that defines the number and types of the elements.
+// value - tuple value to convert.
+//
+// Each element is converted with ToPyObject() using the matching element
+// type. Throws yexception when the Python tuple cannot be allocated.
+TPyObjectPtr ToPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+    const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+    const auto elementsCount = inspector.GetElementsCount();
+
+    const TPyObjectPtr tuple(PyTuple_New(elementsCount));
+    if (!tuple) {
+        // PyTuple_New() returns NULL on failure and PyTuple_SET_ITEM() does
+        // no checking, so filling a null tuple would crash the process.
+        throw yexception() << "Failed to create tuple: " << GetLastErrorAsString();
+    }
+
+    if (auto ptr = value.GetElements()) {
+        // Fast path: the value exposes its elements as a contiguous array.
+        for (ui32 i = 0U; i < elementsCount; ++i) {
+            auto item = ToPyObject(ctx, inspector.GetElementType(i), *ptr++);
+            // PyTuple_SET_ITEM steals the reference, hence Release().
+            PyTuple_SET_ITEM(tuple.Get(), i, item.Release());
+        }
+    } else {
+        // Fallback: fetch the elements one by one.
+        for (ui32 i = 0U; i < elementsCount; ++i) {
+            auto item = ToPyObject(ctx, inspector.GetElementType(i), value.GetElement(i));
+            PyTuple_SET_ITEM(tuple.Get(), i, item.Release());
+        }
+    }
+
+    return tuple;
+}
+
+// Converts a Python sequence (or any iterable accepted by PySequence_Fast)
+// into a MiniKQL tuple value.
+//
+// ctx   - cast context; provides the type helper and the value builder.
+// type  - target tuple type.
+// value - Python object to convert (borrowed reference).
+//
+// Throws yexception when `value` is not a sequence/iterable or when its
+// length differs from the number of elements in the tuple type. Exceptions
+// thrown by FromPyObject() for individual elements propagate unchanged.
+NUdf::TUnboxedValue FromPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value)
+{
+    const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+
+    const TPyObjectPtr fast = PySequence_Fast(value, "Expected tuple or list.");
+    if (!fast) {
+        // PySequence_Fast() leaves a Python exception pending on failure.
+        // Drop it before calling back into the interpreter for the repr and
+        // before unwinding with a C++ exception, so that no stale error
+        // indicator leaks into later Python C API calls.
+        PyErr_Clear();
+        throw yexception() << "Expected Tuple or Sequence but got: " << PyObjectRepr(value);
+    }
+
+    const Py_ssize_t itemsCount = PySequence_Fast_GET_SIZE(fast.Get());
+    if (itemsCount < 0 || inspector.GetElementsCount() != itemsCount) {
+        throw yexception() << "Tuple elements count mismatch.";
+    }
+
+    NUdf::TUnboxedValue* tuple_items = nullptr;
+    const auto tuple = ctx->ValueBuilder->NewArray(inspector.GetElementsCount(), tuple_items);
+    for (Py_ssize_t i = 0; i < itemsCount; i++) {
+        // Borrowed reference, kept alive by `fast`.
+        const auto item = PySequence_Fast_GET_ITEM(fast.Get(), i);
+        *tuple_items++ = FromPyObject(ctx, inspector.GetElementType(i), item);
+    }
+
+    return tuple;
+}
+
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.h b/yql/essentials/udfs/common/python/bindings/py_tuple.h
new file mode 100644
index 0000000000..7d66af9b01
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tuple.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Builds a new Python tuple from a MiniKQL tuple value: one Python object
+// per element, each converted with ToPyObject() using the element type
+// reported for `type`.
+TPyObjectPtr ToPyTuple(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Builds a MiniKQL tuple value from a Python object accepted by
+// PySequence_Fast() (the unit tests cover tuples, lists and iterators).
+// Throws yexception if the object cannot be treated as a sequence or if
+// its length differs from the number of elements in the tuple type.
+NKikimr::NUdf::TUnboxedValue FromPyTuple(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type, PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp
new file mode 100644
index 0000000000..a6b9b6cc3e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp
@@ -0,0 +1,108 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyTupleTest) {
+ Y_UNIT_TEST(FromPyEmptyTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<>>(
+ "def Test(): return ()",
+ [](const NUdf::TUnboxedValuePod&) {});
+ }
+
+ Y_UNIT_TEST(FromPyList) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<int, int, int>>(
+ "def Test(): return [1, 2, 3]",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1);
+ UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2);
+ UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3);
+ });
+ }
+
+ // Checks that an arbitrary iterator (not only a tuple or a list) can be
+ // converted into a MiniKQL tuple.
+ Y_UNIT_TEST(FromPyIter) {
+     TPythonTestEngine engine;
+     // Iterate over a list rather than a set: the assertions below depend on
+     // element order, and the iteration order of a set is not specified.
+     engine.ToMiniKQL<NUdf::TTuple<int, int, int>>(
+         "def Test(): return iter([1, 2, 3])",
+         [](const NUdf::TUnboxedValuePod& value) {
+             UNIT_ASSERT(value);
+             UNIT_ASSERT(value.IsBoxed());
+             UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1);
+             UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2);
+             UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3);
+         });
+ }
+
+ Y_UNIT_TEST(FromPyTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>(
+ "def Test(): return (1, float(2.3), '4')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1);
+ auto second = value.GetElement(1);
+ UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001);
+ const auto third = value.GetElement(2);
+ UNIT_ASSERT_EQUAL(third.AsStringRef(), "4");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyTupleInTuple) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTuple<ui32, NUdf::TTuple<ui8, float>, char*>>(
+ "def Test(): return (1, (2, float(3.4)), '5')",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT(value.IsBoxed());
+ UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1);
+
+ auto second = value.GetElement(1);
+ UNIT_ASSERT(second);
+ UNIT_ASSERT(second.IsBoxed());
+ UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2);
+ UNIT_ASSERT_DOUBLES_EQUAL(
+ second.GetElement(1).Get<float>(), 3.4, 0.0001);
+
+ const auto third = value.GetElement(2);
+ UNIT_ASSERT_EQUAL(third.AsStringRef(), "5");
+ });
+ }
+
+ Y_UNIT_TEST(ToPyEmptyTuple) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTuple<>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items);
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert len(value) == 0\n"
+ " assert value == ()\n");
+ }
+
+ Y_UNIT_TEST(ToPyTuple) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>(
+ [](const TType* type, const NUdf::IValueBuilder& vb) {
+ NUdf::TUnboxedValue* items = nullptr;
+ auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items);
+ items[0] = vb.NewString("111");
+ items[1] = NUdf::TUnboxedValuePod((ui64) 2);
+ items[2] = NUdf::TUnboxedValuePod((ui8) 3);
+ items[3] = NUdf::TUnboxedValuePod((float) 4.5);
+ return tuple;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert len(value) == 4\n"
+ " assert value == ('111', 2, 3, 4.5)\n");
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp
new file mode 100644
index 0000000000..e9f5971c78
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp
@@ -0,0 +1,85 @@
+#include "py_variant.h"
+#include "ut3/py_test_engine.h"
+#include <yql/essentials/minikql/mkql_type_ops.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyTzDateTest) {
+ Y_UNIT_TEST(FromDate) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTzDate>(
+ "def Test():\n"
+ " return (2, 'Europe/Moscow')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ });
+ }
+
+ Y_UNIT_TEST(FromDatetime) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTzDatetime>(
+ "def Test():\n"
+ " return (2, 'Europe/Moscow')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_VALUES_EQUAL(value.Get<ui32>(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ });
+ }
+
+ Y_UNIT_TEST(FromTimestamp) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TTzTimestamp>(
+ "def Test():\n"
+ " return (2, 'Europe/Moscow')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_VALUES_EQUAL(value.Get<ui64>(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ });
+ }
+
+ Y_UNIT_TEST(ToDate) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTzDate>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ auto ret = NUdf::TUnboxedValuePod((ui16)2);
+ ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ return ret;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (2, 'Europe/Moscow')\n");
+ }
+
+ Y_UNIT_TEST(ToDatetime) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTzDatetime>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ auto ret = NUdf::TUnboxedValuePod((ui32)2);
+ ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ return ret;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (2, 'Europe/Moscow')\n");
+ }
+
+ Y_UNIT_TEST(ToTimestamp) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TTzTimestamp>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+ auto ret = NUdf::TUnboxedValuePod((ui64)2);
+ ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow"));
+ return ret;
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (2, 'Europe/Moscow')\n");
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.cpp b/yql/essentials/udfs/common/python/bindings/py_utils.cpp
new file mode 100644
index 0000000000..d1e0e8b484
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_utils.cpp
@@ -0,0 +1,89 @@
+#include "py_utils.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_gil.h"
+
+#include <util/generic/yexception.h>
+#include <util/string/split.h>
+
+#include <regex>
+
+
+namespace NPython {
+
+// Creates a Python string object from a 7-bit ASCII buffer.
+//
+// asciiStr - source characters; the process aborts if any byte has the
+//            high bit set.
+// intern   - when true, the resulting string is interned.
+//
+// Produces a unicode object on Python 3 and a byte string on Python 2.
+// Aborts if the string object cannot be created.
+TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern) {
+    for (const char c : asciiStr) {
+        Y_ABORT_UNLESS((c&0x80) == 0, "expected ascii");
+    }
+
+    const auto length = static_cast<Py_ssize_t>(asciiStr.size());
+#if PY_MAJOR_VERSION >= 3
+    TPyObjectPtr pyStr = PyUnicode_FromStringAndSize(asciiStr.data(), length);
+#else
+    TPyObjectPtr pyStr = PyString_FromStringAndSize(asciiStr.data(), length);
+#endif
+    Y_ABORT_UNLESS(pyStr, "Can't get repr string");
+
+    if (intern) {
+        // The interning functions may replace the object behind the pointer,
+        // so ownership is handed over to a raw pointer for the call and
+        // wrapped again afterwards.
+        PyObject* raw = pyStr.Release();
+#if PY_MAJOR_VERSION >= 3
+        PyUnicode_InternInPlace(&raw);
+#else
+        PyString_InternInPlace(&raw);
+#endif
+        return TPyObjectPtr(raw);
+    }
+
+    return pyStr;
+}
+
+// Returns repr(value) as a string suitable for diagnostics.
+//
+// The result never exceeds 1000 bytes: longer representations are cut and
+// terminated with "(truncated)". If repr() itself fails, the text
+// "repr error: " followed by the Python error description is returned; if
+// the repr object cannot be converted to a string, a fixed placeholder is
+// returned instead.
+TString PyObjectRepr(PyObject* value) {
+    static constexpr size_t maxLen = 1000;
+    static constexpr std::string_view truncSuffix = "(truncated)";
+    const TPyObjectPtr repr(PyObject_Repr(value));
+    if (!repr) {
+        return TString("repr error: ") + GetLastErrorAsString();
+    }
+
+    TString string;
+    if (!TryPyCast(repr.Get(), string)) {
+        string = "can't get repr as string";
+    }
+    if (string.size() > maxLen) {
+        size_t cut = maxLen - truncSuffix.size();
+        // NOTE(review): assumes the converted repr is UTF-8 encoded -- confirm
+        // against TryPyCast. Step back over continuation bytes (10xxxxxx) so
+        // that a multi-byte character is dropped as a whole instead of being
+        // split, which would leave an invalid sequence in the message. The
+        // back-off is limited to 3 bytes (the longest possible tail), so
+        // non-UTF-8 data cannot shrink the result noticeably.
+        const char* const bytes = string.data();
+        for (size_t back = 0; back < 3 && cut > 0 && (static_cast<unsigned char>(bytes[cut]) & 0xC0) == 0x80; ++back) {
+            --cut;
+        }
+        string.resize(cut);
+        string += truncSuffix;
+    }
+    return string;
+}
+
+// Tells whether Python source text declares its encoding with a PEP 263
+// magic comment ("# -*- coding: <name> -*-" and similar forms).
+//
+// source - full text of the Python module.
+//
+// Only the first two lines are examined. Returns true if either of them is
+// a comment that contains "coding:" or "coding=" followed by an encoding
+// name.
+bool HasEncodingCookie(const TString& source) {
+    //
+    // To define a source code encoding, a magic comment must be placed
+    // into the source files either as first or second line in the file.
+    //
+    // See https://www.python.org/dev/peps/pep-0263 for more details.
+    //
+
+    // The expression describes only the beginning of the line and is applied
+    // with regex_search anchored at the line start. Requiring a full match
+    // with a trailing ".*" would reject lines ending with '\r' (CRLF
+    // sources), because '.' does not match line terminators in the
+    // ECMAScript grammar. Form feed is accepted as leading whitespace, as
+    // in the current PEP 263 expression; '\v' is kept for compatibility.
+    static const std::regex encodingRe(
+        "^[ \\t\\f\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+");
+
+    int i = 0;
+    for (const auto& it: StringSplitter(source).Split('\n')) {
+        if (i++ == 2) break;
+
+        TStringBuf line = it.Token();
+        if (std::regex_search(line.begin(), line.end(), encodingRe,
+                              std::regex_constants::match_continuous)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Drops the interpreter's record of the most recent error: clears the
+// current error indicator and resets sys.last_type, sys.last_value and
+// sys.last_traceback to None.
+void PyCleanup() {
+ // NOTE(review): presumably holds the GIL until the end of the scope --
+ // confirm in py_gil.h.
+ TPyGilLocker lock;
+ PyErr_Clear();
+ PySys_SetObject("last_type", Py_None);
+ PySys_SetObject("last_value", Py_None);
+ PySys_SetObject("last_traceback", Py_None);
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.h b/yql/essentials/udfs/common/python/bindings/py_utils.h
new file mode 100644
index 0000000000..0c5ef058f1
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_utils.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "py_ptr.h"
+
+#include <util/generic/strbuf.h>
+
+// INIT_MEMBER(member, value) initializes one field of an aggregate.
+// When _win_ is defined it expands to the bare value (the member name
+// survives only as a trailing comment), so the initializers are positional
+// and have to be listed in declaration order. Otherwise it expands to a
+// designated initializer `.member = (value)`.
+// NOTE(review): presumably the Windows toolchain lacks designated
+// initializers -- confirm.
+#ifdef _win_
+#define INIT_MEMBER(member, value) value //member
+#else
+#define INIT_MEMBER(member, value) .member = (value)
+#endif
+
+namespace NPython {
+
+// Creates a Python string object from an ASCII-only buffer (unicode on
+// Python 3, byte string on Python 2) and optionally interns it. Aborts the
+// process on non-ASCII input or if the object cannot be created.
+TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern = false);
+
+// Overload for character arrays such as string literals: the last array
+// element (the terminating NUL of a literal) is excluded and the resulting
+// string is always interned.
+template <size_t size>
+TPyObjectPtr PyRepr(const char(&str)[size]) {
+ return PyRepr(TStringBuf(str, size - 1), true);
+}
+
+// Returns repr(value) for diagnostics, limited to 1000 bytes; longer text
+// is cut and ends with "(truncated)". Failures to obtain the repr are
+// reported in the returned text rather than by throwing.
+TString PyObjectRepr(PyObject* value);
+
+// Checks whether one of the first two lines of the Python source contains
+// a PEP 263 encoding declaration comment.
+bool HasEncodingCookie(const TString& source);
+
+// Clears the current Python error indicator and resets sys.last_type,
+// sys.last_value and sys.last_traceback to None.
+void PyCleanup();
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp
new file mode 100644
index 0000000000..ce521689b4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp
@@ -0,0 +1,37 @@
+#include "py_utils.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyUtilsTest) {
+
+ Y_UNIT_TEST(EncodingCookie) {
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-"));
+ UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-"));
+ UNIT_ASSERT(HasEncodingCookie(
+ "# This Python file uses the following encoding: utf-8"));
+
+ // encoding comment on second line
+ UNIT_ASSERT(HasEncodingCookie(
+ "#!/usr/local/bin/python\n"
+ "# -*- coding: iso-8859-15 -*-\n"
+ "print 'hello'"));
+
+ // missing "coding:" prefix
+ UNIT_ASSERT(false == HasEncodingCookie("# latin-1"));
+
+ // encoding comment not on line 1 or 2
+ UNIT_ASSERT(false == HasEncodingCookie(
+ "#!/usr/local/bin/python\n"
+ "#\n"
+ "# -*- coding: latin-1 -*-\n"));
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.cpp b/yql/essentials/udfs/common/python/bindings/py_variant.cpp
new file mode 100644
index 0000000000..ab222b3432
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_variant.cpp
@@ -0,0 +1,97 @@
+#include "py_variant.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// public functions
+//////////////////////////////////////////////////////////////////////////////
+// Converts a MiniKQL variant value into a two-element Python tuple.
+//
+// For a variant over a tuple the result is (index, item); for a variant
+// over a struct it is (member name, item). The item is converted with
+// ToPyObject() using the type of the selected alternative.
+//
+// Throws yexception if the underlying type is neither a tuple nor a struct.
+TPyObjectPtr ToPyVariant(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* type,
+        const NUdf::TUnboxedValuePod& value)
+{
+    auto& helper = *castCtx->PyCtx->TypeInfoHelper;
+    NUdf::TVariantTypeInspector variantInspector(helper, type);
+    const NUdf::TType* underlying = variantInspector.GetUnderlyingType();
+
+    ui32 alternative = value.GetVariantIndex();
+    auto payload = value.GetVariantItem();
+
+    // Variant over a tuple: the alternative is identified by its position.
+    auto tupleInspector = NUdf::TTupleTypeInspector(helper, underlying);
+    if (tupleInspector) {
+        const NUdf::TType* payloadType = tupleInspector.GetElementType(alternative);
+        TPyObjectPtr pyKey = PyCast<ui32>(alternative);
+        TPyObjectPtr pyPayload = ToPyObject(castCtx, payloadType, payload);
+        return PyTuple_Pack(2, pyKey.Get(), pyPayload.Get());
+    }
+
+    // Variant over a struct: the alternative is identified by member name.
+    auto structInspector = NUdf::TStructTypeInspector(helper, underlying);
+    if (structInspector) {
+        const NUdf::TType* payloadType = structInspector.GetMemberType(alternative);
+        TPyObjectPtr pyKey = ToPyUnicode<NUdf::TStringRef>(
+            structInspector.GetMemberName(alternative));
+        TPyObjectPtr pyPayload = ToPyObject(castCtx, payloadType, payload);
+        return PyTuple_Pack(2, pyKey.Get(), pyPayload.Get());
+    }
+
+    throw yexception() << "Cannot get Variant item type";
+}
+
+// Converts a two-element Python tuple into a MiniKQL variant value.
+//
+// The first tuple element selects the alternative: an integer index for a
+// variant over a tuple, or a member name for a variant over a struct. The
+// second element is converted with FromPyObject() to the type of the
+// selected alternative.
+//
+// Fails (via PY_ENSURE or yexception) when `value` is not a tuple of
+// exactly two elements, when the selector kind does not fit the underlying
+// type, or when the index / member name is out of range or unknown.
+NUdf::TUnboxedValue FromPyVariant(
+        const TPyCastContext::TPtr& castCtx,
+        const NUdf::TType* type,
+        PyObject* value)
+{
+    PY_ENSURE(PyTuple_Check(value),
+        "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name);
+
+    Py_ssize_t tupleSize = PyTuple_GET_SIZE(value);
+    PY_ENSURE(tupleSize == 2,
+        "Expected to get Tuple with 2 elements, but got "
+        << tupleSize << " elements");
+
+    auto& helper = *castCtx->PyCtx->TypeInfoHelper;
+    NUdf::TVariantTypeInspector variantInspector(helper, type);
+    const NUdf::TType* underlying = variantInspector.GetUnderlyingType();
+
+    // Both references are borrowed from the tuple.
+    PyObject* selector = PyTuple_GET_ITEM(value, 0);
+    PyObject* payload = PyTuple_GET_ITEM(value, 1);
+
+    ui32 index;
+
+    // Integer selector: position inside a variant over a tuple.
+    if (TryPyCast(selector, index)) {
+        auto tupleInsp = NUdf::TTupleTypeInspector(helper, underlying);
+        if (!tupleInsp) {
+            throw yexception() << "Cannot convert " << PyObjectRepr(value)
+                << " underlying Variant type is not a Tuple";
+        }
+        PY_ENSURE(index < tupleInsp.GetElementsCount(),
+            "Index must be < " << tupleInsp.GetElementsCount()
+            << ", but got " << index);
+        auto* payloadType = tupleInsp.GetElementType(index);
+        return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, payloadType, payload));
+    }
+
+    // String selector: member name inside a variant over a struct.
+    NUdf::TStringRef name;
+    if (TryPyCast(selector, name)) {
+        auto structInsp = NUdf::TStructTypeInspector(helper, underlying);
+        if (!structInsp) {
+            throw yexception() << "Cannot convert " << PyObjectRepr(value)
+                << " underlying Variant type is not a Struct";
+        }
+        index = structInsp.GetMemberIndex(name);
+        PY_ENSURE(index < structInsp.GetMembersCount(),
+            "Unknown member name: " << TStringBuf(name));
+        auto* payloadType = structInsp.GetMemberType(index);
+        return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, payloadType, payload));
+    }
+
+    throw yexception()
+        << "Expected first Tuple element to either be an int "
+           "or a str, but got " << Py_TYPE(selector)->tp_name;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.h b/yql/essentials/udfs/common/python/bindings/py_variant.h
new file mode 100644
index 0000000000..ca97123183
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_variant.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Converts a MiniKQL variant value into a two-element Python tuple:
+// (index, item) for a variant over a tuple, (member name, item) for a
+// variant over a struct. Throws yexception for any other underlying type.
+TPyObjectPtr ToPyVariant(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Converts a two-element Python tuple into a MiniKQL variant value. The
+// first element selects the alternative (an integer index for a variant
+// over a tuple, a member name for a variant over a struct); the second
+// element is the item to convert. Fails if the input does not have this
+// shape or the selector does not match the underlying type.
+NKikimr::NUdf::TUnboxedValue FromPyVariant(
+ const TPyCastContext::TPtr& castCtx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp
new file mode 100644
index 0000000000..77ab9bc6e8
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp
@@ -0,0 +1,101 @@
+#include "py_variant.h"
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+Y_UNIT_TEST_SUITE(TPyVariantTest) {
+ Y_UNIT_TEST(FromPyWithIndex) {
+ TPythonTestEngine engine;
+ engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>(
+ "def Test():\n"
+ " return (2, 'hello')\n",
+ [](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2);
+ auto item = value.GetVariantItem();
+ UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello");
+ });
+ }
+
+ Y_UNIT_TEST(FromPyWithName) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ NUdf::TType* personType = engine.GetTypeBuilder().Struct()->
+ AddField<ui32>("age", &ageIdx)
+ .AddField<char*>("name", &nameIdx)
+ .Build();
+
+ NUdf::TType* variantType = engine.GetTypeBuilder()
+ .Variant()->Over(personType).Build();
+
+ engine.ToMiniKQL(
+ variantType,
+ "def Test():\n"
+ " return ('age', 99)\n",
+ [ageIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx);
+ auto item = value.GetVariantItem();
+ UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99);
+ });
+
+ engine.ToMiniKQL(
+ variantType,
+ "def Test():\n"
+ " return ('name', 'Jamel')\n",
+ [nameIdx](const NUdf::TUnboxedValuePod& value) {
+ UNIT_ASSERT(value);
+ UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx);
+ auto item = value.GetVariantItem();
+ UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel");
+ });
+ }
+
+ Y_UNIT_TEST(ToPyWithIndex) {
+ TPythonTestEngine engine;
+ engine.ToPython<NUdf::TVariant<float, ui32, char*>>(
+ [](const TType* /*type*/, const NUdf::IValueBuilder& vb) {
+ return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32) 42));
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == (1, 42)\n");
+ }
+
+ Y_UNIT_TEST(ToPyWithName) {
+ TPythonTestEngine engine;
+
+ ui32 ageIdx = 0, nameIdx = 0;
+ NUdf::TType* personType = engine.GetTypeBuilder().Struct()->
+ AddField<ui32>("age", &ageIdx)
+ .AddField<NUdf::TUtf8>("name", &nameIdx)
+ .Build();
+
+ NUdf::TType* variantType = engine.GetTypeBuilder()
+ .Variant()->Over(personType).Build();
+
+ engine.ToPython(
+ variantType,
+ [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) {
+ return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32) 99));
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == ('age', 99)\n"
+ );
+
+ engine.ToPython(
+ variantType,
+ [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) {
+ return vb.NewVariant(nameIdx, vb.NewString("Jamel"));
+ },
+ "def Test(value):\n"
+ " assert isinstance(value, tuple)\n"
+ " assert value == ('name', 'Jamel')\n"
+ );
+ }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_void.cpp b/yql/essentials/udfs/common/python/bindings/py_void.cpp
new file mode 100644
index 0000000000..ef72c052fb
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_void.cpp
@@ -0,0 +1,117 @@
+#include "py_void.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+
+using namespace NKikimr;
+
+namespace NPython {
+namespace {
+
+// tp_repr slot of yql.Void: builds the representation through PyRepr from the
+// literal "yql.Void" and hands ownership of the result to the caller.
+static PyObject* VoidRepr(PyObject* /*self*/) {
+    auto repr = PyRepr("yql.Void");
+    return repr.Release();
+}
+
+// tp_dealloc slot of yql.Void: reaching it is reported through Py_FatalError.
+static void VoidDealloc(PyObject* /*self*/) {
+    Py_FatalError("Deallocating yql.Void");
+}
+
+} // namespace
+
+// Type object of the yql.Void value (registered in the yql module as "TVoid").
+// Only two slots are populated: tp_dealloc (VoidDealloc) and tp_repr
+// (VoidRepr); every other slot is left null/zero.
+// The entries must stay in the order of the PyTypeObject fields, which is why
+// the version-dependent slots are selected with preprocessor conditions.
+PyTypeObject PyVoidType = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ INIT_MEMBER(tp_name , "yql.Void"),
+ INIT_MEMBER(tp_basicsize , 0),
+ INIT_MEMBER(tp_itemsize , 0),
+ INIT_MEMBER(tp_dealloc , VoidDealloc),
+ // This position holds tp_print before 3.8.0b4 and tp_vectorcall_offset after.
+#if PY_VERSION_HEX < 0x030800b4
+ INIT_MEMBER(tp_print , nullptr),
+#else
+ INIT_MEMBER(tp_vectorcall_offset, 0),
+#endif
+ INIT_MEMBER(tp_getattr , nullptr),
+ INIT_MEMBER(tp_setattr , nullptr),
+ // Python 3 names this position tp_as_async, Python 2 names it tp_compare.
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_as_async , nullptr),
+#else
+ INIT_MEMBER(tp_compare , nullptr),
+#endif
+ INIT_MEMBER(tp_repr , VoidRepr),
+ INIT_MEMBER(tp_as_number , nullptr),
+ INIT_MEMBER(tp_as_sequence , nullptr),
+ INIT_MEMBER(tp_as_mapping , nullptr),
+ INIT_MEMBER(tp_hash , nullptr),
+ INIT_MEMBER(tp_call , nullptr),
+ INIT_MEMBER(tp_str , nullptr),
+ INIT_MEMBER(tp_getattro , nullptr),
+ INIT_MEMBER(tp_setattro , nullptr),
+ INIT_MEMBER(tp_as_buffer , nullptr),
+ INIT_MEMBER(tp_flags , 0),
+ INIT_MEMBER(tp_doc , "yql.Void object"),
+ INIT_MEMBER(tp_traverse , nullptr),
+ INIT_MEMBER(tp_clear , nullptr),
+ INIT_MEMBER(tp_richcompare , nullptr),
+ INIT_MEMBER(tp_weaklistoffset , 0),
+ INIT_MEMBER(tp_iter , nullptr),
+ INIT_MEMBER(tp_iternext , nullptr),
+ INIT_MEMBER(tp_methods , nullptr),
+ INIT_MEMBER(tp_members , nullptr),
+ INIT_MEMBER(tp_getset , nullptr),
+ INIT_MEMBER(tp_base , nullptr),
+ INIT_MEMBER(tp_dict , nullptr),
+ INIT_MEMBER(tp_descr_get , nullptr),
+ INIT_MEMBER(tp_descr_set , nullptr),
+ INIT_MEMBER(tp_dictoffset , 0),
+ INIT_MEMBER(tp_init , nullptr),
+ INIT_MEMBER(tp_alloc , nullptr),
+ INIT_MEMBER(tp_new , nullptr),
+ INIT_MEMBER(tp_free , nullptr),
+ INIT_MEMBER(tp_is_gc , nullptr),
+ INIT_MEMBER(tp_bases , nullptr),
+ INIT_MEMBER(tp_mro , nullptr),
+ INIT_MEMBER(tp_cache , nullptr),
+ INIT_MEMBER(tp_subclasses , nullptr),
+ INIT_MEMBER(tp_weaklist , nullptr),
+ INIT_MEMBER(tp_del , nullptr),
+ INIT_MEMBER(tp_version_tag , 0),
+#if PY_MAJOR_VERSION >= 3
+ INIT_MEMBER(tp_finalize , nullptr),
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ INIT_MEMBER(tp_vectorcall , nullptr),
+#endif
+ // Trailing tp_print is initialized only for versions in [3.8.0b4, 3.9).
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ INIT_MEMBER(tp_print , nullptr),
+#endif
+};
+
+// The single statically allocated yql.Void instance: it starts with a
+// reference count of 1 and has PyVoidType as its type. ToPyVoid hands out new
+// references to this object and FromPyVoid compares against its address.
+PyObject PyVoidObject = {
+ _PyObject_EXTRA_INIT
+ 1, &PyVoidType
+};
+
+// Converts a Void value to Python. Every argument is ignored: the result is
+// always a new reference to the shared PyVoidObject singleton.
+TPyObjectPtr ToPyVoid(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    const NUdf::TUnboxedValuePod& value)
+{
+    Y_UNUSED(ctx);
+    Y_UNUSED(type);
+    Y_UNUSED(value);
+
+    TPyObjectPtr singleton(&PyVoidObject, TPyObjectPtr::ADD_REF);
+    return singleton;
+}
+
+// Converts a Python object back to a Void value. The object has to be the
+// PyVoidObject singleton itself (identity comparison); anything else is
+// rejected through PY_ENSURE. `ctx` and `type` are not consulted.
+NUdf::TUnboxedValue FromPyVoid(
+    const TPyCastContext::TPtr& ctx,
+    const NUdf::TType* type,
+    PyObject* value)
+{
+    Y_UNUSED(ctx);
+    Y_UNUSED(type);
+
+    PY_ENSURE(value == &PyVoidObject, "Expected object of yql.Void type");
+
+    return NUdf::TUnboxedValue::Void();
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_void.h b/yql/essentials/udfs/common/python/bindings/py_void.h
new file mode 100644
index 0000000000..3c8203ab6e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_void.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Type object and the single shared instance that represent Void in Python.
+extern PyTypeObject PyVoidType;
+extern PyObject PyVoidObject;
+
+// Converts a Void value into a Python object.
+TPyObjectPtr ToPyVoid(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ const NKikimr::NUdf::TUnboxedValuePod& value);
+
+// Converts a Python object into a Void value.
+NKikimr::NUdf::TUnboxedValue FromPyVoid(
+ const TPyCastContext::TPtr& ctx,
+ const NKikimr::NUdf::TType* type,
+ PyObject* value);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp
new file mode 100644
index 0000000000..7fbeca2043
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp
@@ -0,0 +1,37 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NPython;
+
+// Round-trip checks for the Void type between Python and MiniKQL.
+Y_UNIT_TEST_SUITE(TPyVoidTest) {
+    // Python -> MiniKQL: yql.Void converts to a non-empty, unboxed value.
+    Y_UNIT_TEST(FromPython) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<void>(
+            "import yql\n"
+            "\n"
+            "def Test():\n"
+            " return yql.Void\n",
+            [](const NUdf::TUnboxedValue& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(false == value.IsBoxed());
+            });
+    }
+
+    // MiniKQL -> Python: Void arrives as the yql.Void singleton of type
+    // yql.TVoid whose str() and repr() are both 'yql.Void'.
+    Y_UNIT_TEST(ToPython) {
+        TPythonTestEngine engine;
+        engine.ToPython<void>(
+            [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) {
+                return NUdf::TUnboxedValue::Void();
+            },
+            "import yql\n"
+            "\n"
+            "def Test(value):\n"
+            " assert str(value) == 'yql.Void'\n"
+            " assert repr(value) == 'yql.Void'\n"
+            " assert isinstance(value, yql.TVoid)\n"
+            " assert value is yql.Void\n");
+    }
+}
diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp
new file mode 100644
index 0000000000..5d1497f7c7
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp
@@ -0,0 +1,251 @@
+#include "py_yql_module.h"
+
+#include "py_void.h"
+#include "py_iterator.h"
+#include "py_list.h"
+#include "py_dict.h"
+#include "py_stream.h"
+#include "py_utils.h"
+#include "py_callable.h"
+
+#include <library/cpp/resource/resource.h>
+#include <yql/essentials/udfs/common/python/python_udf/python_udf.h>
+
+namespace NPython {
+
+// Method table of the `yql` module. It holds nothing but the terminating
+// sentinel entry, so the module defines no module-level functions.
+static PyMethodDef ModuleMethods[] = {
+ { nullptr, nullptr, 0, nullptr } /* sentinel */
+};
+
+#define MODULE_NAME "yql"
+
+#if PY_MAJOR_VERSION >= 3
+#define MODULE_NAME_TYPING "yql.typing"
+#endif
+
+#define MODULE_INITIALIZED_ATTRIBUTE "_initialized"
+
+PyDoc_STRVAR(ModuleDoc,
+ "This module provides YQL specific types for Python.");
+
+#if PY_MAJOR_VERSION >= 3
+PyDoc_STRVAR(ModuleDocTyping,
+ "This module provides annotations for YQL types for Python.");
+#endif
+
+PyDoc_STRVAR(StopIterationException_doc,
+ "Can be throwed to yield stream iteration.");
+
+// Finalizes the type object `Type` with PyType_Ready and throws yexception
+// mentioning `Name` when that fails. The type is not added to any module.
+#define PREPARE_TYPE(Name, Type) \
+ do { \
+ if (PyType_Ready(Type) < 0) { \
+ throw yexception() << "Can't prepare type: " << (Name); \
+ } \
+ } while (0)
+
+// Prepares `Type` (see PREPARE_TYPE) and publishes it as attribute `Name`.
+// Relies on a variable called `module` being in scope at the expansion site.
+// The extra Py_INCREF is taken before PyModule_AddObject is called.
+// NOTE(review): the extra reference is not dropped when PyModule_AddObject
+// fails; the macro throws in that case.
+#define REGISTER_TYPE(Name, Type) \
+ do { \
+ PREPARE_TYPE(Name, Type); \
+ Py_INCREF(Type); \
+ if (PyModule_AddObject(module, (Name), (PyObject*) Type) < 0) { \
+ throw yexception() << "Can't add type: " << (Name); \
+ } \
+ } while (0)
+
+// Stores `Object` under key `Name` via PyDict_SetItemString and throws
+// yexception on failure. Relies on a variable called `dict` being in scope at
+// the expansion site (InitYqlModule passes the module dictionary).
+#define REGISTER_OBJECT(Name, Object) \
+ do { \
+ if (PyDict_SetItemString(dict, (Name), (PyObject *) (Object)) < 0) \
+ throw yexception() << "Can't register object: " << (Name); \
+ } while (0)
+
+// Lazily creates an exception class named MODULE_NAME "." Name with docstring
+// `Doc` and registers it through REGISTER_OBJECT. Nothing happens when
+// `Object` is already non-null. `Name` must be a string literal because it is
+// concatenated with MODULE_NAME at compile time. Like REGISTER_OBJECT, it
+// needs `dict` in scope at the expansion site.
+#define REGISTER_EXCEPTION(Name, Object, Doc) \
+ do { \
+ if (!Object) { \
+ Object = PyErr_NewExceptionWithDoc((char*) MODULE_NAME "." Name, Doc, nullptr, nullptr); \
+ if (!Object) { \
+ throw yexception() << "Can't register exception: " << (Name); \
+ } \
+ REGISTER_OBJECT(Name, Object); \
+ } \
+ } while (0)
+
+#if PY_MAJOR_VERSION >= 3
+// Python 3 definition of the `yql` module: named MODULE_NAME, documented by
+// ModuleDoc, with ModuleMethods as its method table and no slots or
+// traverse/clear/free hooks.
+static PyModuleDef ModuleDefinition = {
+ PyModuleDef_HEAD_INIT,
+ INIT_MEMBER(m_name, MODULE_NAME),
+ INIT_MEMBER(m_doc, ModuleDoc),
+ INIT_MEMBER(m_size, -1),
+ INIT_MEMBER(m_methods, ModuleMethods),
+ INIT_MEMBER(m_slots, nullptr),
+ INIT_MEMBER(m_traverse, nullptr),
+ INIT_MEMBER(m_clear, nullptr),
+ INIT_MEMBER(m_free, nullptr),
+};
+
+// Python 3 definition of the `yql.typing` module: named MODULE_NAME_TYPING,
+// documented by ModuleDocTyping, with no method table. InitYqlModule fills it
+// with attributes by running the embedded typing.py resource.
+static PyModuleDef ModuleDefinitionTyping = {
+ PyModuleDef_HEAD_INIT,
+ INIT_MEMBER(m_name, MODULE_NAME_TYPING),
+ INIT_MEMBER(m_doc, ModuleDocTyping),
+ INIT_MEMBER(m_size, -1),
+ INIT_MEMBER(m_methods, nullptr),
+ INIT_MEMBER(m_slots, nullptr),
+ INIT_MEMBER(m_traverse, nullptr),
+ INIT_MEMBER(m_clear, nullptr),
+ INIT_MEMBER(m_free, nullptr),
+};
+
+// Python 3 init function of the `yql` module.
+//
+// Creates the module from ModuleDefinition and gives it an empty `__path__`
+// tuple, so that the import system treats it as a package and the `yql.typing`
+// submodule can be imported.
+//
+// Returns a new reference to the module, or nullptr with a Python error set
+// when the module or its `__path__` attribute cannot be created.
+PyMODINIT_FUNC PyInit_YQL(void)
+{
+    PyObject* mod = PyModule_Create(&ModuleDefinition);
+    if (!mod) {
+        return nullptr;
+    }
+
+    PyObject* path = Py_BuildValue("()");
+    // PyModule_AddObject steals the reference to `path` only on success, so
+    // on failure both objects have to be released here.
+    if (!path || PyModule_AddObject(mod, "__path__", path) < 0) {
+        Py_XDECREF(path);
+        Py_DECREF(mod);
+        return nullptr;
+    }
+
+    return mod;
+}
+
+void go_throw();
+
+// Python 3 init function of the `yql.typing` module: creates an empty module
+// from ModuleDefinitionTyping and returns it (nullptr when creation fails).
+PyMODINIT_FUNC PyInit_YQLTyping(void)
+{
+    PyObject* const typingModule = PyModule_Create(&ModuleDefinitionTyping);
+    return typingModule;
+}
+#else
+// Python 2 init function of the `yql` module: creates the module with
+// Py_InitModule3 from ModuleMethods and ModuleDoc. The return value of
+// Py_InitModule3 is not inspected here.
+PyMODINIT_FUNC PyInit_YQL(void)
+{
+ Py_InitModule3(MODULE_NAME, ModuleMethods, ModuleDoc);
+}
+#endif
+
+// Adds the init functions of the built-in modules to the interpreter's inittab:
+// `yql` always, `yql.typing` on Python 3 only.
+// NOTE(review): the status codes returned by PyImport_AppendInittab are
+// ignored, so a failed registration only shows up later as an import error.
+void PrepareYqlModule() {
+ PyImport_AppendInittab(MODULE_NAME, &PyInit_YQL);
+#if PY_MAJOR_VERSION >= 3
+ PyImport_AppendInittab(MODULE_NAME_TYPING, &PyInit_YQLTyping);
+#endif
+}
+
+#if PY_MAJOR_VERSION >= 3
+// Puts an already created module object into sys.modules under `name`.
+// Throws yexception when `module` is null or not a module object, when the
+// sys.modules dictionary is unavailable, or when the insertion fails.
+void RegisterRuntimeModule(const char* name, PyObject* module) {
+    const bool isModule = module != nullptr && PyModule_Check(module);
+    if (!isModule) {
+        throw yexception() << "Invalid object for module " << name;
+    }
+
+    // PyImport_GetModuleDict() hands out a borrowed reference, so there is
+    // nothing to release on any of the paths below.
+    PyObject* const sysModules = PyImport_GetModuleDict();
+    const bool isDict = sysModules != nullptr && PyDict_CheckExact(sysModules);
+    if (!isDict) {
+        throw yexception() << "Can't get sys.modules dictionary";
+    }
+
+    const int rc = PyDict_SetItemString(sysModules, name, module);
+    if (rc < 0) {
+        throw yexception() << "Can't register module " << name;
+    }
+}
+#endif
+
+// Makes the `yql` module usable from Python code.
+//
+// Steps:
+//  1. Import `yql`. When that fails and `standalone` is false, create the
+//     module by calling PyInit_YQL directly (registering it in sys.modules on
+//     Python 3) and import it again.
+//  2. Return early when the module already carries `_initialized == True`.
+//  3. Register TVoid/Void, prepare the remaining binding types and create the
+//     TYieldIteration exception.
+//  4. On Python 3 with the Arcadia flavor: make `yql.typing` importable (when
+//     not standalone), run the embedded typing.py resource and call its
+//     `main()` to populate the annotations module.
+//  5. Mark the module as initialized.
+//
+// Throws yexception when the module cannot be obtained, when a type or object
+// cannot be registered, or when the annotations module cannot be set up.
+void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone) {
+    TPyObjectPtr m = PyImport_ImportModule(MODULE_NAME);
+    if (!standalone && !m) {
+        PyErr_Clear();
+#if PY_MAJOR_VERSION >= 3
+        m = PyInit_YQL();
+        RegisterRuntimeModule(MODULE_NAME, m.Get());
+#else
+        PyInit_YQL();
+#endif
+        m = PyImport_ImportModule(MODULE_NAME);
+    }
+
+    PyObject* module = m.Get();
+
+    if (!module) {
+        throw yexception() << "Can't get YQL module.";
+    }
+
+    // A missing attribute means "not initialized yet"; only the attribute
+    // lookup error is discarded here.
+    TPyObjectPtr initialized = PyObject_GetAttrString(module, MODULE_INITIALIZED_ATTRIBUTE);
+    if (!initialized) {
+        PyErr_Clear();
+    } else if (initialized.Get() == Py_True) {
+        return;
+    }
+
+    // `module` and `dict` are referenced by name from the REGISTER_* macros.
+    PyObject* dict = PyModule_GetDict(module);
+
+    REGISTER_TYPE("TVoid", &PyVoidType);
+    REGISTER_OBJECT("Void", &PyVoidObject);
+
+    PREPARE_TYPE("TIterator", &PyIteratorType);
+    PREPARE_TYPE("TPairIterator", &PyPairIteratorType);
+
+    PREPARE_TYPE("TDict", &PyLazyDictType);
+    PREPARE_TYPE("TSet", &PyLazySetType);
+
+    PREPARE_TYPE("TLazyListIterator", &PyLazyListIteratorType);
+    PREPARE_TYPE("TLazyList", &PyLazyListType);
+    PREPARE_TYPE("TThinListIterator", &PyThinListIteratorType);
+    PREPARE_TYPE("TThinList", &PyThinListType);
+
+    PREPARE_TYPE("TStream", &PyStreamType);
+    PREPARE_TYPE("TCallable", &PyCallableType);
+
+    REGISTER_EXCEPTION("TYieldIteration", PyYieldIterationException, StopIterationException_doc);
+
+#if PY_MAJOR_VERSION >= 3
+    if (pythonFlavor == NYql::NUdf::EPythonFlavor::Arcadia) {
+        if (!standalone) {
+            TPyObjectPtr typingModule = PyImport_ImportModule(MODULE_NAME_TYPING);
+            if (!typingModule) {
+                PyErr_Clear();
+                typingModule = PyInit_YQLTyping();
+                RegisterRuntimeModule(MODULE_NAME_TYPING, typingModule.Get());
+            }
+        }
+
+        const auto typing = NResource::Find(TStringBuf("typing.py"));
+        const auto rc = PyRun_SimpleStringFlags(typing.c_str(), nullptr);
+
+        if (rc < 0) {
+            // Not sure if PyErr_Print() works after PyRun_SimpleStringFlags,
+            // but just in case...
+            PyErr_Print();
+            ythrow yexception() << "Can't parse YQL type annotations module";
+        }
+
+        // Throws when `obj` is null. The pending Python error is printed to
+        // Cerr first (when it can be rendered) and then left set.
+        auto processError = [&] (PyObject* obj, TStringBuf message) {
+            if (obj) {
+                return;
+            }
+            PyObject* ptype = nullptr;
+            PyObject* pvalue = nullptr;
+            PyObject* ptraceback = nullptr;
+            PyErr_Fetch(&ptype, &pvalue, &ptraceback);
+            if (pvalue) {
+                // PyObject_Str returns a new reference: keep it in a smart
+                // pointer so that it is released after printing.
+                TPyObjectPtr pstr = PyObject_Str(pvalue);
+                if (pstr) {
+                    if (auto err_msg = PyUnicode_AsUTF8(pstr.Get())) {
+                        Cerr << err_msg << Endl;
+                    }
+                }
+            }
+            // PyErr_Restore takes over all three references (nulls are fine)
+            // and replaces any error raised while formatting the message.
+            PyErr_Restore(ptype, pvalue, ptraceback);
+            ythrow yexception() << "Can't setup YQL type annotations module: " << message;
+        };
+
+        // Owning pointers release everything acquired so far when one of the
+        // later steps throws; on success they are released in reverse order.
+        TPyObjectPtr main = PyImport_ImportModule("__main__");
+        processError(main.Get(), "PyImport_ImportModule");
+        TPyObjectPtr function = PyObject_GetAttrString(main.Get(), "main");
+        processError(function.Get(), "PyObject_GetAttrString");
+        TPyObjectPtr args = PyTuple_New(0);
+        processError(args.Get(), "PyTuple_New");
+        TPyObjectPtr result = PyObject_CallObject(function.Get(), args.Get());
+        processError(result.Get(), "PyObject_CallObject");
+    }
+#endif
+
+    REGISTER_OBJECT(MODULE_INITIALIZED_ATTRIBUTE, Py_True);
+}
+
+// Forgets the cached TYieldIteration exception object, so that the next
+// InitYqlModule call creates it again (REGISTER_EXCEPTION only acts on a null
+// pointer).
+// NOTE(review): the pointer is reset without a Py_DECREF - presumably because
+// the interpreter is finalized right afterwards; confirm against callers.
+void TermYqlModule() {
+ PyYieldIterationException = nullptr;
+}
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.h b/yql/essentials/udfs/common/python/bindings/py_yql_module.h
new file mode 100644
index 0000000000..970471d029
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <yql/essentials/udfs/common/python/python_udf/python_udf.h>
+
+namespace NPython {
+
+// Registers the built-in `yql` module (and `yql.typing` on Python 3) in the
+// interpreter's inittab.
+void PrepareYqlModule();
+// Imports the `yql` module and fills it with the YQL types and objects.
+// With standalone == false the module is created on the spot when the import
+// fails.
+void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone = true);
+// Resets the module-level state cached by InitYqlModule.
+void TermYqlModule();
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/typing.py b/yql/essentials/udfs/common/python/bindings/typing.py
new file mode 100644
index 0000000000..0e53ad1e0a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/typing.py
@@ -0,0 +1,188 @@
+# Populates the `yql.typing` module with type-annotation helpers.
+# The function installs an extra meta-path finder for built-in modules, imports
+# the type_info library, defines the YQL-specific generics (Stream, Resource,
+# Callable, AutoMap) and finally assigns all public names onto `yql.typing`.
+def main():
+ import importlib.abc
+ import importlib.machinery
+ import sys
+
+ # Finder that resolves any name listed in sys.builtin_module_names through
+ # BuiltinImporter; for every other name find_spec falls through and
+ # returns None.
+ class Finder(importlib.abc.MetaPathFinder):
+ def find_spec(self, fullname, path, target=None):
+ if fullname in sys.builtin_module_names:
+ return importlib.machinery.ModuleSpec(
+ fullname,
+ importlib.machinery.BuiltinImporter,
+ )
+
+ # Appended at the end of sys.meta_path, i.e. consulted after the finders
+ # that are already installed.
+ sys.meta_path.append(Finder())
+
+ try:
+ import yandex.type_info.type_base as ti_base
+ import yandex.type_info.typing as ti_typing
+ import six
+ except ImportError as e:
+ # Re-raised with a hint about the missing build dependency.
+ raise ImportError(
+ str(e) + ". Make sure that library/python/type_info is in your PEERDIR list"
+ )
+
+ from yql import typing
+
+ # Flag type that may be attached to callable arguments (see _extract_arg_info).
+ AutoMap = ti_base.make_primitive_type("AutoMap")
+
+ # Renders one callable argument given as a (name, type, flags) triple:
+ # an optional quoted name followed by ':', the type, and an optional
+ # "{Flags:...}" suffix with the sorted flags joined by commas.
+ def _format_arg(arg):
+ res = []
+ if arg[0]:
+ res.append("{}:".format(ti_base.quote_string(arg[0])))
+ res.append(str(arg[1]))
+ if arg[2]:
+ res.append("{Flags:")
+ res.append(",".join(str(x) for x in sorted(list(arg[2]))))
+ res.append("}")
+ return "".join(res)
+
+ Stream = ti_typing._SingleArgumentGeneric("Stream")
+
+ # Concrete Resource<tag> type produced by GenericResource.__getitem__.
+ @six.python_2_unicode_compatible
+ class GenericResourceAlias(ti_base.Type):
+ REQUIRED_ATTRS = ti_base.Type.REQUIRED_ATTRS + ["tag"]
+
+ def __str__(self):
+ return u"{}<{}>".format(self.name, ti_base.quote_string(self.tag))
+
+ def to_yson_type(self):
+ return {"type_name": self.yt_type_name, "tag": self.tag}
+
+ # Generic that is parameterized by a single string tag: Resource["tag"].
+ class GenericResource(ti_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, str):
+ raise ValueError("Expected str, but got: {}".format(ti_base._with_type(params)))
+
+ attrs = {
+ "name": self.name,
+ "yt_type_name": self.yt_type_name,
+ "tag": params,
+ }
+
+ return GenericResourceAlias(attrs)
+
+ def from_dict(self):
+ raise NotImplementedError()
+
+ Resource = GenericResource("Resource")
+
+ # Normalizes one callable argument into a (name, type, flags) triple.
+ # A slice `name:type:flags` supplies all three parts (name defaults to "",
+ # flags may only contain AutoMap); any other value is taken as the type of
+ # an unnamed argument. Raises ValueError for a non-str name or an
+ # unexpected flag.
+ def _extract_arg_info(param):
+ name = ""
+ arg_type = param
+ flags = set()
+ if isinstance(param, slice):
+ name = param.start
+ if name is None:
+ name = ""
+ if not isinstance(name, str):
+ raise ValueError("Expected str as argument name but got: {}".format(ti_base._with_type(name)))
+ arg_type = param.stop
+ ti_base.validate_type(arg_type)
+ if param.step is not None:
+ for x in param.step:
+ if x != AutoMap:
+ raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x)))
+ flags.add(x)
+ else:
+ ti_base.validate_type(arg_type)
+ return (name, arg_type, flags)
+
+ # Concrete callable type produced by GenericCallable.__getitem__.
+ @six.python_2_unicode_compatible
+ class GenericCallableAlias(ti_base.Type):
+ # Renders "Callable<(required...,[optional...])->return>": the last
+ # `optional_args` arguments are printed inside square brackets.
+ def __str__(self):
+ return ("Callable<(" +
+ ",".join(_format_arg(x) for x in self.args[:len(self.args)-self.optional_args]) +
+ ("," if len(self.args) > self.optional_args and self.optional_args else "") +
+ ("[" if self.optional_args else "") +
+ ",".join(_format_arg(x) for x in self.args[len(self.args)-self.optional_args:]) +
+ ("]" if self.optional_args else "") +
+ ")->" + str(getattr(self, "return")) + ">")
+
+ def to_yson_type(self):
+ yson_repr = {
+ "optional_args": self.optional_args,
+ "return": getattr(self, "return"),
+ "args": self.args,
+ "type_name": self.yt_type_name,
+ }
+ return yson_repr
+
+
+ # Generic used as Callable[optional_count, return_type, arg, ...].
+ class GenericCallable(ti_base.Generic):
+ def __getitem__(self, params):
+ if not isinstance(params, tuple) or len(params) < 2 or not isinstance(params[0], int) or not ti_typing.is_valid_type(params[1]):
+ raise ValueError("Expected at least two arguments (integer and type of return value) but got: {}".format(ti_base._with_type(params)))
+ args = []
+ for param in params[2:]:
+ name, arg_type, flags = _extract_arg_info(param)
+ args.append((name, arg_type, flags))
+
+ # The optional-argument count cannot exceed the number of arguments.
+ if params[0] < 0 or params[0] > len(args):
+ raise ValueError("Optional argument count - " + str(params[0]) + " out of range [0.." + str(len(args)) + "]")
+
+ # NOTE(review): "name"/"yt_type_name" are hard-coded to "Tagged"/"tagged"
+ # here, whereas GenericResource uses self.name/self.yt_type_name -
+ # confirm that this is intended.
+ attrs = {
+ "optional_args": params[0],
+ "return": params[1],
+ "args": args,
+ "name": "Tagged",
+ "yt_type_name": "tagged",
+ }
+
+ return GenericCallableAlias(attrs)
+
+ def from_dict(self):
+ raise NotImplementedError()
+
+ Callable = GenericCallable("Callable")
+
+ # Returns the textual form of one callable argument, or None when the
+ # argument is rejected with ValueError.
+ def parse_slice_arg(arg):
+ try:
+ return _format_arg(_extract_arg_info(arg))
+ except ValueError:
+ pass
+
+ # Helpers exposed on yql.typing.
+ typing.Type = ti_base.Type
+ typing.is_valid_type = ti_base.is_valid_type
+ typing.parse_slice_arg = parse_slice_arg
+
+ # Types re-exported from the type_info library under the same names.
+ typing.Bool = ti_typing.Bool
+ typing.Int8 = ti_typing.Int8
+ typing.Uint8 = ti_typing.Uint8
+ typing.Int16 = ti_typing.Int16
+ typing.Uint16 = ti_typing.Uint16
+ typing.Int32 = ti_typing.Int32
+ typing.Uint32 = ti_typing.Uint32
+ typing.Int64 = ti_typing.Int64
+ typing.Uint64 = ti_typing.Uint64
+ typing.Float = ti_typing.Float
+ typing.Double = ti_typing.Double
+ typing.String = ti_typing.String
+ typing.Utf8 = ti_typing.Utf8
+ typing.Yson = ti_typing.Yson
+ typing.Json = ti_typing.Json
+ typing.Uuid = ti_typing.Uuid
+ typing.Date = ti_typing.Date
+ typing.Datetime = ti_typing.Datetime
+ typing.Timestamp = ti_typing.Timestamp
+ typing.Interval = ti_typing.Interval
+ typing.TzDate = ti_typing.TzDate
+ typing.TzDatetime = ti_typing.TzDatetime
+ typing.TzTimestamp = ti_typing.TzTimestamp
+ typing.Void = ti_typing.Void
+ typing.Null = ti_typing.Null
+ typing.EmptyTuple = ti_typing.EmptyTuple
+ typing.EmptyStruct = ti_typing.EmptyStruct
+ typing.Optional = ti_typing.Optional
+ typing.List = ti_typing.List
+ typing.Dict = ti_typing.Dict
+ typing.Tuple = ti_typing.Tuple
+ typing.Struct = ti_typing.Struct
+ typing.Variant = ti_typing.Variant
+ typing.Tagged = ti_typing.Tagged
+ typing.Decimal = ti_typing.Decimal
+
+ # YQL-specific types defined above.
+ typing.Stream = Stream
+ typing.Resource = Resource
+ typing.Callable = Callable
+ typing.AutoMap = AutoMap
diff --git a/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h
new file mode 100644
index 0000000000..a36e19fa32
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h
@@ -0,0 +1,227 @@
+#pragma once
+
+#include "py_cast.h"
+#include "py_yql_module.h"
+#include "py_utils.h"
+
+#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
+#include <yql/essentials/minikql/mkql_type_builder.h>
+#include <yql/essentials/minikql/computation/mkql_value_builder.h>
+#include <yql/essentials/udfs/common/python/python_udf/python_udf.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#define PYTHON_TEST_TAG "Python2Test"
+
+
+using namespace NKikimr;
+using namespace NMiniKQL;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyInitializer
+//////////////////////////////////////////////////////////////////////////////
+// Process-wide interpreter setup for the unit tests (TPythonTestEngine
+// instantiates it through Singleton<>).
+// The constructor adds the yql modules to the inittab before the interpreter
+// is initialized and only then initializes the module contents; the destructor
+// undoes this in reverse order.
+struct TPyInitializer {
+ TPyInitializer() {
+ PrepareYqlModule();
+ Py_Initialize();
+ InitYqlModule(NYql::NUdf::EPythonFlavor::Arcadia);
+ }
+ ~TPyInitializer() {
+ TermYqlModule();
+ Py_Finalize();
+ }
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TPythonTestEngine
+//////////////////////////////////////////////////////////////////////////////
+// Test harness that runs small Python snippets and converts values between
+// Python objects and MiniKQL unboxed values.
+//
+// Every script passed to the helpers must define a function called `Test`;
+// the script is compiled, executed as module "py_cast_ut", and `Test` is then
+// called with the arguments prepared by the helper.
+class TPythonTestEngine {
+public:
+    TPythonTestEngine()
+        : MemInfo_("Memory")
+        , Alloc_(__LOCATION__)
+        , Env_(Alloc_)
+        , TypeInfoHelper_(new TTypeInfoHelper)
+        , FunctionInfoBuilder_(Env_, TypeInfoHelper_, "", nullptr, {})
+    {
+        HolderFactory_ = MakeHolder<THolderFactory>(
+            Alloc_.Ref(),
+            MemInfo_,
+            nullptr);
+        ValueBuilder_ = MakeHolder<TDefaultValueBuilder>(*HolderFactory_, NUdf::EValidatePolicy::Exception);
+        BindTerminator_ = MakeHolder<TBindTerminator>(ValueBuilder_.Get());
+        // The interpreter is initialized once per process.
+        Singleton<TPyInitializer>();
+        CastCtx_ = MakeIntrusive<TPyCastContext>(&GetValueBuilder(),
+            MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition())
+        );
+    }
+
+    ~TPythonTestEngine() {
+        PyCleanup();
+    }
+
+    NUdf::IFunctionTypeInfoBuilder& GetTypeBuilder() {
+        return FunctionInfoBuilder_;
+    }
+
+    const NUdf::IValueBuilder& GetValueBuilder() const {
+        return *ValueBuilder_;
+    }
+
+    // Runs `Test()` from `script`, converts its result to `udfType` and hands
+    // the converted value to `checker`.
+    template <typename TChecker>
+    void ToMiniKQL(NUdf::TType* udfType, const TStringBuf& script, TChecker&& checker) {
+        TPyObjectPtr result = RunPythonFunction(script);
+        UNIT_ASSERT_C(!!result, script);
+
+        TType* type = static_cast<TType*>(udfType);
+        auto value = FromPyObject(CastCtx_, type, result.Get());
+        checker(value);
+    }
+
+    // Same as above, with the type derived from TExpectedType.
+    template <typename TExpectedType, typename TChecker>
+    void ToMiniKQL(const TStringBuf& script, TChecker&& checker) {
+        auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+        // std::forward keeps the call valid for lvalue checkers as well.
+        ToMiniKQL<TChecker>(type, script, std::forward<TChecker>(checker));
+    }
+
+    // Runs `Test(argValue)` from `script`, converts its result to `udfType`
+    // and hands the converted value to `checker`.
+    template <typename TChecker>
+    void ToMiniKQLWithArg(
+        NUdf::TType* udfType, PyObject* argValue,
+        const TStringBuf& script, TChecker&& checker)
+    {
+        TPyObjectPtr args = Py_BuildValue("(O)", argValue);
+
+        auto result = RunPythonFunction(script, args.Get());
+        if (!result || PyErr_Occurred()) {
+            PyErr_Print();
+            UNIT_FAIL("function execution error");
+        }
+
+        TType* type = static_cast<TType*>(udfType);
+        auto value = FromPyObject(CastCtx_, type, result.Get());
+        checker(value);
+    }
+
+    // Same as above, with the type derived from TExpectedType.
+    template <typename TExpectedType, typename TChecker>
+    void ToMiniKQLWithArg(
+        PyObject* argValue,
+        const TStringBuf& script, TChecker&& checker)
+    {
+        auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+        ToMiniKQLWithArg<TChecker>(type, argValue, script, std::forward<TChecker>(checker));
+    }
+
+    // Builds a MiniKQL value with `builder(type, valueBuilder)`, converts it to
+    // a Python object and calls `Test(value)` from `script` with it.
+    // Returns whatever `Test` returned; conversion and execution problems fail
+    // the current unit test.
+    template <typename TMiniKQLValueBuilder>
+    TPyObjectPtr ToPython(
+        NUdf::TType* udfType,
+        TMiniKQLValueBuilder&& builder,
+        const TStringBuf& script)
+    {
+        try {
+            TType* type = static_cast<TType*>(udfType);
+            NUdf::TUnboxedValue value = builder(type, GetValueBuilder());
+            TPyObjectPtr pyValue = ToPyObject(CastCtx_, type, value);
+            if (!pyValue || PyErr_Occurred()) {
+                PyErr_Print();
+                UNIT_FAIL("object execution error");
+            }
+            TPyObjectPtr args = Py_BuildValue("(O)", pyValue.Get());
+
+            auto result = RunPythonFunction(script, args.Get());
+            if (!result || PyErr_Occurred()) {
+                PyErr_Print();
+                UNIT_FAIL("function execution error");
+            }
+            return result;
+        } catch (const yexception& e) {
+            Cerr << e << Endl;
+            UNIT_FAIL("cast error");
+        }
+
+        Py_RETURN_NONE;
+    }
+
+    // Same as above, with the type derived from TExpectedType.
+    template <typename TExpectedType, typename TMiniKQLValueBuilder>
+    TPyObjectPtr ToPython(TMiniKQLValueBuilder&& builder, const TStringBuf& script) {
+        auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+        return ToPython<TMiniKQLValueBuilder>(type, std::forward<TMiniKQLValueBuilder>(builder), script);
+    }
+
+    // Runs `Test()` from `script` and returns its result converted to `udfType`.
+    NUdf::TUnboxedValue FromPython(NUdf::TType* udfType, const TStringBuf& script) {
+        auto result = RunPythonFunction(script);
+        if (!result || PyErr_Occurred()) {
+            PyErr_Print();
+            UNIT_FAIL("function execution error");
+        }
+
+        TType* type = static_cast<TType*>(udfType);
+        return FromPyObject(CastCtx_, type, result.Get());
+    }
+
+    // Same as above, with the type derived from TExpectedType.
+    template <typename TExpectedType>
+    NUdf::TUnboxedValue FromPython(const TStringBuf& script) {
+        auto type = GetTypeBuilder().SimpleType<TExpectedType>();
+        return FromPython(type, script);
+    }
+
+    // Round trip: the built value is passed to `Test` as TArgumentType and the
+    // value returned by `Test` is converted back as TReturnType.
+    template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder>
+    NUdf::TUnboxedValue ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script) {
+        const auto aType = GetTypeBuilder().SimpleType<TArgumentType>();
+        const auto result = ToPython<TMiniKQLValueBuilder>(aType, std::forward<TMiniKQLValueBuilder>(builder), script);
+
+        if (!result || PyErr_Occurred()) {
+            PyErr_Print();
+            UNIT_FAIL("function execution error");
+        }
+
+        const auto rType = static_cast<TType*>(GetTypeBuilder().SimpleType<TReturnType>());
+        return FromPyObject(CastCtx_, rType, result.Get());
+    }
+
+    // Round trip whose converted result is handed to `checker`.
+    template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder, typename TChecker>
+    void ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script, TChecker&& checker) {
+        const auto result = ToPythonAndBack<TArgumentType, TReturnType, TMiniKQLValueBuilder>(
+            std::forward<TMiniKQLValueBuilder>(builder), script);
+        checker(result);
+    }
+
+private:
+    // Compiles `script`, executes it as module "py_cast_ut" and calls its
+    // `Test` function with `args` (a tuple, or nullptr for no arguments).
+    // Returns the call result; it is null when the call raised.
+    TPyObjectPtr RunPythonFunction(
+        const TStringBuf& script, PyObject* args = nullptr)
+    {
+        // Py_CompileString expects a NUL-terminated string, which a TStringBuf
+        // does not guarantee - compile from an owned copy instead.
+        const TString source(script);
+        TString filename(TStringBuf("embedded:test.py"));
+        TPyObjectPtr code(Py_CompileString(source.c_str(), filename.data(), Py_file_input));
+        if (!code) {
+            PyErr_Print();
+            UNIT_FAIL("can't compile python script");
+        }
+
+        TString moduleName(TStringBuf("py_cast_ut"));
+        TPyObjectPtr module(PyImport_ExecCodeModule(moduleName.begin(), code.Get()));
+        if (!module) {
+            PyErr_Print();
+            UNIT_FAIL("can't create python module");
+        }
+
+        TPyObjectPtr function(PyObject_GetAttrString(module.Get(), "Test"));
+        if (!function) {
+            PyErr_Print();
+            UNIT_FAIL("function 'Test' is not found in module");
+        }
+        return PyObject_CallObject(function.Get(), args);
+    }
+
+private:
+    TMemoryUsageInfo MemInfo_;
+    TScopedAlloc Alloc_;
+    TTypeEnvironment Env_;
+    const NUdf::ITypeInfoHelper::TPtr TypeInfoHelper_;
+    TFunctionTypeInfoBuilder FunctionInfoBuilder_;
+    THolder<THolderFactory> HolderFactory_;
+    THolder<TDefaultValueBuilder> ValueBuilder_;
+    THolder<TBindTerminator> BindTerminator_;
+    TPyCastContext::TPtr CastCtx_;
+};
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/ut3/ya.make b/yql/essentials/udfs/common/python/bindings/ut3/ya.make
new file mode 100644
index 0000000000..b9d500938c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/ut3/ya.make
@@ -0,0 +1,37 @@
+IF (OS_LINUX)
+ IF (NOT WITH_VALGRIND)
+ UNITTEST_FOR(yql/essentials/udfs/common/python/bindings)
+
+ SRCS(
+ py_callable_ut.cpp
+ py_cast_ut.cpp
+ py_dict_ut.cpp
+ py_list_ut.cpp
+ py_decimal_ut.cpp
+ py_number_ut.cpp
+ py_optional_ut.cpp
+ py_resource_ut.cpp
+ py_stream_ut.cpp
+ py_string_ut.cpp
+ py_struct_ut.cpp
+ py_tuple_ut.cpp
+ py_tzdate_ut.cpp
+ py_utils_ut.cpp
+ py_variant_ut.cpp
+ py_void_ut.cpp
+ )
+
+ USE_PYTHON3()
+
+ PEERDIR(
+ library/python/type_info
+ yql/essentials/minikql/computation/llvm14
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/sql/pg_dummy
+ )
+
+ YQL_LAST_ABI_VERSION()
+
+ END()
+ ENDIF()
+ENDIF()
diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make
new file mode 100644
index 0000000000..efb5b475c4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/ya.make
@@ -0,0 +1,54 @@
+PY23_NATIVE_LIBRARY()
+
+YQL_ABI_VERSION(2 27 0)
+
+SRCS(
+ py_callable.cpp
+ py_cast.cpp
+ py_decimal.cpp
+ py_errors.cpp
+ py_dict.cpp
+ py_list.cpp
+ py_lazy_mkql_dict.cpp
+ py_lazy_mkql_list.cpp
+ py_iterator.cpp
+ py_resource.cpp
+ py_stream.cpp
+ py_struct.cpp
+ py_tuple.cpp
+ py_utils.cpp
+ py_variant.cpp
+ py_void.cpp
+ py_yql_module.cpp
+)
+
+IF (USE_SYSTEM_PYTHON AND _SYSTEM_PYTHON27)
+ # we should be able to run on python 2.7.X versions
+ # with X ranging from 3 to (at least) 15
+ #
+ # for now the bindings already use some functionality from 2.7.15
+ # which doesn't exist in earlier versions
+ # (the corresponding symbols won't be loaded from the system python)
+ #
+ # so we provide backported implementation for this scenario to work as intended
+ SRCS(
+ py27_backports.c
+ )
+ENDIF()
+
+RESOURCE(
+ typing.py typing.py
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/utils
+)
+
+NO_COMPILER_WARNINGS()
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut3
+)
diff --git a/yql/essentials/udfs/common/python/main_py3/__main__.pyx b/yql/essentials/udfs/common/python/main_py3/__main__.pyx
new file mode 100644
index 0000000000..6f4ca94358
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/__main__.pyx
@@ -0,0 +1,50 @@
+import os
+import runpy
+import importlib
+
+import __res
+
+
+cdef env_entry_point = 'Y_PYTHON_ENTRY_POINT'
+
+
+cdef extern from 'main.h':
+ pass
+
+
+def find_pymain():
+ py_main = __res.find('PY_MAIN')
+
+ if isinstance(py_main, bytes):
+ py_main = py_main.decode('utf8')
+
+ if isinstance(py_main, unicode):
+ return py_main
+
+ return None
+
+
+def run_main():
+ entry_point = os.environ.pop(env_entry_point, None)
+
+ if entry_point is None:
+ entry_point = find_pymain()
+
+ if entry_point is None:
+ raise RuntimeError('No entry point found')
+
+ module_name, colon, func_name = entry_point.partition(':')
+
+ if not colon:
+ runpy._run_module_as_main(module_name, alter_argv=False)
+ return
+
+ if not module_name:
+ module_name = 'library.python.runtime_py3.entry_points'
+
+ module = importlib.import_module(module_name)
+ func = getattr(module, func_name)
+ func()
+
+
+run_main()
diff --git a/yql/essentials/udfs/common/python/main_py3/include/main.h b/yql/essentials/udfs/common/python/main_py3/include/main.h
new file mode 100644
index 0000000000..c96402004e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/include/main.h
@@ -0,0 +1,12 @@
+#pragma once
+#include <util/system/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point of the embedded Python program; takes the usual argc/argv pair
+ * and returns an int status. Declared with C linkage when compiled as C++. */
+Y_PUBLIC
+int RunPython(int argc, char** argv);
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/yql/essentials/udfs/common/python/main_py3/main.cpp b/yql/essentials/udfs/common/python/main_py3/main.cpp
new file mode 100644
index 0000000000..edc3c89ca5
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/main.cpp
@@ -0,0 +1,9 @@
+#include "main.h"
+
+extern "C"
+int RunPythonImpl(int argc, char** argv);
+
+// Exported wrapper: forwards argc/argv unchanged to RunPythonImpl (declared
+// above with C linkage) and returns its result.
+extern "C"
+int RunPython(int argc, char** argv) {
+ return RunPythonImpl(argc, argv);
+}
diff --git a/yql/essentials/udfs/common/python/main_py3/ya.make b/yql/essentials/udfs/common/python/main_py3/ya.make
new file mode 100644
index 0000000000..cc13fb77e4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/main_py3/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+USE_PYTHON3()
+
+ADDINCL(
+ yql/essentials/udfs/common/python/main_py3/include
+)
+
+SRCS(GLOBAL main.cpp)
+
+BUILDWITH_CYTHON_C(__main__.pyx --embed=RunPythonImpl)
+
+END()
diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json
new file mode 100644
index 0000000000..dd55da78b5
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json
@@ -0,0 +1,61 @@
+{
+ "test.test[Annotations]": [
+ {
+ "checksum": "19c6d906cb8617cf9d2b5d484e09caf8",
+ "size": 7570,
+ "uri": "https://{canondata_backend}/212715/49b4751c22bd43fa7057cc92ae5cbedb40404f40/resource.tar.gz#test.test_Annotations_/results.txt"
+ }
+ ],
+ "test.test[BytesDecodeModeStrict]": [
+ {
+ "checksum": "f8534cff0843faaf876c41e0875dcf05",
+ "size": 3120,
+ "uri": "https://{canondata_backend}/1775319/4c4fed0942b33bcc70d44f7dd2972a8e05c6db97/resource.tar.gz#test.test_BytesDecodeModeStrict_/results.txt"
+ }
+ ],
+ "test.test[Cleanup]": [
+ {
+ "checksum": "036e77892757e48fa3fb319ed324b019",
+ "size": 954,
+ "uri": "https://{canondata_backend}/1871182/9909e0b25b15bb1f21d5def23fb072d64c82f07e/resource.tar.gz#test.test_Cleanup_/results.txt"
+ }
+ ],
+ "test.test[CustomYsonConverter]": [
+ {
+ "checksum": "7716204e544d2fcb9313412c3919e66d",
+ "size": 1625,
+ "uri": "https://{canondata_backend}/1130705/576535b56a4e74992911431865e5edd0f7d55520/resource.tar.gz#test.test_CustomYsonConverter_/results.txt"
+ }
+ ],
+ "test.test[Data]": [
+ {
+ "checksum": "f40e83806b294be420681fdfbf2133e8",
+ "size": 25268,
+ "uri": "https://{canondata_backend}/1031349/7065a0985fe0cd26a754a5bee7a4c808836a4692/resource.tar.gz#test.test_Data_/results.txt"
+ }
+ ],
+ "test.test[Excepthook]": [
+ {
+ "uri": "file://test.test_Excepthook_/extracted"
+ }
+ ],
+ "test.test[GreedyInputContainers]": [
+ {
+ "checksum": "02a619c86f180e8a4c536087d64bab6d",
+ "size": 1328,
+ "uri": "https://{canondata_backend}/995452/085d43bbd16f44afc51d6cafed42465a3d20215c/resource.tar.gz#test.test_GreedyInputContainers_/results.txt"
+ }
+ ],
+ "test.test[OptionalNested]": [
+ {
+ "uri": "file://test.test_OptionalNested_/extracted"
+ }
+ ],
+ "test.test[Switch]": [
+ {
+ "checksum": "e60320702512bdcecd5c663f387ee939",
+ "size": 9172,
+ "uri": "https://{canondata_backend}/1130705/493ee46b1e8f2e848ab928f97913d332cb4fffc7/resource.tar.gz#test.test_Switch_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted
new file mode 100644
index 0000000000..b260fe7616
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted
@@ -0,0 +1,15 @@
+<tmp_path>/program.sql:<main>: Fatal: Execution
+
+ <tmp_path>/program.sql:<main>:44:1: Fatal: Execution of node: Result
+ SELECT $udf(@@{"abc":1}@@);
+ ^
+ <tmp_path>/program.sql:<main>:40:17: Fatal: Failed to execute:
+CUSTOM_EXCEPTHOOK
+True
+Traceback (most recent call last):
+ File "embedded:f", line 31, in f
+Exception
+
+
+ $udf = Python3::f(Callable<(String)->String>, $script);
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted
new file mode 100644
index 0000000000..413eb2f4ec
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted
@@ -0,0 +1,14 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+ <tmp_path>/program.sql:<main>:12:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+ SELECT $optOptList("42");
+ ^
+ <tmp_path>/program.sql:<main>:12:8: Error: At function: Apply
+ SELECT $optOptList("42");
+ ^
+ <tmp_path>/program.sql:<main>:2:24: Error: At function: ScriptUdf
+ $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@
+ ^
+ <tmp_path>/program.sql:<main>:2:24: Error: Nested optionals are unsupported in script UDF
+ $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@
+ ^ \ No newline at end of file
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql
new file mode 100644
index 0000000000..3f845322e2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql
@@ -0,0 +1,67 @@
+--sanitizer ignore memory
+$script = @@
+from yql.typing import *
+
+def primitive(a0:Bool,a1:Int8,a2:Uint8,a3:Int16,a4:Uint16,a5:Int32,a6:Uint32,
+ a7:Int64,a8:Uint64,a9:Float,a10:Double,a11:String,a12:Utf8,a13:Yson,a14:Json,
+ a15:Uuid,a16:Date,a17:Datetime,a18:Timestamp,a19:Interval,a20:TzDate,
+ a21:TzDatetime,a22:TzTimestamp)->Decimal(10,3):
+ pass
+
+def singletons(a0:Void,a1:Null,a2:EmptyStruct,a3:EmptyTuple)->Void:
+ pass
+
+def containers(a0:Optional[Int32],a1:List[List[Bool]],a2:Stream[String],a3:Dict[Int32,String],
+ a4:Tuple[Int32,String],a5:Tuple[Int32],a6:Struct["a":Int32,"b":String],a7:Struct["a":Int32],
+ a8:Variant[Int32,String],a9:Variant[Int32],a10:Variant["a":Int32,"b":String],a11:Variant["a":Int32])->List[String]:
+ pass
+
+def special(a0:Resource["Python3"],a1:Tagged[Int32,"foo"])->Void:
+ pass
+
+def c0()->Callable[0,Int32]: pass
+def c1()->Callable[1,Int32,Optional[List[Int32]]]: pass
+def c2()->Callable[1,Int32,Int32,Optional[List[Int32]]]: pass
+def c3()->Callable[0,Int32,"a":Int32:{AutoMap}]: pass
+def c4()->Callable[0,Int32,"":Int32:{AutoMap}]: pass
+def c5()->Callable[0,Int32,"":Int32:{}]: pass
+def c6()->Callable[0,Int32,"foo":Int32]: pass
+
+def f0(x:Optional[Int32]=None,y:Optional[Int32]=None)->Void: pass
+def f1(x:Optional[Int32],y:Optional[Int32]=None)->Void: pass
+def f2(x:Optional[Int32],y:Optional[Int32])->Void: pass
+def f3(x:slice("",Int32,{AutoMap}), y:slice("name",String))->Void: pass
+
+@@;
+
+$t = ($name)->{
+ return FormatType(EvaluateType(
+ ParseTypeHandle(Core::PythonFuncSignature(AsAtom("Python3"), $script, $name))));
+};
+
+-- Primitives & Singletons
+
+select $t("primitive");
+select $t("singletons");
+
+-- Containers & Special
+
+select $t("containers");
+select $t("special");
+
+-- Callable
+select
+ $t("c0") as c0,
+ $t("c1") as c1,
+ $t("c2") as c2,
+ $t("c3") as c3,
+ $t("c4") as c4,
+ $t("c5") as c5,
+ $t("c6") as c6;
+
+-- Top level
+select
+ $t("f0") as f0,
+ $t("f1") as f1,
+ $t("f2") as f2,
+ $t("f3") as f3;
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql
new file mode 100644
index 0000000000..e540dbf38a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql
@@ -0,0 +1,11 @@
+--sanitizer ignore memory
+$script = @@
+def f(string, uuid, yson):
+ return (string, str(type(string)), uuid, str(type(uuid)), yson, str(type(yson)))
+
+f._yql_bytes_decode_mode = 'strict'
+@@;
+
+$udf = Python3::f(Callable<(String?, UUid?, Yson?)->Tuple<String?, String, UUid?, String, Yson?, String>>, $script);
+
+SELECT $udf("string", UUid('1812bc18-5838-4cde-98aa-287302697b90'), cast(@@{"abc"=1}@@ as yson));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in
new file mode 100644
index 0000000000..d5ddcb4083
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in
@@ -0,0 +1 @@
+{"key"="1";"subkey"="2";"value"="3"};
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql
new file mode 100644
index 0000000000..9db9840292
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql
@@ -0,0 +1,12 @@
+--sanitizer ignore memory
+$udfScript = @@
+import yql
+def mapper(records):
+ yql.g = records
+ for record in records:
+ yield dict(yid=b"bla", rnd=0.)
+@@;
+
+$udf = Python3::mapper(Callable<(Stream<Struct<key:String, subkey:String, value:String>>)->Stream<Struct<yid:String, rnd:Double>>>, $udfScript);
+
+PROCESS Input using $udf(TableRows());
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql
new file mode 100644
index 0000000000..43dd00cb3d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql
@@ -0,0 +1,20 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$script = @@
+import json
+
+def yloads(z):
+ return json.loads(str(z, 'latin-1').replace("=",":"))
+
+def ydumps(z):
+ return bytes(json.dumps(z).replace(":","="), 'latin-1')
+
+def f(s):
+ return (s.get("abc",0),s)
+
+f._yql_convert_yson = (yloads,ydumps)
+@@;
+
+$udf = Python3::f(Callable<(Yson?)->Tuple<Int64, Yson?>>, $script);
+
+SELECT $udf(cast(@@{"abc"=1}@@ as yson));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql
new file mode 100644
index 0000000000..3f7de07d5c
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql
@@ -0,0 +1,61 @@
+--sanitizer ignore memory
+$data = AsTuple(
+ Bool("true"),
+ Bool("FalsE"),
+ Int8("-128"),
+ Int8("127"),
+ Uint8("0"),
+ Uint8("255"),
+ Int16("-32768"),
+ Int16("32767"),
+ Uint16("0"),
+ Uint16("65535"),
+ Int32("-2147483648"),
+ Int32("2147483647"),
+ Uint32("0"),
+ Uint32("4294967295"),
+ Int64("-9223372036854775808"),
+ Int64("9223372036854775807"),
+ Uint64("0"),
+ Uint64("18446744073709551615"),
+ Float("0"),
+ Float("1"),
+ Float("-1e30"),
+ Float("-inf"),
+ Float("+inf"),
+ Float("nan"),
+ Double("0"),
+ Double("1"),
+ Double("-1e300"),
+ Double("-inf"),
+ Double("+inf"),
+ Double("nan"),
+ String("foo\xffbar"),
+ Utf8("привет"),
+ Yson("<a=1>[3;%false]"),
+ Json(@@{"a":1,"b":null}@@),
+ Date("2000-01-01"),
+ Datetime("2000-01-01T01:02:03Z"),
+ Timestamp("2000-01-01T01:02:03.4Z"),
+ Interval("P1DT12H"),
+ TzDate("2000-01-01,Europe/Moscow"),
+ TzDatetime("2000-01-01T01:02:03,Europe/Moscow"),
+ TzTimestamp("2000-01-01T01:02:03.4,Europe/Moscow"),
+ Uuid('31323334-3536-3738-393a-3b3c3d3e3f40'),
+ Decimal('3.1415926535897932384626433832795029', 35, 34),
+ Decimal('-.00000000000000000000000000000000001', 35, 35),
+ Decimal('NAN', 10, 5),
+ Decimal('-iNf', 1, 0)
+);
+
+$type = CallableType(0,
+ TypeOf($data),
+ TypeOf($data)
+);
+
+$f = Python3::f($type, @@
+def f(x):
+ return x
+@@);
+
+select $data, $f($data);
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg
new file mode 100644
index 0000000000..5dae597903
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg
@@ -0,0 +1 @@
+xfail
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql
new file mode 100644
index 0000000000..100086c9e4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql
@@ -0,0 +1,23 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$script = @@
+import sys
+import traceback
+
+
+def excepthook(*args):
+ print('CUSTOM_EXCEPTHOOK', file=sys.stderr)
+ print(all(_ for _ in args), file=sys.stderr)
+ print("".join(traceback.format_exception(*args)), file=sys.stderr)
+
+
+sys.excepthook = excepthook
+
+
+def f(string):
+ raise Exception()
+@@;
+
+$udf = Python3::f(Callable<(String)->String>, $script);
+
+SELECT $udf(@@{"abc":1}@@);
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql
new file mode 100644
index 0000000000..a43af8791d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql
@@ -0,0 +1,19 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$s = @@
+def list_func(lst):
+ return lst.count(1)
+list_func._yql_lazy_input = False
+@@;
+
+$u = Python3::list_func(Callable<(List<Int32>)->Int32>, $s);
+select $u(AsList(1,2,3));
+
+$s = @@
+def dict_func(dict):
+ return list(dict.values()).count(b"b")
+dict_func._yql_lazy_input = False
+@@;
+
+$v = Python3::dict_func(Callable<(Dict<Int32, String>)->Int32>, $s);
+select $v(AsDict(AsTuple(1,"a"),AsTuple(2,"b")));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg
new file mode 100644
index 0000000000..5dae597903
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg
@@ -0,0 +1 @@
+xfail
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql
new file mode 100644
index 0000000000..33396f036a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql
@@ -0,0 +1,7 @@
+--sanitizer ignore memory
+$optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@
+def opt_opt_list(in_str):
+ return [in_str] if len(in_str) % 2 == 0 else None
+@@);
+
+SELECT $optOptList("42");
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in
diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql
new file mode 100644
index 0000000000..c2576a72e4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql
@@ -0,0 +1,92 @@
+--sanitizer ignore memory
+/* syntax version 1 */
+$x = AsList(1,2,3);
+
+$s1 = @@
+def f(input):
+ for x in input:
+ yield x
+@@;
+
+$s2 = @@
+class Iter:
+ def __init__(self, input):
+ self.input = input
+
+ def __next__(self):
+ return next(self.input)
+@@;
+
+$s3 = @@
+class CallableIter:
+ def __init__(self, input):
+ self.input = input
+
+ def __call__(self):
+ def f(input):
+ for x in input:
+ yield x
+
+ return f(self.input)
+@@;
+
+$s4 = @@
+class Iterable:
+ def __init__(self, input):
+ self.input = input
+
+ def __iter__(self):
+ return iter(self.input)
+@@;
+
+$f1 = Python3::f(Callable<(Stream<Int32>)->Stream<Int32>>, $s1);
+
+$f2 = Python3::Iter(Callable<(Stream<Int32>)->Stream<Int32>>, $s2);
+
+$f3 = Python3::CallableIter(Callable<(Stream<Int32>)->Stream<Int32>>, $s3);
+
+$f4 = Python3::Iterable(Callable<(Stream<Int32>)->Stream<Int32>>, $s4);
+
+$g = ($stream)->{
+ return $stream;
+};
+
+select Yql::Collect($g(Yql::Iterator($x, Yql::DependsOn("A1"))));
+
+select Yql::Collect($f1(Yql::Iterator($x, Yql::DependsOn("A2"))));
+
+select Yql::Collect($f2(Yql::Iterator($x, Yql::DependsOn("A3"))));
+
+select Yql::Collect($f3(Yql::Iterator($x, Yql::DependsOn("A4"))));
+
+select Yql::Collect($f4(Yql::Iterator($x, Yql::DependsOn("A5"))));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B1")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $g));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B2")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f1));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B3")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f2));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B4")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f3));
+
+select Yql::Collect(Yql::Switch(
+ Yql::Iterator($x, Yql::DependsOn("B5")),
+ AsAtom('0'),
+ AsTuple(AsAtom('0')),
+ $f4));
diff --git a/yql/essentials/udfs/common/python/python3_small/test/ya.make b/yql/essentials/udfs/common/python/python3_small/test/ya.make
new file mode 100644
index 0000000000..ac03d94668
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/test/ya.make
@@ -0,0 +1,10 @@
+YQL_UDF_TEST_CONTRIB()
+
+TIMEOUT(300)
+SIZE(MEDIUM)
+
+DEPENDS(
+ yql/essentials/udfs/common/python/python3_small
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/python3_small/ya.make b/yql/essentials/udfs/common/python/python3_small/ya.make
new file mode 100644
index 0000000000..f815fa8d75
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python3_small/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(python3_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME Python3
+ RESOURCE_NAME Python3
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h
new file mode 100644
index 0000000000..a4e393b486
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_builder.h>
+#include <yql/essentials/public/udf/udf_registrator.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+#include <yql/essentials/udfs/common/python/bindings/py_ptr.h>
+#include <yql/essentials/udfs/common/python/bindings/py_callable.h>
+#include <yql/essentials/udfs/common/python/bindings/py_cast.h>
+#include <yql/essentials/udfs/common/python/bindings/py_errors.h>
+#include <yql/essentials/udfs/common/python/bindings/py_gil.h>
+#include <yql/essentials/udfs/common/python/bindings/py_utils.h>
+#include <yql/essentials/udfs/common/python/bindings/py_yql_module.h>
+
+#include <util/generic/yexception.h>
+#include <util/stream/str.h>
+#include <util/stream/printf.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+using namespace NYql::NUdf;
+using namespace NPython;
+
+//////////////////////////////////////////////////////////////////////////////
+// TPythonFunctionFactory: compiles the script passed as args[0] and returns
+// its entry point `name`, converted by FromPyCallable() to the requested type.
+class TPythonFunctionFactory: public TBoxedValue
+{
+public:
+    TPythonFunctionFactory(
+            const TStringRef& name,          // entry point looked up in the compiled module
+            const TStringRef& tag,
+            const TType* functionType,       // type handed to FromPyCallable()
+            ITypeInfoHelper::TPtr&& helper,
+            const NYql::NUdf::TSourcePosition& pos)  // prefix of every termination message
+        : Ctx(new TPyContext(helper, tag, pos))
+        , FunctionName(name)
+        , FunctionType_(functionType)
+    {
+    }
+
+    ~TPythonFunctionFactory() {
+        Ctx->Cleanup();
+        PyCleanup();
+    }
+
+private:
+    TUnboxedValue Run(
+            const IValueBuilder* valueBuilder,
+            const TUnboxedValuePod* args) const override
+    {
+        TPyCastContext::TPtr castCtx = MakeIntrusive<TPyCastContext>(valueBuilder, Ctx);
+
+        // Copy the script into a TString to get a proper null-terminated C string.
+        TString source(args[0].AsStringRef());
+
+        TPyGilLocker lock;
+        TPyObjectPtr module = CompileModule(FunctionName, source);
+        if (!module) {
+            UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to compile module: " << GetLastErrorAsString()).data());
+        }
+
+        TPyObjectPtr function(PyObject_GetAttrString(module.Get(), FunctionName.data()));
+        if (!function) {
+            UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to find entry point: " << GetLastErrorAsString()).data());
+        }
+
+        if (!PyCallable_Check(function.Get())) {
+            UdfTerminate((TStringBuilder() << Ctx->Pos << "Entry point is not a callable").data());
+        }
+
+        try {
+            SetupCallableSettings(castCtx, function.Get());
+        } catch (const yexception& e) {
+            UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to setup callable settings: "
+                << e.what()).data());
+        }
+        return FromPyCallable(castCtx, FunctionType_, function.Release());
+    }
+    // Compiles `source` under the file name "embedded:<name>" and executes it as a
+    // module named <name><counter>; yields an empty pointer when either step fails.
+    static TPyObjectPtr CompileModule(const TString& name, const TString& source) {
+        unsigned int moduleNum = AtomicCounter++;
+        TString filename(TStringBuf("embedded:"));
+        filename += name;
+
+        TPyObjectPtr module, code;
+        if (HasEncodingCookie(source)) {
+            code.ResetSteal(Py_CompileString(source.data(), filename.data(), Py_file_input));
+        } else {
+            PyCompilerFlags cflags{};  // value-initialized: no indeterminate field reaches CPython
+            cflags.cf_flags = PyCF_SOURCE_IS_UTF8;
+
+            code.ResetSteal(Py_CompileStringFlags(
+                source.data(), filename.data(), Py_file_input, &cflags));
+        }
+
+        if (code) {
+            TString nameWithNum = name + ToString(moduleNum);
+            char* moduleName = const_cast<char*>(nameWithNum.data());
+            module.ResetSteal(PyImport_ExecCodeModule(moduleName, code.Get()));
+        }
+
+        return module;
+    }
+
+    const TPyContext::TPtr Ctx;
+    const TString FunctionName;
+    const TType* FunctionType_;
+    inline static std::atomic_uint AtomicCounter = 0;  // suffix that makes each module name distinct
+};
diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp
new file mode 100644
index 0000000000..b1739a1775
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp
@@ -0,0 +1,232 @@
+#include "python_udf.h"
+#include "python_function_factory.h"
+
+#include <yql/essentials/public/udf/udf_version.h>
+#include <yql/essentials/udfs/common/python/bindings/py_utils.h>
+
+#include <util/generic/vector.h>
+#include <util/system/execpath.h>
+
+namespace {
+
+#if PY_MAJOR_VERSION >= 3 // Py_SetProgramName() takes wchar_t* on Python 3 and char* on Python 2
+#define PYTHON_PROGRAMM_NAME L"YQL::Python3"
+#else
+#define PYTHON_PROGRAMM_NAME "YQL::Python2"
+#endif
+
+// Appends every entry of pathVals to sys.path. Returns -1 when sys.path is not
+// available, otherwise the first non-zero PyList_Append() code or the PySys_SetObject() code.
+int AddToPythonPath(const TVector<TStringBuf>& pathVals)
+{
+    char sysAttr[] = "path"; // PySys_{Get,Set}Object take a non-const char* arg
+    TPyObjectPtr pathList(PySys_GetObject(sysAttr), TPyObjectPtr::ADD_REF);
+    if (!pathList) {
+        return -1;
+    }
+    for (const auto& entry: pathVals) {
+        TPyObjectPtr pyEntry = PyRepr(entry.data());
+        if (const int appendRc = PyList_Append(pathList.Get(), pyEntry.Get()); appendRc != 0) {
+            return appendRc;
+        }
+    }
+    return PySys_SetObject(sysAttr, pathList.Get());
+}
+
+// Imports the `__res` module, which hosts the Arcadia static python import hook
+// and modifies sys.meta_path upon import; aborts when the import fails.
+void InitArcadiaPythonRuntime()
+{
+    const char* const hookModule = "__res";
+    TPyObjectPtr imported(PyImport_ImportModule(hookModule));
+    Y_ABORT_UNLESS(imported, "Can't import arcadia python runtime");
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TPythonModule: UDF module that, in standalone mode, initializes and finalizes
+// the embedded interpreter and hands out TPythonFunctionFactory implementations.
+class TPythonModule: public IUdfModule
+{
+public:
+    TPythonModule(const TString& resourceName, EPythonFlavor pythonFlavor, bool standalone = true)
+        : ResourceName(resourceName), Standalone(standalone)
+    {
+        if (Standalone) {
+            Py_SetProgramName(PYTHON_PROGRAMM_NAME);
+            PrepareYqlModule();
+            Py_Initialize();
+        }
+
+        InitYqlModule(pythonFlavor, standalone);
+
+        const auto rc = PyRun_SimpleString(R"(
+# numpy on import may find installed openblas library and load it,
+# which in turn causes it to start CPUCOUNT threads
+# with approx. 40Mb memory reserved for each thread;
+#
+# See more detailed explanation here: https://st.yandex-team.ru/STATLIBS-1715#5bfc68ecbbc039001cec572a
+#
+# Thus, we reduce negative effects as much as possible
+import os
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+
+
+# Following part allows us later to format tracebacks via sys.excepthook
+# in thread-safe manner
+import sys
+import threading
+if sys.version_info >= (3, 0):
+    from io import StringIO, TextIOWrapper as SysStderrType
+else:
+    from cStringIO import StringIO
+    SysStderrType = file
+
+class StderrLocal(threading.local):
+
+    def __init__(self):
+        self.is_real_mode = True
+        self.buffer = StringIO()
+
+
+class StderrProxy(object):
+    def __init__(self, stderr):
+        self._stderr = stderr
+        self._tls = StderrLocal()
+
+    def _toggle_real_mode(self):
+        self._tls.is_real_mode = not self._tls.is_real_mode
+        if not self._tls.is_real_mode:
+            self._tls.buffer = StringIO()  # StringIO has no clear(); start capture with a fresh buffer
+
+    def _get_value(self):
+        assert not self._tls.is_real_mode
+        return self._tls.buffer.getvalue()
+
+    def __getattr__(self, attr):
+        target = self._stderr
+        if not self._tls.is_real_mode:
+            target = self._tls.buffer
+
+        return getattr(target, attr)
+
+if isinstance(sys.stderr, SysStderrType):
+    sys.stderr = StderrProxy(sys.stderr)
+)");
+        Y_ABORT_UNLESS(rc >= 0, "Can't setup module");
+
+        if (pythonFlavor == EPythonFlavor::Arcadia) {
+            InitArcadiaPythonRuntime();
+        }
+
+#ifndef _win_
+        if (Standalone) {
+            TVector<TStringBuf> paths;
+            if (pythonFlavor == EPythonFlavor::System) {
+                paths.push_back(TStringBuf("/usr/lib/python2.7/dist-packages"));
+            }
+            paths.push_back(TStringBuf("."));
+            const auto r = AddToPythonPath(paths);
+            Y_ABORT_UNLESS(r >= 0, "Can't add dist-packages into sys.path");
+        }
+#endif
+
+        char executableVar[] = "executable"; // PySys_{Get,Set}Object take a non-const char* arg
+        TPyObjectPtr pyExecutableStr = PyRepr(GetExecPath().data());
+        Y_ABORT_UNLESS(PySys_SetObject(executableVar, pyExecutableStr.Get()) >= 0, "Can't set sys.executable");
+
+        if (Standalone) {
+            PyEval_InitThreads();
+            MainThreadState_ = PyEval_SaveThread();  // releases the GIL; re-acquired in the destructor
+        }
+    }
+
+    ~TPythonModule() {
+        if (Standalone) {
+            PyEval_RestoreThread(MainThreadState_);
+            Py_Finalize();
+        }
+    }
+
+    void CleanupOnTerminate() const final {
+        PyCleanup();
+    }
+
+    void GetAllFunctions(IFunctionsSink&) const final {}
+
+    void BuildFunctionTypeInfo(
+            const TStringRef& name,
+            TType* userType,
+            const TStringRef& typeConfig,
+            ui32 flags,
+            IFunctionTypeInfoBuilder& builder) const final
+    {
+        Y_UNUSED(typeConfig);
+
+        if (flags & TFlags::TypesOnly) {
+            return;
+        }
+
+        try {
+            auto typeHelper = builder.TypeInfoHelper();
+            if (ETypeKind::Callable != typeHelper->GetTypeKind(userType)) {
+                return builder.SetError(TStringRef::Of("Expected callable type"));
+            }
+
+            const auto pos = builder.GetSourcePosition();
+            builder.Implementation(new TPythonFunctionFactory(name, ResourceName, userType, std::move(typeHelper), pos));
+        } catch (const yexception& e) {
+            builder.SetError(TStringBuf(e.what()));
+        }
+    }
+
+private:
+    TString ResourceName;
+    bool Standalone;
+    PyThreadState* MainThreadState_ = nullptr;  // assigned and used only when Standalone is true
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// TStubModule: registered in place of TPythonModule when only types are loaded
+//////////////////////////////////////////////////////////////////////////////
+class TStubModule: public IUdfModule {
+ void GetAllFunctions(IFunctionsSink&) const final {} // reports no functions
+
+ void BuildFunctionTypeInfo(
+ const TStringRef& /*name*/,
+ TType* /*userType*/,
+ const TStringRef& /*typeConfig*/,
+ ui32 flags,
+ IFunctionTypeInfoBuilder& /*builder*/) const final
+ {
+ Y_DEBUG_ABORT_UNLESS(flags & TFlags::TypesOnly, // the builder is never touched
+ "in stub module this function can be called only for types loading");
+ }
+
+ void CleanupOnTerminate() const final {} // nothing to clean up
+};
+
+} // namespace
+
+void NKikimr::NUdf::RegisterYqlPythonUdf(
+        IRegistrator& registrator,
+        ui32 flags,
+        TStringBuf moduleName,
+        TStringBuf resourceName,
+        EPythonFlavor pythonFlavor)
+{
+    // Types-only loading gets a stub module instead of a real Python module.
+    const bool typesOnly = flags & IRegistrator::TFlags::TypesOnly;
+    if (typesOnly) {
+        registrator.AddModule(moduleName, new TStubModule);
+        return;
+    }
+    // Otherwise register a standalone Python module for this resource and flavor.
+    registrator.AddModule(moduleName, NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true));
+}
+
+TUniquePtr<NKikimr::NUdf::IUdfModule> NKikimr::NUdf::GetYqlPythonUdfModule(
+ TStringBuf resourceName, NKikimr::NUdf::EPythonFlavor pythonFlavor,
+ bool standalone // true: the module initializes and finalizes the interpreter itself
+) {
+ return new TPythonModule(TString(resourceName), pythonFlavor, standalone); // resourceName is copied into a TString
+}
diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h
new file mode 100644
index 0000000000..16d7da096d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <yql/essentials/public/udf/udf_registrator.h>
+
+namespace NYql {
+namespace NUdf {
+
+enum class EPythonFlavor {
+ System, // standalone non-Windows modules also add /usr/lib/python2.7/dist-packages to sys.path
+ Arcadia, // the module imports `__res` to set up the Arcadia python runtime
+};
+
+void RegisterYqlPythonUdf( // adds a stub module when flags has TypesOnly, a standalone Python module otherwise
+ IRegistrator& registrator,
+ ui32 flags,
+ TStringBuf moduleName,
+ TStringBuf resourceName,
+ EPythonFlavor pythonFlavor);
+
+TUniquePtr<IUdfModule> GetYqlPythonUdfModule( // creates the Python UDF module for the given resource name
+ TStringBuf resourceName,
+ EPythonFlavor pythonFlavor,
+ bool standalone); // true: the module initializes and finalizes the interpreter itself
+
+} // namespace NUdf
+} // namespace NYql
diff --git a/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports
new file mode 100644
index 0000000000..2ffd6f54b5
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports
@@ -0,0 +1,5 @@
+C Register
+C AbiVersion
+C RunPython
+C BindSymbols
+C SetBackTraceCallback
diff --git a/yql/essentials/udfs/common/python/python_udf/ya.make b/yql/essentials/udfs/common/python/python_udf/ya.make
new file mode 100644
index 0000000000..9a2090665a
--- /dev/null
+++ b/yql/essentials/udfs/common/python/python_udf/ya.make
@@ -0,0 +1,20 @@
+PY23_NATIVE_LIBRARY()
+
+YQL_ABI_VERSION(2 27 0)
+
+SRCS(
+ python_udf.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf
+ yql/essentials/udfs/common/python/bindings
+)
+
+CFLAGS(
+ -DDISABLE_PYDEBUG
+)
+
+NO_COMPILER_WARNINGS()
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/README.MD b/yql/essentials/udfs/common/python/system_python/README.MD
new file mode 100644
index 0000000000..16d46fd51d
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/README.MD
@@ -0,0 +1,7 @@
+The python3_N folders here are mirrors of python3_small, adjusted for the system Python (NAME/RESOURCE_NAME Python3 -> SystemPython3_N, plus LDFLAGS(-lpython3.N)).
+
+They are supposed to be built with a local Python: `ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.N -DPYTHON_CONFIG=python3.N-config python3_N`
+
+One way to get all pythons on the same machine is `sudo add-apt-repository ppa:deadsnakes/ppa` and `sudo apt install python3.N-dev`
+
+Use build_system_python_udfs.sh to build all Python UDFs with the system (local) Pythons.
diff --git a/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh
new file mode 100755
index 0000000000..8dd2245230
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -eux # -e: stop at the first failing build, -u: unset variables are errors, -x: trace each command
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.8 -DPYTHON_CONFIG=python3.8-config python3_8
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.9 -DPYTHON_CONFIG=python3.9-config python3_9
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.10 -DPYTHON_CONFIG=python3.10-config python3_10
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.11 -DPYTHON_CONFIG=python3.11-config python3_11
+ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.12 -DPYTHON_CONFIG=python3.12-config python3_12
diff --git a/yql/essentials/udfs/common/python/system_python/python3_10/ya.make b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make
new file mode 100644
index 0000000000..12068a33a1
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_10_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_10
+ RESOURCE_NAME SystemPython3_10
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.10")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_11/ya.make b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make
new file mode 100644
index 0000000000..483432b9b9
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_11_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_11
+ RESOURCE_NAME SystemPython3_11
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.11")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_12/ya.make b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make
new file mode 100644
index 0000000000..8220fda0ea
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_12_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_12
+ RESOURCE_NAME SystemPython3_12
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.12")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_8/ya.make b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make
new file mode 100644
index 0000000000..df447bacb4
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_8_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_8
+ RESOURCE_NAME SystemPython3_8
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.8")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/python3_9/ya.make b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make
new file mode 100644
index 0000000000..ea3e5d849e
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make
@@ -0,0 +1,16 @@
+YQL_PYTHON3_UDF(systempython3_9_udf)
+
+REGISTER_YQL_PYTHON_UDF(
+ NAME SystemPython3_9
+ RESOURCE_NAME SystemPython3_9
+)
+
+IF (USE_LOCAL_PYTHON)
+ LDFLAGS("-lpython3.9")
+ENDIF()
+
+PEERDIR(
+ yql/essentials/public/udf
+)
+
+END()
diff --git a/yql/essentials/udfs/common/python/system_python/ya.make b/yql/essentials/udfs/common/python/system_python/ya.make
new file mode 100644
index 0000000000..3afc7796bd
--- /dev/null
+++ b/yql/essentials/udfs/common/python/system_python/ya.make
@@ -0,0 +1,7 @@
+RECURSE(
+ python3_8
+ python3_9
+ python3_10
+ python3_11
+ python3_12
+)
diff --git a/yql/essentials/udfs/common/python/ya.make b/yql/essentials/udfs/common/python/ya.make
new file mode 100644
index 0000000000..bb6a4c8d5b
--- /dev/null
+++ b/yql/essentials/udfs/common/python/ya.make
@@ -0,0 +1,10 @@
+# This module should not be exported under CMake since it requires Python build
+NO_BUILD_IF(STRICT EXPORT_CMAKE)
+
+RECURSE(
+ bindings
+ main_py3
+ python3_small
+ python_udf
+ system_python
+)