aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs/common/python/bindings/py_cast.cpp
diff options
context:
space:
mode:
authorimunkin <imunkin@yandex-team.com>2024-11-08 10:00:23 +0300
committerimunkin <imunkin@yandex-team.com>2024-11-08 10:12:13 +0300
commita784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch)
tree05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs/common/python/bindings/py_cast.cpp
parentd70137a7b530ccaa52834274913bbb5a3d1ca06e (diff)
downloadydb-a784a2f943d6e15caa6241e2e96d80aac6dbf375.tar.gz
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories: * clickhouse/client * datetime * knn * roaring commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs/common/python/bindings/py_cast.cpp')
-rw-r--r--yql/essentials/udfs/common/python/bindings/py_cast.cpp955
1 files changed, 955 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp
new file mode 100644
index 0000000000..3aa5537b21
--- /dev/null
+++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp
@@ -0,0 +1,955 @@
+#include "py_cast.h"
+#include "py_ptr.h"
+#include "py_errors.h"
+#include "py_callable.h"
+#include "py_dict.h"
+#include "py_list.h"
+#include "py_gil.h"
+#include "py_utils.h"
+#include "py_void.h"
+#include "py_resource.h"
+#include "py_stream.h"
+#include "py_struct.h"
+#include "py_tuple.h"
+#include "py_variant.h"
+#include "py_decimal.h"
+
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+#include <yql/essentials/public/udf/udf_type_printer.h>
+#include <yql/essentials/public/udf/udf_terminator.h>
+#include <yql/essentials/utils/utf8.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+
+#include <util/string/join.h>
+#include <util/string/builder.h>
+
+#ifdef HAVE_LONG_LONG
+# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask
+# define YQL_PyLong_Asi64 PyLong_AsLongLong
+# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong
+#else
+# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask
+# define YQL_PyLong_Asi64 PyLong_AsLong
+# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong
+#endif
+
+#define TO_PYTHON(Format, Type) \
+ template <> \
+ ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \
+ return Py_BuildValue(Format, value); \
+ }
+
+#define TO_PYTHON_BYTES(Type) \
+ template <> \
+ ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \
+ TStringBuf value = val; \
+ if (value.data() == nullptr) \
+ Py_RETURN_NONE; \
+ const Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \
+ return PyBytes_FromStringAndSize(value.data(), size); \
+ }
+
+#define TO_PYTHON_UNICODE(Type) \
+ template <> \
+ ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \
+ TStringBuf value = val; \
+ if (value.data() == nullptr) \
+ Py_RETURN_NONE; \
+ Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \
+ return PyUnicode_FromStringAndSize(value.data(), size); \
+ }
+
+#define PY_ENSURE_TYPE(Type, Value, Message) \
+ do { \
+ if (!Py##Type##_Check(Value)) { \
+ throw yexception() << Message << " " #Type "; Object repr: " \
+ << PyObjectRepr(Value); \
+ } \
+ } while (0)
+
+#define FROM_PYTHON_FLOAT(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ double result = PyFloat_AsDouble(value); \
+ if (result == -1.0 && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ ThrowCastException(value, "Float"); \
+ } \
+ return static_cast<Type>(result); \
+ }
+
+#define FROM_PYTHON_LONG(Type, BigType) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ if (PyLong_Check(value)) { \
+ auto result = YQL_PyLong_As##BigType(value); \
+ if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ ThrowCastException(value, "Long"); \
+ } \
+ if (result < Min<Type>() || result > Max<Type>()) { \
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for " << #Type; \
+ } \
+ return static_cast<Type>(result); \
+ } \
+ ThrowCastTypeException(value, "Long"); \
+ }
+
+#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ if (PyInt_Check(value)) { \
+ long result = PyInt_AsLong(value); \
+ if (result == -1L && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ ThrowCastException(value, "Long"); \
+ } \
+ if ( \
+ static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>()) || \
+ static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()) \
+ ) { \
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for " << #Type; \
+ } \
+ return static_cast<Type>(result); \
+ } else if (PyLong_Check(value)) { \
+ auto result = YQL_PyLong_As##BigType(value); \
+ if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ ThrowCastException(value, "Long"); \
+ } \
+ if (result < Min<Type>() || result > Max<Type>()) { \
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for " << #Type; \
+ } \
+ return static_cast<Type>(result); \
+ } \
+ ThrowCastTypeException(value, "Long"); \
+ }
+
+#define FROM_PYTHON_BYTES_OR_UTF(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ if (PyUnicode_Check(value)) { \
+ Py_ssize_t size = 0U; \
+ const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
+ if (!str || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ } else if (PyBytes_Check(value)) { \
+ Py_ssize_t size = 0U; \
+ char *str = nullptr; \
+ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ } \
+ ThrowCastTypeException(value, "String"); \
+ }
+
+#define FROM_PYTHON_BYTES(Type) \
+ template <> \
+ Type PyCast<Type>(PyObject* value) { \
+ PY_ENSURE_TYPE(Bytes, value, "Expected"); \
+ char* str = nullptr; \
+ Py_ssize_t size = 0; \
+ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ return Type(str, size_t(size)); \
+ }
+
+#define TRY_FROM_PYTHON_FLOAT(Type) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& result) { \
+ double v = PyFloat_AsDouble(value); \
+ if (v == -1.0 && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ return false; \
+ } \
+ result = static_cast<Type>(v); \
+ return true; \
+ }
+
+#define TRY_FROM_PYTHON_LONG(Type, BigType) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& res) { \
+ if (PyLong_Check(value)) { \
+ auto result = YQL_PyLong_As##BigType(value); \
+ if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ return false; \
+ } \
+ if (result < Min<Type>() || result > Max<Type>()) { \
+ return false; \
+ } \
+ res = static_cast<Type>(result); \
+ return true; \
+ } \
+ return false; \
+ }
+
+#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \
+ template <> \
+ bool TryPyCast<Type>(PyObject* value, Type& res) { \
+ if (PyInt_Check(value)) { \
+ long result = PyInt_AsLong(value); \
+ if (result == -1L && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ return false; \
+ } \
+ res = static_cast<Type>(result); \
+ if (result < static_cast<long>(Min<Type>()) || (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()))) { \
+ return false; \
+ } \
+ return true; \
+ } else if (PyLong_Check(value)) { \
+ auto result = YQL_PyLong_As##BigType(value); \
+ if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \
+ PyErr_Clear(); \
+ return false; \
+ } \
+ if (result < Min<Type>() || result > Max<Type>()) { \
+ return false; \
+ } \
+ res = static_cast<Type>(result); \
+ return true; \
+ } \
+ return false; \
+ }
+
+#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \
+ template <> \
+ bool TryPyCast(PyObject* value, Type& result) { \
+ if (PyUnicode_Check(value)) { \
+ Py_ssize_t size = 0U; \
+ const auto str = PyUnicode_AsUTF8AndSize(value, &size); \
+ if (!str || size < 0) { \
+ return false; \
+ } \
+ result = Type(str, size_t(size)); \
+ return true; \
+ } else if (PyBytes_Check(value)) { \
+ Py_ssize_t size = 0U; \
+ char *str = nullptr; \
+ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ ThrowCastTypeException(value, "String"); \
+ } \
+ result = Type(str, size_t(size)); \
+ return true; \
+ } \
+ return false; \
+ }
+
+#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \
+ template <> \
+ bool TryPyCast(PyObject* value, Type& result) { \
+ if (PyUnicode_Check(value)) { \
+ const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \
+ char* str = nullptr; \
+ Py_ssize_t size = 0; \
+ int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \
+ if (rc == -1 || size < 0) { \
+ return false; \
+ } \
+ result = Type(str, size_t(size)); \
+ return true; \
+ } else if (PyBytes_Check(value)) { \
+ char* str = nullptr; \
+ Py_ssize_t size = 0; \
+ int rc = PyBytes_AsStringAndSize(value, &str, &size); \
+ if (rc == -1 || size < 0) { \
+ return false; \
+ } \
+ result = Type(str, size_t(size)); \
+ return true; \
+ } else { \
+ return false; \
+ } \
+ }
+
+namespace NPython {
+
+using namespace NKikimr;
+
+inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) {
+ throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType
+ << "; Object repr: " << PyObjectRepr(value);
+}
+
+inline void ThrowCastException(PyObject* value, TStringBuf toType) {
+ throw yexception() << "Cast error object " << PyObjectRepr(value) << " to " << toType << ": "
+ << GetLastErrorAsString();
+}
+
+
+template <>
+bool TryPyCast<bool>(PyObject* value, bool& result)
+{
+ int isTrue = PyObject_IsTrue(value);
+ if (isTrue == -1) {
+ return false;
+ }
+ result = (isTrue == 1);
+ return true;
+}
+
+#if PY_MAJOR_VERSION >= 3
+TRY_FROM_PYTHON_LONG(i8, i64)
+TRY_FROM_PYTHON_LONG(ui8, ui64)
+TRY_FROM_PYTHON_LONG(i16, i64)
+TRY_FROM_PYTHON_LONG(ui16, ui64)
+TRY_FROM_PYTHON_LONG(i32, i64)
+TRY_FROM_PYTHON_LONG(ui32, ui64)
+TRY_FROM_PYTHON_LONG(i64, i64)
+TRY_FROM_PYTHON_LONG(ui64, ui64)
+TRY_FROM_PYTHON_BYTES_OR_UTF(TString)
+TRY_FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
+#else
+TRY_FROM_PYTHON_INT_OR_LONG(i8, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui8, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i16, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui16, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i32, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui32, ui64)
+TRY_FROM_PYTHON_INT_OR_LONG(i64, i64)
+TRY_FROM_PYTHON_INT_OR_LONG(ui64, ui64)
+TRY_FROM_PYTHON_STR_OR_UTF(TString)
+TRY_FROM_PYTHON_STR_OR_UTF(NUdf::TStringRef)
+#endif
+
+TRY_FROM_PYTHON_FLOAT(float)
+TRY_FROM_PYTHON_FLOAT(double)
+
+template <>
+bool PyCast<bool>(PyObject* value)
+{
+ int res = PyObject_IsTrue(value);
+ if (res == -1) {
+ throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to bool. "
+ << GetLastErrorAsString();
+ }
+ return res == 1;
+}
+
+#if PY_MAJOR_VERSION >= 3
+FROM_PYTHON_LONG(i8, i64)
+FROM_PYTHON_LONG(ui8, ui64)
+FROM_PYTHON_LONG(i16, i64)
+FROM_PYTHON_LONG(ui16, ui64)
+FROM_PYTHON_LONG(i32, i64)
+FROM_PYTHON_LONG(ui32, ui64)
+FROM_PYTHON_LONG(i64, i64)
+FROM_PYTHON_LONG(ui64, ui64)
+FROM_PYTHON_BYTES_OR_UTF(TString)
+FROM_PYTHON_BYTES_OR_UTF(TStringBuf)
+FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef)
+#else
+FROM_PYTHON_INT_OR_LONG(i8, i64)
+FROM_PYTHON_INT_OR_LONG(ui8, ui64)
+FROM_PYTHON_INT_OR_LONG(i16, i64)
+FROM_PYTHON_INT_OR_LONG(ui16, ui64)
+FROM_PYTHON_INT_OR_LONG(i32, i64)
+FROM_PYTHON_INT_OR_LONG(ui32, ui64)
+FROM_PYTHON_INT_OR_LONG(i64, i64)
+FROM_PYTHON_INT_OR_LONG(ui64, ui64)
+FROM_PYTHON_BYTES(TString)
+FROM_PYTHON_BYTES(TStringBuf)
+FROM_PYTHON_BYTES(NUdf::TStringRef)
+#endif
+
+FROM_PYTHON_FLOAT(float)
+FROM_PYTHON_FLOAT(double)
+
+template <>
+TPyObjectPtr PyCast<bool>(bool value)
+{
+ PyObject* res = value ? Py_True : Py_False;
+ return TPyObjectPtr(res, TPyObjectPtr::ADD_REF);
+}
+
+TO_PYTHON("b", i8)
+TO_PYTHON("B", ui8)
+TO_PYTHON("h", i16)
+TO_PYTHON("H", ui16)
+TO_PYTHON("i", i32)
+TO_PYTHON("I", ui32)
+#ifdef HAVE_LONG_LONG
+TO_PYTHON("L", i64)
+TO_PYTHON("K", ui64)
+#else
+TO_PYTHON("l", i64)
+TO_PYTHON("k", ui64)
+#endif
+
+TO_PYTHON_BYTES(TString)
+TO_PYTHON_BYTES(TStringBuf)
+TO_PYTHON_BYTES(NUdf::TStringRef)
+TO_PYTHON_UNICODE(TString)
+TO_PYTHON_UNICODE(TStringBuf)
+TO_PYTHON_UNICODE(NUdf::TStringRef)
+
+template <typename T>
+NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) {
+ PY_ENSURE(PyTuple_Check(value),
+ "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name);
+
+ Py_ssize_t tupleSize = PyTuple_GET_SIZE(value);
+ PY_ENSURE(tupleSize == 2,
+ "Expected to get Tuple with 2 elements, but got "
+ << tupleSize << " elements");
+
+ PyObject* el0 = PyTuple_GET_ITEM(value, 0);
+ PyObject* el1 = PyTuple_GET_ITEM(value, 1);
+ auto num = PyCast<T>(el0);
+ if (num >= limit) {
+ throw yexception() << "Python object " << PyObjectRepr(el0) \
+ << " is out of range for " << typeName;
+ }
+
+ auto name = PyCast<NUdf::TStringRef>(el1);
+ auto ret = NUdf::TUnboxedValuePod(num);
+ ui32 tzId;
+ if (!ctx->ValueBuilder->GetDateBuilder().FindTimezoneId(name, tzId)) {
+ throw yexception() << "Unknown timezone: " << TStringBuf(name);
+ }
+
+ ret.SetTimezoneId(tzId);
+ return ret;
+}
+
+TO_PYTHON("f", float)
+TO_PYTHON("d", double)
+
+namespace {
+
+TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+ const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto typeId = inspector.GetTypeId();
+
+ switch (typeId) {
+ case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>());
+ case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>());
+ case NUdf::TDataType<i16>::Id: return PyCast<i16>(value.Get<i16>());
+ case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>());
+ case NUdf::TDataType<i32>::Id: return PyCast<i32>(value.Get<i32>());
+ case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>());
+ case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>());
+ case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>());
+ case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>());
+ case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>());
+ case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>());
+ case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
+ case NUdf::TDataType<const char*>::Id: {
+ if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) {
+ return PyCast<NUdf::TStringRef>(value.AsStringRef());
+ } else {
+ auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
+ if (!pyObj) {
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos <<
+ "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" <<
+ GetLastErrorAsString()).data()
+ );
+ }
+ return pyObj;
+ }
+ }
+ case NUdf::TDataType<NUdf::TYson>::Id: {
+ auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef());
+ if (ctx->YsonConverterIn) {
+ TPyObjectPtr pyArgs(PyTuple_New(1));
+ PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release());
+ pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get());
+ if (!pyObj) {
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+ }
+ }
+
+ return pyObj;
+ }
+ case NUdf::TDataType<NUdf::TUuid>::Id:
+ return PyCast<NUdf::TStringRef>(value.AsStringRef());
+ case NUdf::TDataType<NUdf::TJson>::Id:
+ case NUdf::TDataType<NUdf::TUtf8>::Id:
+ return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef());
+ case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>());
+ case NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>());
+ case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>());
+ case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>());
+ case NUdf::TDataType<NUdf::TTzDate>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ case NUdf::TDataType<NUdf::TTzDatetime>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ case NUdf::TDataType<NUdf::TTzTimestamp>::Id: {
+ TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>());
+ auto tzId = value.GetTimezoneId();
+ auto tzName = ctx->GetTimezoneName(tzId);
+ return PyTuple_Pack(2, pyValue.Get(), tzName.Get());
+ }
+ }
+
+ throw yexception()
+ << "Unsupported type " << typeId;
+}
+
+NUdf::TUnboxedValue FromPyData(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto typeId = inspector.GetTypeId();
+
+ switch (typeId) {
+ case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value));
+ case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value));
+ case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value));
+ case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value));
+ case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value));
+ case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value));
+ case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value));
+ case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value));
+ case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value));
+ case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value));
+ case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value));
+ case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale());
+ case NUdf::TDataType<NUdf::TYson>::Id: {
+ if (ctx->YsonConverterOut) {
+ TPyObjectPtr input(value, TPyObjectPtr::ADD_REF);
+ TPyObjectPtr pyArgs(PyTuple_New(1));
+ // PyTuple_SET_ITEM steals reference, so pass ownership to it
+ PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release());
+ input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get()));
+ if (!input) {
+ UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data());
+ }
+ return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get()));
+ }
+ }
+#if PY_MAJOR_VERSION >= 3
+ case NUdf::TDataType<const char*>::Id:
+ return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+ case NUdf::TDataType<NUdf::TUtf8>::Id:
+ case NUdf::TDataType<NUdf::TJson>::Id:
+ if (PyUnicode_Check(value)) {
+ const TPyObjectPtr uif8(PyUnicode_AsUTF8String(value));
+ return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get()));
+ }
+ throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode";
+#else
+ case NUdf::TDataType<const char*>::Id:
+ case NUdf::TDataType<NUdf::TJson>::Id:
+ case NUdf::TDataType<NUdf::TUtf8>::Id: {
+ if (PyUnicode_Check(value)) {
+ const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value));
+ return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get()));
+ }
+
+ if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) &&
+ PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) {
+ throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json";
+ }
+
+ return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+ }
+#endif
+ case NUdf::TDataType<NUdf::TUuid>::Id: {
+ const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value));
+ if (ret.AsStringRef().Size() != 16) {
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " has invalid value for Uuid";
+ }
+
+ return ret;
+ }
+ case NUdf::TDataType<NUdf::TDate>::Id: {
+ auto num = PyCast<ui16>(value);
+ if (num >= NUdf::MAX_DATE) {
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for Date";
+ }
+
+ return NUdf::TUnboxedValuePod(num);
+ }
+
+ case NUdf::TDataType<NUdf::TDatetime>::Id: {
+ auto num = PyCast<ui32>(value);
+ if (num >= NUdf::MAX_DATETIME) {
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for Datetime";
+ }
+
+ return NUdf::TUnboxedValuePod(num);
+ }
+
+ case NUdf::TDataType<NUdf::TTimestamp>::Id: {
+ auto num = PyCast<ui64>(value);
+ if (num >= NUdf::MAX_TIMESTAMP) {
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for Timestamp";
+ }
+
+ return NUdf::TUnboxedValuePod(num);
+ }
+
+ case NUdf::TDataType<NUdf::TInterval>::Id: {
+ auto num = PyCast<i64>(value);
+ if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) {
+ throw yexception() << "Python object " << PyObjectRepr(value) \
+ << " is out of range for Interval";
+ }
+
+ return NUdf::TUnboxedValuePod(num);
+ }
+
+ case NUdf::TDataType<NUdf::TTzDate>::Id:
+ return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx);
+ case NUdf::TDataType<NUdf::TTzDatetime>::Id:
+ return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx);
+ case NUdf::TDataType<NUdf::TTzTimestamp>::Id:
+ return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx);
+ }
+
+ throw yexception()
+ << "Unsupported type " << typeId;
+}
+
+TPyObjectPtr ToPyList(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type,
+ const NUdf::TUnboxedValuePod& value)
+{
+ const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto itemType = inspector.GetItemType();
+
+ if (ctx->LazyInputObjects) {
+ return ToPyLazyList(ctx, itemType, value);
+ }
+
+ TPyObjectPtr list(PyList_New(0));
+ const auto iterator = value.GetListIterator();
+ for (NUdf::TUnboxedValue item; iterator.Next(item);) {
+ auto pyItem = ToPyObject(ctx, itemType, item);
+ if (PyList_Append(list.Get(), pyItem.Get()) < 0) {
+ throw yexception() << "Can't append item to list"
+ << GetLastErrorAsString();
+ }
+ }
+
+ return list;
+}
+
+NUdf::TUnboxedValue FromPyList(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+
+ if (PyList_Check(value)) {
+ // eager list to list conversion
+ auto itemType = inspector.GetItemType();
+ Py_ssize_t cnt = PyList_GET_SIZE(value);
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto list = ctx->ValueBuilder->NewArray(cnt, items);
+ for (Py_ssize_t i = 0; i < cnt; ++i) {
+ PyObject *item = PyList_GET_ITEM(value, i);
+ *items++ = FromPyObject(ctx, itemType, item);
+ }
+ return list;
+ }
+
+ if (PyTuple_Check(value)) {
+ // eager tuple to list conversion
+ auto itemType = inspector.GetItemType();
+ Py_ssize_t cnt = PyTuple_GET_SIZE(value);
+ NUdf::TUnboxedValue *items = nullptr;
+ const auto list = ctx->ValueBuilder->NewArray(cnt, items);
+ for (Py_ssize_t i = 0; i < cnt; ++i) {
+ PyObject *item = PyTuple_GET_ITEM(value, i);
+ *items++ = FromPyObject(ctx, itemType, item);
+ }
+ return list;
+ }
+
+ if (PyGen_Check(value)) {
+ TPyObjectPtr valuePtr(PyObject_GetIter(value));
+ return FromPyLazyIterator(ctx, type, std::move(valuePtr));
+ }
+
+ if (PyIter_Check(value)
+#if PY_MAJOR_VERSION < 3
+ // python 2 iterators must also implement "next" method
+ && 1 == PyObject_HasAttrString(value, "next")
+#endif
+ ) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyIterator(ctx, type, std::move(valuePtr));
+ }
+
+ // assume that this function will returns generator
+ if (PyCallable_Check(value)) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyGenerator(ctx, type, std::move(valuePtr));
+ }
+
+ if (PySequence_Check(value) || PyObject_HasAttrString(value, "__iter__")) {
+ TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF);
+ return FromPyLazyIterable(ctx, type, std::move(valuePtr));
+ }
+
+ throw yexception() << "Expected list, tuple, generator, generator factory, "
+ "iterator or iterable object, but got: " << PyObjectRepr(value);
+}
+
+TPyObjectPtr ToPyOptional(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type,
+ const NUdf::TUnboxedValuePod& value)
+{
+ if (!value) {
+ return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF);
+ }
+
+ const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ return ToPyObject(ctx, inspector.GetItemType(), value);
+}
+
+NUdf::TUnboxedValue FromPyOptional(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ if (value == Py_None) {
+ return NUdf::TUnboxedValue();
+ }
+
+ const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ return FromPyObject(ctx, inspector.GetItemType(), value).Release().MakeOptional();
+}
+
+TPyObjectPtr ToPyDict(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type,
+ const NUdf::TUnboxedValuePod& value)
+{
+ const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto keyType = inspector.GetKeyType();
+ const auto valueType = inspector.GetValueType();
+
+ if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
+ if (ctx->LazyInputObjects) { // TODO
+ return ToPyLazySet(ctx, keyType, value);
+ }
+
+ const TPyObjectPtr set(PyFrozenSet_New(nullptr));
+ const auto iterator = value.GetKeysIterator();
+ for (NUdf::TUnboxedValue key; iterator.Next(key);) {
+ auto pyKey = ToPyObject(ctx, keyType, key);
+ if (PySet_Add(set.Get(), pyKey.Get()) < 0) {
+ throw yexception() << "Can't add item to set" << GetLastErrorAsString();
+ }
+ }
+
+ return set;
+ } else {
+ if (ctx->LazyInputObjects) {
+ return ToPyLazyDict(ctx, keyType, valueType, value);
+ }
+
+ const TPyObjectPtr dict(PyDict_New());
+ const auto iterator = value.GetDictIterator();
+ for (NUdf::TUnboxedValue key, valueObj; iterator.NextPair(key, valueObj);) {
+ auto pyKey = ToPyObject(ctx, keyType, key);
+ auto pyValue = ToPyObject(ctx, valueType, valueObj);
+ if (PyDict_SetItem(dict.Get(), pyKey.Get(), pyValue.Get()) < 0) {
+ throw yexception() << "Can't add item to dict" << GetLastErrorAsString();
+ }
+ }
+
+ return dict;
+ }
+}
+
+NUdf::TUnboxedValue FromPyDict(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type);
+ const auto keyType = inspector.GetKeyType();
+ const auto valueType = inspector.GetValueType();
+
+ if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType)
+ && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) {
+ const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType);
+ if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) {
+ return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value);
+ }
+ } else if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) {
+ if (PyAnySet_Check(value)) {
+ return FromPySet(ctx, keyType, value);
+ } else if (value->ob_type->tp_as_sequence && value->ob_type->tp_as_sequence->sq_contains) {
+ return FromPySequence(ctx, keyType, value);
+ }
+ } else if (PyDict_Check(value)) {
+ return FromPyDict(ctx, keyType, valueType, value);
+ } else if (PyMapping_Check(value)) {
+ return FromPyMapping(ctx, keyType, valueType, value);
+ }
+
+ throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict.";
+}
+
+} // namespace
+
+TPyObjectPtr ToPyObject(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, const NUdf::TUnboxedValuePod& value)
+{
+ switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) {
+ case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value);
+ case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value);
+ case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value);
+ case NUdf::ETypeKind::List: return ToPyList(ctx, type, value);
+ case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value);
+ case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value);
+ case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value);
+ case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value);
+ case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value);
+ case NUdf::ETypeKind::Stream: return ToPyStream(ctx, type, value);
+ case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value);
+ default: {
+ ::TStringBuilder sb;
+ sb << "Failed to export: ";
+ NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out);
+ throw yexception() << sb;
+ }
+ }
+}
+
+NUdf::TUnboxedValue FromPyObject(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type, PyObject* value)
+{
+ switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) {
+ case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value);
+ case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value);
+ case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value);
+ case NUdf::ETypeKind::List: return FromPyList(ctx, type, value);
+ case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value);
+ case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, value);
+ case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, value);
+ case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value);
+ case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value);
+ case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr);
+ case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value);
+ default: {
+ ::TStringBuilder sb;
+ sb << "Failed to import: ";
+ NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out);
+ throw yexception() << sb;
+ }
+ }
+}
+
+TPyObjectPtr ToPyArgs(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type,
+ const NUdf::TUnboxedValuePod* args,
+ const NUdf::TCallableTypeInspector& inspector)
+{
+ const auto argsCount = inspector.GetArgsCount();
+ TPyObjectPtr tuple(PyTuple_New(argsCount));
+
+ for (ui32 i = 0; i < argsCount; i++) {
+ auto arg = ToPyObject(ctx, inspector.GetArgType(i), args[i]);
+ PyTuple_SET_ITEM(tuple.Get(), i, arg.Release());
+ }
+
+ return tuple;
+}
+
+void FromPyArgs(
+ const TPyCastContext::TPtr& ctx,
+ const NUdf::TType* type,
+ PyObject* pyArgs,
+ NUdf::TUnboxedValue* cArgs,
+ const NUdf::TCallableTypeInspector& inspector)
+{
+ PY_ENSURE_TYPE(Tuple, pyArgs, "Expected");
+
+ const auto argsCount = inspector.GetArgsCount();
+ const auto optArgsCount = inspector.GetOptionalArgsCount();
+
+ ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs));
+ PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount,
+ "arguments count missmatch: "
+ "min " << (argsCount - optArgsCount) << ", max " << argsCount
+ << ", got " << pyArgsCount);
+
+ for (ui32 i = 0; i < pyArgsCount; i++) {
+ PyObject* item = PyTuple_GET_ITEM(pyArgs, i);
+ cArgs[i] = FromPyObject(ctx, inspector.GetArgType(i), item);
+ }
+
+ for (ui32 i = pyArgsCount; i < argsCount; i++) {
+ cArgs[i] = NUdf::TUnboxedValuePod();
+ }
+}
+
+class TDummyMemoryLock : public IMemoryLock {
+public:
+ void Acquire() override {}
+ void Release() override {}
+};
+
+TPyCastContext::TPyCastContext(
+ const NKikimr::NUdf::IValueBuilder* builder,
+ TPyContext::TPtr pyCtx,
+ THolder<IMemoryLock> memoryLock)
+ : ValueBuilder(builder)
+ , PyCtx(std::move(pyCtx))
+ , MemoryLock(std::move(memoryLock))
+{
+ if (!MemoryLock) {
+ MemoryLock = MakeHolder<TDummyMemoryLock>();
+ }
+}
+
+TPyCastContext::~TPyCastContext() {
+ TPyGilLocker locker;
+ StructTypes.clear();
+ YsonConverterIn.Reset();
+ YsonConverterOut.Reset();
+ TimezoneNames.clear();
+}
+
+const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) {
+ auto& x = TimezoneNames[id];
+ if (!x) {
+ NKikimr::NUdf::TStringRef ref;
+ if (!ValueBuilder->GetDateBuilder().FindTimezoneName(id, ref)) {
+ throw yexception() << "Unknown timezone id: " << id;
+ }
+
+ x = PyRepr(ref);
+ }
+
+ return x;
+}
+
+} // namspace NPython