diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-28 23:59:20 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-03-01 00:13:10 +0300 |
commit | b04e2faf41bf366d5f501c976bda00eb32d55660 (patch) | |
tree | f922ace378c0c471d912c33f2c0231144f898d78 /yql | |
parent | 9ba742f4d36b4a3d879b4cf8d9234165413f4a0d (diff) | |
download | ydb-b04e2faf41bf366d5f501c976bda00eb32d55660.tar.gz |
Intermediate changes
commit_hash:e2da3ad430fabaa84a74178b1f2103b09ac69ae7
Diffstat (limited to 'yql')
24 files changed, 171 insertions, 100 deletions
diff --git a/yql/essentials/tests/common/test_framework/yql_utils.py b/yql/essentials/tests/common/test_framework/yql_utils.py index 8314349358..2136729be9 100644 --- a/yql/essentials/tests/common/test_framework/yql_utils.py +++ b/yql/essentials/tests/common/test_framework/yql_utils.py @@ -52,7 +52,7 @@ def do_custom_query_check(res, sql_query): def do_custom_error_check(res, sql_query): err_string = None - custom_error = re.search(r"/\* custom error:(.*)\*/", sql_query) + custom_error = re.search(r"/\* custom error:(.*?)\*/", sql_query, re.DOTALL) if custom_error: err_string = custom_error.group(1).strip() assert err_string, 'Expected custom error check in test.\nTest error: %s' % res.std_err diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp index 1c58d7b371..36cc13a1da 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp index 078239962e..ac44ac42e5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp @@ -22,6 +22,7 @@ #include <library/cpp/containers/stack_vector/stack_vec.h> +#include <util/generic/scope.h> #include <util/string/join.h> #include <util/string/builder.h> @@ -252,7 +253,7 @@ template <> \ bool TryPyCast(PyObject* value, Type& result) { \ if (PyUnicode_Check(value)) { \ - const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \ + const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); \ char* str = nullptr; \ Py_ssize_t size = 0; \ int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \ @@ -279,6 +280,22 @@ namespace NPython { using namespace NKikimr; +namespace { + +NPython::TPyObjectPtr AsUtf8StringOrThrow(PyObject* obj) { + auto* utf8String = PyUnicode_AsUTF8String(obj); + if (!utf8String) { + Y_ENSURE(PyErr_Occurred()); + Y_DEFER { + PyErr_Clear(); + }; + throw yexception() << "Failed to convert the string to UTF-8 format. Original message is:\n" << GetLastErrorAsString() << "\n"; + } + return NPython::TPyObjectPtr(utf8String); +} + +} // namespace + inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) { throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType << "; Object repr: " << PyObjectRepr(value); @@ -548,7 +565,7 @@ NUdf::TUnboxedValue FromPyData( case NUdf::TDataType<NUdf::TUtf8>::Id: case NUdf::TDataType<NUdf::TJson>::Id: if (PyUnicode_Check(value)) { - const TPyObjectPtr uif8(PyUnicode_AsUTF8String(value)); + const TPyObjectPtr uif8(AsUtf8StringOrThrow(value)); return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); } throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; @@ -557,7 +574,7 @@ NUdf::TUnboxedValue FromPyData( case NUdf::TDataType<NUdf::TJson>::Id: case NUdf::TDataType<NUdf::TUtf8>::Id: { if (PyUnicode_Check(value)) { - const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); + const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); } diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp index 47f65ab6fa..d4ee90278c 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp @@ -1,9 +1,39 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> +#include <util/string/strip.h> using namespace NPython; +namespace { +template <typename TType> +void TestBadUtf8Encode() { +#if PY_MAJOR_VERSION == 2 + // In Python 2, strings can encode single surrogate pairs, so this issue does not occur there. + return; +#endif // PY_MAJOR_VERSION == 2 + + TPythonTestEngine engine; + + constexpr char programToRun[] = R"( +def Test(): + return "\uDC00" +)"; + constexpr char expectedError[] = R"( +Failed to convert the string to UTF-8 format. Original message is: +UnicodeEncodeError: 'utf-8' codec can't encode character '\udc00' in position 0: surrogates not allowed +)"; + + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<TType>( + StripString(TString(programToRun)), + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, StripString(TString(expectedError))); +} +} // namespace + Y_UNIT_TEST_SUITE(TPyCastTest) { Y_UNIT_TEST(FromPyStrToInt) { TPythonTestEngine engine; @@ -87,4 +117,11 @@ Y_UNIT_TEST_SUITE(TPyCastTest) { yexception, "Cast error object " RETVAL " to Long"); } -} + Y_UNIT_TEST(BadFromPythonUtf8) { + TestBadUtf8Encode<NUdf::TUtf8>(); + } + + Y_UNIT_TEST(BadFromPythonJson) { + TestBadUtf8Encode<NUdf::TJson>(); + } +} // Y_UNIT_TEST_SUITE(TPyCastTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp index 8388c110f3..3f0298013a 100644 --- a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp index 9ac9627ebb..edb3d36e8c 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <yql/essentials/public/udf/udf_ut_helpers.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp index f16165fc54..b2e9a640d4 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <yql/essentials/public/udf/udf_ut_helpers.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp index c55e25891d..35c94d5e8e 100644 --- a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp index d13ea65da6..4cc45f1184 100644 --- a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp index aaa9899c4f..25b43cbf6a 100644 --- a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp index 4a24dd1a13..4a36f7b8f3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp index 444b7b0c5b..b1f5a13786 100644 --- a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp index a97507f549..9b232d99a3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h b/yql/essentials/udfs/common/python/bindings/py_test_engine.h index a36e19fa32..c86febf779 100644 --- a/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h +++ b/yql/essentials/udfs/common/python/bindings/py_test_engine.h @@ -27,6 +27,8 @@ struct TPyInitializer { PrepareYqlModule(); Py_Initialize(); InitYqlModule(NYql::NUdf::EPythonFlavor::Arcadia); + const auto rc = PyRun_SimpleString(NYql::NUdf::STANDART_STREAM_PROXY_INJECTION_SCRIPT); + Y_ENSURE(rc >= 0, "Can't setup module"); } ~TPyInitializer() { TermYqlModule(); diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp index a6b9b6cc3e..f465f0ebb6 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp index e9f5971c78..08b6b78b16 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp @@ -1,5 +1,5 @@ #include "py_variant.h" -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <yql/essentials/minikql/mkql_type_ops.h> #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp index 77ab9bc6e8..d792449d82 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp @@ -1,5 +1,5 @@ #include "py_variant.h" -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp index 7fbeca2043..e6e8a72768 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp @@ -1,4 +1,4 @@ -#include "ut3/py_test_engine.h" +#include "py_test_engine.h" #include <library/cpp/testing/unittest/registar.h> diff --git a/yql/essentials/udfs/common/python/bindings/ut2/ya.make b/yql/essentials/udfs/common/python/bindings/ut2/ya.make new file mode 100644 index 0000000000..b289e188bb --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ut2/ya.make @@ -0,0 +1,9 @@ +IF (OS_LINUX) + IF (NOT WITH_VALGRIND) + UNITTEST_FOR(yql/essentials/udfs/common/python/bindings) + + INCLUDE(../ya.make.test.inc) + USE_PYTHON2() + END() + ENDIF() +ENDIF() diff --git a/yql/essentials/udfs/common/python/bindings/ut3/ya.make b/yql/essentials/udfs/common/python/bindings/ut3/ya.make index cfce3cb9be..3a3c52dfbf 100644 --- a/yql/essentials/udfs/common/python/bindings/ut3/ya.make +++ b/yql/essentials/udfs/common/python/bindings/ut3/ya.make @@ -2,36 +2,11 @@ IF (OS_LINUX) IF (NOT WITH_VALGRIND) UNITTEST_FOR(yql/essentials/udfs/common/python/bindings) - SRCS( - py_callable_ut.cpp - py_cast_ut.cpp - py_dict_ut.cpp - py_list_ut.cpp - py_decimal_ut.cpp - py_number_ut.cpp - py_optional_ut.cpp - py_resource_ut.cpp - py_stream_ut.cpp - py_string_ut.cpp - py_struct_ut.cpp - py_tuple_ut.cpp - py_tzdate_ut.cpp - py_utils_ut.cpp - py_variant_ut.cpp - py_void_ut.cpp - ) - - USE_PYTHON3() - + INCLUDE(../ya.make.test.inc) PEERDIR( library/python/type_info - yql/essentials/minikql/computation/llvm16 - yql/essentials/public/udf/service/exception_policy - yql/essentials/sql/pg_dummy ) - - YQL_LAST_ABI_VERSION() - + USE_PYTHON3() END() ENDIF() ENDIF() diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make index 83380ce4c4..aea3e54717 100644 --- a/yql/essentials/udfs/common/python/bindings/ya.make +++ b/yql/essentials/udfs/common/python/bindings/ya.make @@ -51,6 +51,7 @@ END() IF (NOT EXPORT_CMAKE) RECURSE_FOR_TESTS( + ut2 ut3 ) ENDIF() diff --git a/yql/essentials/udfs/common/python/bindings/ya.make.test.inc b/yql/essentials/udfs/common/python/bindings/ya.make.test.inc new file mode 100644 index 0000000000..67803ad18f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ya.make.test.inc @@ -0,0 +1,27 @@ +SRCS( + py_callable_ut.cpp + py_cast_ut.cpp + py_dict_ut.cpp + py_list_ut.cpp + py_decimal_ut.cpp + py_number_ut.cpp + py_optional_ut.cpp + py_resource_ut.cpp + py_stream_ut.cpp + py_string_ut.cpp + py_struct_ut.cpp + py_tuple_ut.cpp + py_tzdate_ut.cpp + py_utils_ut.cpp + py_variant_ut.cpp + py_void_ut.cpp +) + +PEERDIR( + yql/essentials/minikql/computation/llvm16 + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql/pg_dummy + yql/essentials/udfs/common/python/python_udf +) + +YQL_LAST_ABI_VERSION() diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp index b1739a1775..1007c75edc 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp @@ -59,59 +59,7 @@ public: InitYqlModule(pythonFlavor, standalone); - const auto rc = PyRun_SimpleString(R"( -# numpy on import may find installed openblas library and load it, -# which in turn causes it to start CPUCOUNT threads -# with approx. 40Mb memory reserved for each thread; -# -# See more detailed explanation here: https://st.yandex-team.ru/STATLIBS-1715#5bfc68ecbbc039001cec572a -# -# Thus, we reduce negative effects as much as possible -import os -os.environ['OPENBLAS_NUM_THREADS'] = '1' - - -# Following part allows us later to format tracebacks via sys.excepthook -# in thread-safe manner -import sys -import threading -if sys.version_info >= (3, 0): - from io import StringIO, TextIOWrapper as SysStderrType -else: - from cStringIO import StringIO - SysStderrType = file - -class StderrLocal(threading.local): - - def __init__(self): - self.is_real_mode = True - self.buffer = StringIO() - - -class StderrProxy(object): - def __init__(self, stderr): - self._stderr = stderr - self._tls = StderrLocal() - - def _toggle_real_mode(self): - self._tls.is_real_mode = not self._tls.is_real_mode - if not self._tls.is_real_mode: - self._tls.buffer.clear() - - def _get_value(self): - assert not self._tls.is_real_mode - return self._tls.buffer.getvalue() - - def __getattr__(self, attr): - target = self._stderr - if not self._tls.is_real_mode: - target = self._tls.buffer - - return getattr(target, attr) - -if isinstance(sys.stderr, SysStderrType): - sys.stderr = StderrProxy(sys.stderr) -)"); + const auto rc = PyRun_SimpleString(STANDART_STREAM_PROXY_INJECTION_SCRIPT); Y_ABORT_UNLESS(rc >= 0, "Can't setup module"); if (pythonFlavor == EPythonFlavor::Arcadia) { diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h index 16d7da096d..83b3bb86e6 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_udf.h +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h @@ -5,6 +5,61 @@ namespace NYql { namespace NUdf { +inline constexpr char STANDART_STREAM_PROXY_INJECTION_SCRIPT[] = +R"( +# numpy on import may find installed openblas library and load it, +# which in turn causes it to start CPUCOUNT threads +# with approx. 40Mb memory reserved for each thread; +# +# See more detailed explanation here: https://st.yandex-team.ru/STATLIBS-1715#5bfc68ecbbc039001cec572a +# +# Thus, we reduce negative effects as much as possible +import os +os.environ['OPENBLAS_NUM_THREADS'] = '1' + + +# Following part allows us later to format tracebacks via sys.excepthook +# in thread-safe manner +import sys +import threading +if sys.version_info >= (3, 0): + from io import StringIO, TextIOWrapper as SysStderrType +else: + from cStringIO import StringIO + SysStderrType = file + +class StderrLocal(threading.local): + + def __init__(self): + self.is_real_mode = True + self.buffer = StringIO() + + +class StderrProxy(object): + def __init__(self, stderr): + self._stderr = stderr + self._tls = StderrLocal() + + def _toggle_real_mode(self): + self._tls.is_real_mode = not self._tls.is_real_mode + if not self._tls.is_real_mode: + self._tls.buffer.clear() + + def _get_value(self): + assert not self._tls.is_real_mode + return self._tls.buffer.getvalue() + + def __getattr__(self, attr): + target = self._stderr + if not self._tls.is_real_mode: + target = self._tls.buffer + + return getattr(target, attr) + +if isinstance(sys.stderr, SysStderrType): + sys.stderr = StderrProxy(sys.stderr) +)"; + enum class EPythonFlavor { System, Arcadia, |