diff options
author | eak1mov <eak1mov@yandex-team.com> | 2023-06-06 21:09:43 +0300 |
---|---|---|
committer | eak1mov <eak1mov@yandex-team.com> | 2023-06-06 21:09:43 +0300 |
commit | d0919e63bf76e5117a2544b75b9ea3ed3dac7b77 (patch) | |
tree | ab93fdd00dc8a9c7dfa9ed844a13367273f0d289 | |
parent | 94ab32ef88972cb80d8960aa928650343f0fb860 (diff) | |
download | ydb-d0919e63bf76e5117a2544b75b9ea3ed3dac7b77.tar.gz |
Revert "Kernel for String::LevensteinDistance + BinaryKernel helper"
This reverts commit 7d2e5bf13855490b6221d731db3ef16973ec8714, reversing
changes made to 17dd633f13f3be5d1e240c31d0c4498b9a62beee.
6 files changed, 7 insertions, 109 deletions
diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 2c3badb0e2a..ed0755f46df 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -322,88 +322,6 @@ struct TUnaryKernelExec { } }; -template <typename TDerived> -struct TBinaryKernelExec { - static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { - auto& state = dynamic_cast<TUdfKernelState&>(*ctx->state()); - auto& reader1 = state.GetReader(0); - auto& reader2 = state.GetReader(1); - const auto& arg1 = batch.values[0]; - const auto& arg2 = batch.values[1]; - if (arg1.is_scalar() && arg2.is_scalar()) { - auto& builder = state.GetScalarBuilder(); - auto item1 = reader1.GetScalarItem(*arg1.scalar()); - auto item2 = reader2.GetScalarItem(*arg2.scalar()); - TDerived::Process(item1, item2, [&](TBlockItem out) { - *res = builder.Build(out); - }); - } - else if (arg1.is_scalar() && arg2.is_array()) { - auto item1 = reader1.GetScalarItem(*arg1.scalar()); - auto& array2 = *arg2.array(); - auto& builder = state.GetArrayBuilder(); - size_t maxBlockLength = builder.MaxLength(); - Y_ENSURE(maxBlockLength > 0); - TVector<std::shared_ptr<arrow::ArrayData>> outputArrays; - for (int64_t i = 0; i < array2.length;) { - for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) { - auto item2 = reader2.GetItem(array2, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { - builder.Add(out); - }); - } - auto outputDatum = builder.Build(false); - ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); - } - - *res = MakeArray(outputArrays); - } else if (arg1.is_array() && arg2.is_scalar()) { - auto& array1 = *arg1.array(); - auto item2 = reader2.GetScalarItem(*arg2.scalar()); - auto& builder = state.GetArrayBuilder(); - size_t maxBlockLength = builder.MaxLength(); - Y_ENSURE(maxBlockLength > 0); - TVector<std::shared_ptr<arrow::ArrayData>> outputArrays; - for (int64_t i = 0; i < array1.length;) { - for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { - auto item1 = reader1.GetItem(array1, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { - builder.Add(out); - }); - } - auto outputDatum = builder.Build(false); - ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); - } - - *res = MakeArray(outputArrays); - } else { - Y_ENSURE(arg1.is_array() && arg2.is_array()); - auto& array1 = *arg1.array(); - auto& array2 = *arg2.array(); - auto& builder = state.GetArrayBuilder(); - size_t maxBlockLength = builder.MaxLength(); - Y_ENSURE(maxBlockLength > 0); - TVector<std::shared_ptr<arrow::ArrayData>> outputArrays; - Y_ENSURE(array1.length == array2.length); - for (int64_t i = 0; i < array1.length;) { - for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) { - auto item1 = reader1.GetItem(array1, i); - auto item2 = reader2.GetItem(array2, i); - TDerived::Process(item1, item2, [&](TBlockItem out) { - builder.Add(out); - }); - } - auto outputDatum = builder.Build(false); - ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); }); - } - - *res = MakeArray(outputArrays); - } - - return arrow::Status::OK(); - } -}; - template <typename TInput, typename TOutput, TOutput(*Core)(TInput)> arrow::Status UnaryPreallocatedExecImpl(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) { Y_UNUSED(ctx); diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt index fc63c16d947..443f7fdf0ca 100644 --- a/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt @@ -13,7 +13,6 @@ target_link_libraries(string_udf INTERFACE yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata @@ -25,7 +24,7 @@ target_link_libraries(string_udf INTERFACE add_global_library_for(string_udf.global string_udf) target_compile_options(string_udf.global PRIVATE -DUDF_ABI_VERSION_MAJOR=2 - -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_MINOR=28 -DUDF_ABI_VERSION_PATCH=0 ) target_link_libraries(string_udf.global PUBLIC @@ -33,7 +32,6 @@ target_link_libraries(string_udf.global PUBLIC yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt index 8213bf95ba9..2548da1b236 100644 --- a/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt @@ -14,7 +14,6 @@ target_link_libraries(string_udf INTERFACE yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata @@ -26,7 +25,7 @@ target_link_libraries(string_udf INTERFACE add_global_library_for(string_udf.global string_udf) target_compile_options(string_udf.global PRIVATE -DUDF_ABI_VERSION_MAJOR=2 - -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_MINOR=28 -DUDF_ABI_VERSION_PATCH=0 ) target_link_libraries(string_udf.global PUBLIC @@ -35,7 +34,6 @@ target_link_libraries(string_udf.global PUBLIC yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt index 8213bf95ba9..2548da1b236 100644 --- a/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt @@ -14,7 +14,6 @@ target_link_libraries(string_udf INTERFACE yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata @@ -26,7 +25,7 @@ target_link_libraries(string_udf INTERFACE add_global_library_for(string_udf.global string_udf) target_compile_options(string_udf.global PRIVATE -DUDF_ABI_VERSION_MAJOR=2 - -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_MINOR=28 -DUDF_ABI_VERSION_PATCH=0 ) target_link_libraries(string_udf.global PUBLIC @@ -35,7 +34,6 @@ target_link_libraries(string_udf.global PUBLIC yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt index fc63c16d947..443f7fdf0ca 100644 --- a/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt @@ -13,7 +13,6 @@ target_link_libraries(string_udf INTERFACE yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata @@ -25,7 +24,7 @@ target_link_libraries(string_udf INTERFACE add_global_library_for(string_udf.global string_udf) target_compile_options(string_udf.global PRIVATE -DUDF_ABI_VERSION_MAJOR=2 - -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_MINOR=28 -DUDF_ABI_VERSION_PATCH=0 ) target_link_libraries(string_udf.global PUBLIC @@ -33,7 +32,6 @@ target_link_libraries(string_udf.global PUBLIC yutil yql-public-udf public-udf-support - public-udf-arrow library-cpp-charset cpp-deprecated-split cpp-html-pcdata diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp index e1ceed7bbd6..c2bce4ae420 100644 --- a/ydb/library/yql/udfs/common/string/string_udf.cpp +++ b/ydb/library/yql/udfs/common/string/string_udf.cpp @@ -9,8 +9,6 @@ #include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h> #include <library/cpp/string_utils/quote/quote.h> -#include <ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h> - #include <util/charset/wide.h> #include <util/generic/vector.h> #include <util/stream/format.h> @@ -28,6 +26,7 @@ using namespace NKikimr; using namespace NUdf; +namespace { #define STRING_UDF(udfName, function) \ SIMPLE_STRICT_UDF(T##udfName, char*(TAutoMap<char*>)) { \ const TString input(args[0].AsStringRef()); \ @@ -370,7 +369,7 @@ using namespace NUdf; return valueBuilder->NewString(JoinSeq(delimeter, items)); } - BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { + SIMPLE_STRICT_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { Y_UNUSED(valueBuilder); const TStringBuf left(args[0].AsStringRef()); const TStringBuf right(args[1].AsStringRef()); @@ -378,18 +377,6 @@ using namespace NUdf; return TUnboxedValuePod(result); } - struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> { - template <typename TSink> - static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - const std::string_view left(arg1.AsStringRef()); - const std::string_view right(arg2.AsStringRef()); - const ui64 result = NLevenshtein::Distance(left, right); - sink(TBlockItem(result)); - } - }; - - END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do); - static constexpr ui64 padLim = 1000000; SIMPLE_UDF_OPTIONS(TRightPad, char*(TAutoMap<char*>, ui64, TOptional<char*>), builder.OptionalArgs(1)) { @@ -572,5 +559,6 @@ using namespace NUdf; TPrec, TToByteList, TFromByteList) +} REGISTER_MODULES(TStringModule) |