about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author eak1mov <eak1mov@yandex-team.com> 2023-06-06 21:09:43 +0300
committer eak1mov <eak1mov@yandex-team.com> 2023-06-06 21:09:43 +0300
commit d0919e63bf76e5117a2544b75b9ea3ed3dac7b77 (patch)
tree ab93fdd00dc8a9c7dfa9ed844a13367273f0d289
parent 94ab32ef88972cb80d8960aa928650343f0fb860 (diff)
download ydb-d0919e63bf76e5117a2544b75b9ea3ed3dac7b77.tar.gz
Revert "Kernel for String::LevensteinDistance + BinaryKernel helper"
This reverts commit 7d2e5bf13855490b6221d731db3ef16973ec8714, reversing changes made to 17dd633f13f3be5d1e240c31d0c4498b9a62beee.
-rw-r--r-- ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h 82
-rw-r--r-- ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt 4
-rw-r--r-- ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt 4
-rw-r--r-- ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt 4
-rw-r--r-- ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt 4
-rw-r--r-- ydb/library/yql/udfs/common/string/string_udf.cpp 18
6 files changed, 7 insertions, 109 deletions
diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h
index 2c3badb0e2a..ed0755f46df 100644
--- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h
+++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h
@@ -322,88 +322,6 @@ struct TUnaryKernelExec {
}
};
-template <typename TDerived>
-struct TBinaryKernelExec {
- static arrow::Status Do(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) {
- auto& state = dynamic_cast<TUdfKernelState&>(*ctx->state());
- auto& reader1 = state.GetReader(0);
- auto& reader2 = state.GetReader(1);
- const auto& arg1 = batch.values[0];
- const auto& arg2 = batch.values[1];
- if (arg1.is_scalar() && arg2.is_scalar()) {
- auto& builder = state.GetScalarBuilder();
- auto item1 = reader1.GetScalarItem(*arg1.scalar());
- auto item2 = reader2.GetScalarItem(*arg2.scalar());
- TDerived::Process(item1, item2, [&](TBlockItem out) {
- *res = builder.Build(out);
- });
- }
- else if (arg1.is_scalar() && arg2.is_array()) {
- auto item1 = reader1.GetScalarItem(*arg1.scalar());
- auto& array2 = *arg2.array();
- auto& builder = state.GetArrayBuilder();
- size_t maxBlockLength = builder.MaxLength();
- Y_ENSURE(maxBlockLength > 0);
- TVector<std::shared_ptr<arrow::ArrayData>> outputArrays;
- for (int64_t i = 0; i < array2.length;) {
- for (size_t j = 0; j < maxBlockLength && i < array2.length; ++j, ++i) {
- auto item2 = reader2.GetItem(array2, i);
- TDerived::Process(item1, item2, [&](TBlockItem out) {
- builder.Add(out);
- });
- }
- auto outputDatum = builder.Build(false);
- ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); });
- }
-
- *res = MakeArray(outputArrays);
- } else if (arg1.is_array() && arg2.is_scalar()) {
- auto& array1 = *arg1.array();
- auto item2 = reader2.GetScalarItem(*arg2.scalar());
- auto& builder = state.GetArrayBuilder();
- size_t maxBlockLength = builder.MaxLength();
- Y_ENSURE(maxBlockLength > 0);
- TVector<std::shared_ptr<arrow::ArrayData>> outputArrays;
- for (int64_t i = 0; i < array1.length;) {
- for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) {
- auto item1 = reader1.GetItem(array1, i);
- TDerived::Process(item1, item2, [&](TBlockItem out) {
- builder.Add(out);
- });
- }
- auto outputDatum = builder.Build(false);
- ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); });
- }
-
- *res = MakeArray(outputArrays);
- } else {
- Y_ENSURE(arg1.is_array() && arg2.is_array());
- auto& array1 = *arg1.array();
- auto& array2 = *arg2.array();
- auto& builder = state.GetArrayBuilder();
- size_t maxBlockLength = builder.MaxLength();
- Y_ENSURE(maxBlockLength > 0);
- TVector<std::shared_ptr<arrow::ArrayData>> outputArrays;
- Y_ENSURE(array1.length == array2.length);
- for (int64_t i = 0; i < array1.length;) {
- for (size_t j = 0; j < maxBlockLength && i < array1.length; ++j, ++i) {
- auto item1 = reader1.GetItem(array1, i);
- auto item2 = reader2.GetItem(array2, i);
- TDerived::Process(item1, item2, [&](TBlockItem out) {
- builder.Add(out);
- });
- }
- auto outputDatum = builder.Build(false);
- ForEachArrayData(outputDatum, [&](const auto& arr) { outputArrays.push_back(arr); });
- }
-
- *res = MakeArray(outputArrays);
- }
-
- return arrow::Status::OK();
- }
-};
-
template <typename TInput, typename TOutput, TOutput(*Core)(TInput)>
arrow::Status UnaryPreallocatedExecImpl(arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) {
Y_UNUSED(ctx);
diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt
index fc63c16d947..443f7fdf0ca 100644
--- a/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/udfs/common/string/CMakeLists.darwin-x86_64.txt
@@ -13,7 +13,6 @@ target_link_libraries(string_udf INTERFACE
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
@@ -25,7 +24,7 @@ target_link_libraries(string_udf INTERFACE
add_global_library_for(string_udf.global string_udf)
target_compile_options(string_udf.global PRIVATE
-DUDF_ABI_VERSION_MAJOR=2
- -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_MINOR=28
-DUDF_ABI_VERSION_PATCH=0
)
target_link_libraries(string_udf.global PUBLIC
@@ -33,7 +32,6 @@ target_link_libraries(string_udf.global PUBLIC
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt
index 8213bf95ba9..2548da1b236 100644
--- a/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/udfs/common/string/CMakeLists.linux-aarch64.txt
@@ -14,7 +14,6 @@ target_link_libraries(string_udf INTERFACE
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
@@ -26,7 +25,7 @@ target_link_libraries(string_udf INTERFACE
add_global_library_for(string_udf.global string_udf)
target_compile_options(string_udf.global PRIVATE
-DUDF_ABI_VERSION_MAJOR=2
- -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_MINOR=28
-DUDF_ABI_VERSION_PATCH=0
)
target_link_libraries(string_udf.global PUBLIC
@@ -35,7 +34,6 @@ target_link_libraries(string_udf.global PUBLIC
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt
index 8213bf95ba9..2548da1b236 100644
--- a/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/udfs/common/string/CMakeLists.linux-x86_64.txt
@@ -14,7 +14,6 @@ target_link_libraries(string_udf INTERFACE
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
@@ -26,7 +25,7 @@ target_link_libraries(string_udf INTERFACE
add_global_library_for(string_udf.global string_udf)
target_compile_options(string_udf.global PRIVATE
-DUDF_ABI_VERSION_MAJOR=2
- -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_MINOR=28
-DUDF_ABI_VERSION_PATCH=0
)
target_link_libraries(string_udf.global PUBLIC
@@ -35,7 +34,6 @@ target_link_libraries(string_udf.global PUBLIC
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
diff --git a/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt b/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt
index fc63c16d947..443f7fdf0ca 100644
--- a/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/udfs/common/string/CMakeLists.windows-x86_64.txt
@@ -13,7 +13,6 @@ target_link_libraries(string_udf INTERFACE
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
@@ -25,7 +24,7 @@ target_link_libraries(string_udf INTERFACE
add_global_library_for(string_udf.global string_udf)
target_compile_options(string_udf.global PRIVATE
-DUDF_ABI_VERSION_MAJOR=2
- -DUDF_ABI_VERSION_MINOR=33
+ -DUDF_ABI_VERSION_MINOR=28
-DUDF_ABI_VERSION_PATCH=0
)
target_link_libraries(string_udf.global PUBLIC
@@ -33,7 +32,6 @@ target_link_libraries(string_udf.global PUBLIC
yutil
yql-public-udf
public-udf-support
- public-udf-arrow
library-cpp-charset
cpp-deprecated-split
cpp-html-pcdata
diff --git a/ydb/library/yql/udfs/common/string/string_udf.cpp b/ydb/library/yql/udfs/common/string/string_udf.cpp
index e1ceed7bbd6..c2bce4ae420 100644
--- a/ydb/library/yql/udfs/common/string/string_udf.cpp
+++ b/ydb/library/yql/udfs/common/string/string_udf.cpp
@@ -9,8 +9,6 @@
#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h>
#include <library/cpp/string_utils/quote/quote.h>
-#include <ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h>
-
#include <util/charset/wide.h>
#include <util/generic/vector.h>
#include <util/stream/format.h>
@@ -28,6 +26,7 @@
using namespace NKikimr;
using namespace NUdf;
+namespace {
#define STRING_UDF(udfName, function) \
SIMPLE_STRICT_UDF(T##udfName, char*(TAutoMap<char*>)) { \
const TString input(args[0].AsStringRef()); \
@@ -370,7 +369,7 @@ using namespace NUdf;
return valueBuilder->NewString(JoinSeq(delimeter, items));
}
- BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) {
+ SIMPLE_STRICT_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) {
Y_UNUSED(valueBuilder);
const TStringBuf left(args[0].AsStringRef());
const TStringBuf right(args[1].AsStringRef());
@@ -378,18 +377,6 @@ using namespace NUdf;
return TUnboxedValuePod(result);
}
- struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> {
- template <typename TSink>
- static void Process(TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
- const std::string_view left(arg1.AsStringRef());
- const std::string_view right(arg2.AsStringRef());
- const ui64 result = NLevenshtein::Distance(left, right);
- sink(TBlockItem(result));
- }
- };
-
- END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do);
-
static constexpr ui64 padLim = 1000000;
SIMPLE_UDF_OPTIONS(TRightPad, char*(TAutoMap<char*>, ui64, TOptional<char*>), builder.OptionalArgs(1)) {
@@ -572,5 +559,6 @@ using namespace NUdf;
TPrec,
TToByteList,
TFromByteList)
+}
REGISTER_MODULES(TStringModule)