summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author imunkin <[email protected]> 2025-04-22 12:45:54 +0300
committer imunkin <[email protected]> 2025-04-22 13:13:16 +0300
commit 07cda51d4e3ba8b8e5df8b3542deec8f7fa474a5 (patch)
tree 0f2848f6e23dadd396b24191878c6c0de579976c
parent 57c56063ee203e5fb003e293f449a635f46053e0 (diff)
YQL-19884: Add block implementation for STRING_TWO_ARGS_UDF
commit_hash:22ad01050569869e2e75f89e65b06abf113791e4
-rw-r--r-- yql/essentials/udfs/common/string/string_udf.cpp 44
-rw-r--r-- yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt 52
-rw-r--r-- yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt 52
-rw-r--r-- yql/essentials/udfs/common/string/test/cases/BlockFind.sql 4
-rw-r--r-- yql/essentials/udfs/common/string/test/cases/Find.sql 4
5 files changed, 142 insertions, 14 deletions
diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp
index 9d2920ec804..161c51c060b 100644
--- a/yql/essentials/udfs/common/string/string_udf.cpp
+++ b/yql/essentials/udfs/common/string/string_udf.cpp
@@ -153,17 +153,36 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>),
} \
}
-#define STRING_TWO_ARGS_UDF(udfName, function) \
- SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
- Y_UNUSED(valueBuilder); \
- if (args[0]) { \
- const TString haystack(args[0].AsStringRef()); \
- const TString needle(args[1].AsStringRef()); \
- return TUnboxedValuePod(function(haystack, needle)); \
- } else { \
- return TUnboxedValuePod(false); \
- } \
- }
+#define STRING_TWO_ARGS_UDF(udfName, function) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
+ Y_UNUSED(valueBuilder); \
+ if (args[0]) { \
+ const TString haystack(args[0].AsStringRef()); \
+ const TString needle(args[1].AsStringRef()); \
+ return TUnboxedValuePod(function(haystack, needle)); \
+ } else { \
+ return TUnboxedValuePod(false); \
+ } \
+ } \
+ \
+ struct T##udfName##KernelExec \
+ : public TBinaryKernelExec<T##udfName##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, \
+ TBlockItem arg2, const TSink& sink) \
+ { \
+ if (arg1) { \
+ const TString haystack(arg1.AsStringRef()); \
+ const TString needle(arg2.AsStringRef()); \
+ sink(TBlockItem(function(haystack, needle))); \
+ } else { \
+ sink(TBlockItem(false)); \
+ } \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
#define IS_ASCII_UDF(function) \
BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \
@@ -361,9 +380,6 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>),
XX(HasPrefix, StartsWith) \
XX(HasSuffix, EndsWith)
-// NOTE: The functions below are marked as deprecated, so block implementation
-// is not required for them. Hence, STRING_TWO_ARGS_UDF provides only the
-// scalar one at the moment.
#define STRING_TWO_ARGS_UDF_MAP(XX) \
XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
index f6374e682e5..5470e5a7a37 100644
--- a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
@@ -22,6 +22,34 @@
]
];
[
+ "icprefix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "icstarts";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "icsuffix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "icends";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
"levenstein";
[
"DataType";
@@ -35,31 +63,55 @@
[
"fdsa";
%false;
+ %false;
+ %false;
+ %false;
+ %false;
"3"
];
[
"aswedfg";
%true;
+ %true;
+ %true;
+ %false;
+ %false;
"5"
];
[
"asdadsaasd";
%true;
+ %true;
+ %true;
+ %false;
+ %false;
"8"
];
[
"gdsfsassas";
%true;
+ %false;
+ %false;
+ %true;
+ %true;
"8"
];
[
"";
%false;
+ %false;
+ %false;
+ %false;
+ %false;
"2"
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
%false;
+ %false;
+ %false;
+ %false;
+ %false;
"23"
]
]
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
index cec53212501..7abed4de2a4 100644
--- a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
@@ -50,6 +50,34 @@
]
];
[
+ "icprefix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "icstarts";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "icsuffix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "icends";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
"find";
[
"DataType";
@@ -81,6 +109,10 @@
%false;
%false;
%false;
+ %false;
+ %false;
+ %false;
+ %false;
"-1";
"-1";
"3"
@@ -92,6 +124,10 @@
%true;
%false;
%false;
+ %true;
+ %true;
+ %false;
+ %false;
"0";
"0";
"5"
@@ -103,6 +139,10 @@
%true;
%false;
%false;
+ %true;
+ %true;
+ %false;
+ %false;
"0";
"7";
"8"
@@ -114,6 +154,10 @@
%false;
%true;
%true;
+ %false;
+ %false;
+ %true;
+ %true;
"5";
"8";
"8"
@@ -125,6 +169,10 @@
%false;
%false;
%false;
+ %false;
+ %false;
+ %false;
+ %false;
"-1";
"-1";
"2"
@@ -136,6 +184,10 @@
%false;
%false;
%false;
+ %false;
+ %false;
+ %false;
+ %false;
"-1";
"-1";
"23"
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
index f1c855bcc11..0135bef3627 100644
--- a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
+++ b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
@@ -3,5 +3,9 @@ pragma UseBlocks;
SELECT
value,
String::Contains(value, "as") AS contains,
+ String::HasPrefixIgnoreCase(value, "AS") AS icprefix,
+ String::StartsWithIgnoreCase(value, "AS") AS icstarts,
+ String::HasSuffixIgnoreCase(value, "AS") AS icsuffix,
+ String::EndsWithIgnoreCase(value, "AS") AS icends,
String::LevensteinDistance(value, "as") AS levenstein
FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Find.sql b/yql/essentials/udfs/common/string/test/cases/Find.sql
index 273553dcf9e..856b53f6d4d 100644
--- a/yql/essentials/udfs/common/string/test/cases/Find.sql
+++ b/yql/essentials/udfs/common/string/test/cases/Find.sql
@@ -6,6 +6,10 @@ SELECT
String::StartsWith(value, "as") AS starts,
String::HasSuffix(value, "as") AS suffix,
String::EndsWith(value, "as") AS ends,
+ String::HasPrefixIgnoreCase(value, "AS") AS icprefix,
+ String::StartsWithIgnoreCase(value, "AS") AS icstarts,
+ String::HasSuffixIgnoreCase(value, "AS") AS icsuffix,
+ String::EndsWithIgnoreCase(value, "AS") AS icends,
String::Find(value, "as") AS find,
String::ReverseFind(value, "as") AS rfind,
String::LevensteinDistance(value, "as") AS levenstein