diff options
| author | vvvv <[email protected]> | 2025-06-04 12:10:23 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2025-06-04 12:31:29 +0300 |
| commit | 62b27c7d93a63b7c2ab41fe6882cbcbdfc5218a6 (patch) | |
| tree | 0b97c2665aa217b8cdfd2d7d1f7afcf662157730 /yql/essentials/udfs/common | |
| parent | 363a2f6e500d85aea80c6edd60a7918b4eb43fa9 (diff) | |
YQL-20050 ascii ilike in the peephole
commit_hash:ecabde17ea40a36ec6c992bec2b371529d6cc84c
Diffstat (limited to 'yql/essentials/udfs/common')
| -rw-r--r-- | yql/essentials/udfs/common/string/string_udf.cpp | 77 |
1 files changed, 52 insertions, 25 deletions
diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp index 273e7617d5b..5c24485129d 100644 --- a/yql/essentials/udfs/common/string/string_udf.cpp +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -186,11 +186,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } #define STRING_ASCII_CMP_IGNORE_CASE_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, \ - bool(TOptional<char*>, char*), \ - builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) \ - { \ - Y_UNUSED(valueBuilder); \ + TUnboxedValuePod udfName##Impl(const TUnboxedValuePod* args) { \ if (args[0]) { \ const TString haystack(args[0].AsStringRef()); \ const TString needle(args[1].AsStringRef()); \ @@ -217,9 +213,26 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ }; \ \ - END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, \ + bool(TOptional<char*>, char*), \ + builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) \ + { \ + Y_UNUSED(valueBuilder); \ + return udfName##Impl(args); \ + } \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \ + \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_##udfName, \ + bool(TOptional<char*>, char*)) \ + { \ + Y_UNUSED(valueBuilder); \ + return udfName##Impl(args); \ + } \ + \ + END_SIMPLE_ARROW_UDF(T_yql_##udfName, T##udfName##KernelExec::Do) -#define IS_ASCII_UDF(function) \ +#define IS_ASCII_UDF(function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \ Y_UNUSED(valueBuilder); \ if (args[0]) { \ @@ -516,24 +529,6 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), return AsciiToUpper(a) == AsciiToUpper(b); } - BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*), - builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) - { - Y_UNUSED(valueBuilder); - if (!args[0]) { - return TUnboxedValuePod(false); - } - - const TString haystack(args[0].AsStringRef()); - const TString needle(args[1].AsStringRef()); - if (haystack.empty()) { - return TUnboxedValuePod(needle.empty()); - } - const auto found = std::search(haystack.cbegin(), haystack.cend(), - needle.cbegin(), needle.cend(), IgnoreCaseComparator); - return TUnboxedValuePod(found != haystack.cend()); - } - struct TAsciiContainsIgnoreCaseKernelExec : public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec> { @@ -554,8 +549,37 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } }; + TUnboxedValuePod AsciiContainsIgnoreCaseImpl(const TUnboxedValuePod* args) { + if (!args[0]) { + return TUnboxedValuePod(false); + } + + const TString haystack(args[0].AsStringRef()); + const TString needle(args[1].AsStringRef()); + if (haystack.empty()) { + return TUnboxedValuePod(needle.empty()); + } + const auto found = std::search(haystack.cbegin(), haystack.cend(), + needle.cbegin(), needle.cend(), IgnoreCaseComparator); + return TUnboxedValuePod(found != haystack.cend()); + } + + BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*), + builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) + { + Y_UNUSED(valueBuilder); + return AsciiContainsIgnoreCaseImpl(args); + } + END_SIMPLE_ARROW_UDF(TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); + BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, bool(TOptional<char*>, char*)) + { + Y_UNUSED(valueBuilder); + return AsciiContainsIgnoreCaseImpl(args); + } + + END_SIMPLE_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) { if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) @@ -981,6 +1005,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } #define STRING_REGISTER_UDF(udfName, ...) T##udfName, +#define STRING_OPT_REGISTER_UDF(udfName, ...) T_yql_##udfName, STRING_UDF_MAP(STRING_UDF) STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF) @@ -1006,6 +1031,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF) STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_OPT_REGISTER_UDF) IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) @@ -1021,6 +1047,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), TRemoveLast, TContains, TAsciiContainsIgnoreCase, + T_yql_AsciiContainsIgnoreCase, TFind, TReverseFind, TSubstring, |
