diff options
| author | imunkin <[email protected]> | 2025-04-22 12:45:54 +0300 | 
|---|---|---|
| committer | imunkin <[email protected]> | 2025-04-22 13:13:16 +0300 | 
| commit | 07cda51d4e3ba8b8e5df8b3542deec8f7fa474a5 (patch) | |
| tree | 0f2848f6e23dadd396b24191878c6c0de579976c /yql/essentials/udfs/common | |
| parent | 57c56063ee203e5fb003e293f449a635f46053e0 (diff) | |
YQL-19884: Add block implementation for STRING_TWO_ARGS_UDF
commit_hash:22ad01050569869e2e75f89e65b06abf113791e4
Diffstat (limited to 'yql/essentials/udfs/common')
5 files changed, 142 insertions, 14 deletions
diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp index 9d2920ec804..161c51c060b 100644 --- a/yql/essentials/udfs/common/string/string_udf.cpp +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -153,17 +153,36 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>),          }                                                              \      } -#define STRING_TWO_ARGS_UDF(udfName, function)                          \ -    SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) {      \ -        Y_UNUSED(valueBuilder);                                         \ -        if (args[0]) {                                                  \ -            const TString haystack(args[0].AsStringRef());              \ -            const TString needle(args[1].AsStringRef());                \ -            return TUnboxedValuePod(function(haystack, needle));        \ -        } else {                                                        \ -            return TUnboxedValuePod(false);                             \ -        }                                                               \ -    } +#define STRING_TWO_ARGS_UDF(udfName, function)                                 \ +    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ +        Y_UNUSED(valueBuilder);                                                \ +        if (args[0]) {                                                         \ +            const TString haystack(args[0].AsStringRef());                     \ +            const TString needle(args[1].AsStringRef());                       \ +            return TUnboxedValuePod(function(haystack, needle));               \ +        } else {                                                               \ +            return TUnboxedValuePod(false);                                    \ +        }                                                                      \ +    }                                                                          \ +                                                                               \ +    struct T##udfName##KernelExec                                              \ +        : public TBinaryKernelExec<T##udfName##KernelExec>                     \ +    {                                                                          \ +        template <typename TSink>                                              \ +        static void Process(const IValueBuilder*, TBlockItem arg1,             \ +                            TBlockItem arg2, const TSink& sink)                \ +        {                                                                      \ +            if (arg1) {                                                        \ +                const TString haystack(arg1.AsStringRef());                    \ +                const TString needle(arg2.AsStringRef());                      \ +                sink(TBlockItem(function(haystack, needle)));                  \ +            } else {                                                           \ +                sink(TBlockItem(false));                                       \ +            }                                                                  \ +        }                                                                      \ +    };                                                                         \ +                                                                               \ +    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)  #define IS_ASCII_UDF(function)                                                           \      BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) {                \ @@ -361,9 +380,6 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>),      XX(HasPrefix, StartsWith)   \      XX(HasSuffix, EndsWith) -// NOTE: The functions below are marked as deprecated, so block implementation -// is not required for them. Hence, STRING_TWO_ARGS_UDF provides only the -// scalar one at the moment.  #define STRING_TWO_ARGS_UDF_MAP(XX)                    \      XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \      XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase)   \ diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt index f6374e682e5..5470e5a7a37 100644 --- a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt @@ -22,6 +22,34 @@                                  ]                              ];                              [ +                                "icprefix"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [ +                                "icstarts"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [ +                                "icsuffix"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [ +                                "icends"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [                                  "levenstein";                                  [                                      "DataType"; @@ -35,31 +63,55 @@                      [                          "fdsa";                          %false; +                        %false; +                        %false; +                        %false; +                        %false;                          "3"                      ];                      [                          "aswedfg";                          %true; +                        %true; +                        %true; +                        %false; +                        %false;                          "5"                      ];                      [                          "asdadsaasd";                          %true; +                        %true; +                        %true; +                        %false; +                        %false;                          "8"                      ];                      [                          "gdsfsassas";                          %true; +                        %false; +                        %false; +                        %true; +                        %true;                          "8"                      ];                      [                          "";                          %false; +                        %false; +                        %false; +                        %false; +                        %false;                          "2"                      ];                      [                          "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";                          %false; +                        %false; +                        %false; +                        %false; +                        %false;                          "23"                      ]                  ] diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt index cec53212501..7abed4de2a4 100644 --- a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt @@ -50,6 +50,34 @@                                  ]                              ];                              [ +                                "icprefix"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [ +                                "icstarts"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [ +                                "icsuffix"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [ +                                "icends"; +                                [ +                                    "DataType"; +                                    "Bool" +                                ] +                            ]; +                            [                                  "find";                                  [                                      "DataType"; @@ -81,6 +109,10 @@                          %false;                          %false;                          %false; +                        %false; +                        %false; +                        %false; +                        %false;                          "-1";                          "-1";                          "3" @@ -92,6 +124,10 @@                          %true;                          %false;                          %false; +                        %true; +                        %true; +                        %false; +                        %false;                          "0";                          "0";                          "5" @@ -103,6 +139,10 @@                          %true;                          %false;                          %false; +                        %true; +                        %true; +                        %false; +                        %false;                          "0";                          "7";                          "8" @@ -114,6 +154,10 @@                          %false;                          %true;                          %true; +                        %false; +                        %false; +                        %true; +                        %true;                          "5";                          "8";                          "8" @@ -125,6 +169,10 @@                          %false;                          %false;                          %false; +                        %false; +                        %false; +                        %false; +                        %false;                          "-1";                          "-1";                          "2" @@ -136,6 +184,10 @@                          %false;                          %false;                          %false; +                        %false; +                        %false; +                        %false; +                        %false;                          "-1";                          "-1";                          "23" diff --git a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql index f1c855bcc11..0135bef3627 100644 --- a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql +++ b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql @@ -3,5 +3,9 @@ pragma UseBlocks;  SELECT      value,      String::Contains(value, "as") AS contains, +    String::HasPrefixIgnoreCase(value, "AS") AS icprefix, +    String::StartsWithIgnoreCase(value, "AS") AS icstarts, +    String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, +    String::EndsWithIgnoreCase(value, "AS") AS icends,      String::LevensteinDistance(value, "as") AS levenstein  FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/Find.sql b/yql/essentials/udfs/common/string/test/cases/Find.sql index 273553dcf9e..856b53f6d4d 100644 --- a/yql/essentials/udfs/common/string/test/cases/Find.sql +++ b/yql/essentials/udfs/common/string/test/cases/Find.sql @@ -6,6 +6,10 @@ SELECT      String::StartsWith(value, "as") AS starts,      String::HasSuffix(value, "as") AS suffix,      String::EndsWith(value, "as") AS ends, +    String::HasPrefixIgnoreCase(value, "AS") AS icprefix, +    String::StartsWithIgnoreCase(value, "AS") AS icstarts, +    String::HasSuffixIgnoreCase(value, "AS") AS icsuffix, +    String::EndsWithIgnoreCase(value, "AS") AS icends,      String::Find(value, "as") AS find,      String::ReverseFind(value, "as") AS rfind,      String::LevensteinDistance(value, "as") AS levenstein  | 
