diff options
author | atarasov5 <[email protected]> | 2025-02-10 13:41:59 +0300 |
---|---|---|
committer | atarasov5 <[email protected]> | 2025-02-10 14:40:42 +0300 |
commit | e46bed95ee43ea70afccfa413ea7e9f9e088cc33 (patch) | |
tree | dd3f39b014a9f1aacc2c132b7fccf6b770e411b3 | |
parent | d5a7416eb3d3b6e73c97d2511781875814cb7045 (diff) |
YQL-19535: Provide block implementations for some functions
YQL-19535: Provide block operations
YQL-19535: Specify tests for blocked operations
commit_hash:032aa58fc3f44f0eba3d9b38def021178da949ce
33 files changed, 865 insertions, 245 deletions
diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json index 564c44698bf..f7aa9bc69d3 100644 --- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json @@ -588,9 +588,9 @@ ], "test.test[expr-struct_literal--Debug]": [ { - "checksum": "7013abbd2487b4c5c0783bd9d8e8773e", - "size": 582, - "uri": "https://{canondata_backend}/1942525/ede9d81525f3cde3c09402fe9435fdbba85f47bc/resource.tar.gz#test.test_expr-struct_literal--Debug_/opt.yql" + "checksum": "32fb9ad7f0ff99f13245971fde9c9e44", + "size": 607, + "uri": "https://{canondata_backend}/1600758/668d9612baf2b806cdbf57a4a5626576611cb0c8/resource.tar.gz#test.test_expr-struct_literal--Debug_/opt.yql" } ], "test.test[expr-struct_literal--Results]": [ diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json index e299c4d970d..9338973cc71 100644 --- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json @@ -15,9 +15,9 @@ ], "test.test[action-eval_capture--Debug]": [ { - "checksum": "addd79d812135465fc39c6ede76b5b00", - "size": 1065, - "uri": "https://{canondata_backend}/1925821/e00f3e167890c5f5da97383429fa618c17c22f4b/resource.tar.gz#test.test_action-eval_capture--Debug_/opt.yql" + "checksum": "11fa4fe28d1d33bfbe682131dba7ccdf", + "size": 1090, + "uri": "https://{canondata_backend}/1600758/8128a043e648302a268bf13245bc303a361f75b9/resource.tar.gz#test.test_action-eval_capture--Debug_/opt.yql" } ], "test.test[action-eval_capture--Results]": [ @@ -1302,9 +1302,9 @@ ], "test.test[udf-trivial_udf--Debug]": [ { - "checksum": "e30ef93274f818b56638089fa4a0513e", - "size": 400, - "uri": "https://{canondata_backend}/995452/57f8b127ed5fa9fae2dd5ebb0f5870d86a7fcd2f/resource.tar.gz#test.test_udf-trivial_udf--Debug_/opt.yql" + "checksum": "8a826f54ac3877f855e9d5f4039f1957", + "size": 425, + "uri": "https://{canondata_backend}/1937001/da32717675dd7b959b82585f5fe8b8f1d2542461/resource.tar.gz#test.test_udf-trivial_udf--Debug_/opt.yql" } ], "test.test[udf-trivial_udf--Results]": [ diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json index 97c6fb14dd7..97562f4708a 100644 --- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json @@ -281,9 +281,9 @@ ], "test.test[binding-compact_named_with_subq_contexts--Debug]": [ { - "checksum": "6ae122590fa1a3740afe06c807082844", - "size": 1096, - "uri": "https://{canondata_backend}/1925821/db505909f0fb5dcb9a1c2635b652923e2e5d33c8/resource.tar.gz#test.test_binding-compact_named_with_subq_contexts--Debug_/opt.yql" + "checksum": "5fbd0bbbfed9dceb14486930c58f6d2a", + "size": 1135, + "uri": "https://{canondata_backend}/1889210/9d6331356a8b5731f25d9bf2d510824a0256baa0/resource.tar.gz#test.test_binding-compact_named_with_subq_contexts--Debug_/opt.yql" } ], "test.test[binding-compact_named_with_subq_contexts--Results]": [ diff --git a/yql/essentials/tests/sql/minirun/part5/canondata/result.json b/yql/essentials/tests/sql/minirun/part5/canondata/result.json index 148c9d957ee..d109287d0f3 100644 --- a/yql/essentials/tests/sql/minirun/part5/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part5/canondata/result.json @@ -1,9 +1,9 @@ { "test.test[action-action_udf_args--Debug]": [ { - "checksum": "8f84413764bb8e1f2b44fbc31956d7ec", - "size": 430, - "uri": "https://{canondata_backend}/1925821/6007882aec2e7b1330cc057157b466b121eec1eb/resource.tar.gz#test.test_action-action_udf_args--Debug_/opt.yql" + "checksum": "460a745ac85e95986996b9d1aa9379ae", + "size": 455, + "uri": "https://{canondata_backend}/1809005/41147930b57b9f7a31e613bdd3a9f9eaef9009f6/resource.tar.gz#test.test_action-action_udf_args--Debug_/opt.yql" } ], "test.test[action-action_udf_args--Results]": [ @@ -1091,9 +1091,9 @@ ], "test.test[library-library_udf--Debug]": [ { - "checksum": "e30ef93274f818b56638089fa4a0513e", - "size": 400, - "uri": "https://{canondata_backend}/1942100/1466d7e49a6dc5a8df761a5ac92539095e1a14a0/resource.tar.gz#test.test_library-library_udf--Debug_/opt.yql" + "checksum": "8a826f54ac3877f855e9d5f4039f1957", + "size": 425, + "uri": "https://{canondata_backend}/1942671/adb6336095b48aab4fed8e97a973ecc6eb2c7004/resource.tar.gz#test.test_library-library_udf--Debug_/opt.yql" } ], "test.test[library-library_udf--Results]": [ diff --git a/yql/essentials/tests/sql/minirun/part8/canondata/result.json b/yql/essentials/tests/sql/minirun/part8/canondata/result.json index 99429e3c851..76dd1b302e7 100644 --- a/yql/essentials/tests/sql/minirun/part8/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part8/canondata/result.json @@ -1,9 +1,9 @@ { "test.test[action-eval_percentile-default.txt-Debug]": [ { - "checksum": "f1d59477f03e0fa8b684f5c6db6a2aca", - "size": 2173, - "uri": "https://{canondata_backend}/1925821/dbb639c652a305ac0d22d675f471bfcd73848bae/resource.tar.gz#test.test_action-eval_percentile-default.txt-Debug_/opt.yql" + "checksum": "132b473519fcf8576fbbc8a1ecdfd6bd", + "size": 2202, + "uri": "https://{canondata_backend}/1903280/9b009523486ad950a7d921352a60f1c892f4f1cc/resource.tar.gz#test.test_action-eval_percentile-default.txt-Debug_/opt.yql" } ], "test.test[action-eval_percentile-default.txt-Results]": [ diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h index 4a852a5a6f6..a16582fb4e3 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h @@ -3,6 +3,7 @@ #include <yql/essentials/public/udf/udf_allocator.h> #include <yql/essentials/public/udf/udf_helpers.h> #include <yql/essentials/utils/utf8.h> +#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h> #include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h> #include <library/cpp/unicode/normalization/normalization.h> @@ -24,6 +25,9 @@ using namespace NUdf; using namespace NUnicode; namespace { + inline constexpr bool IsAscii(wchar32 c) noexcept { + return ::IsAscii(c); + } template <class It> struct TIsUnicodeSpaceAdapter { @@ -37,51 +41,144 @@ namespace { return {}; } -#define NORMALIZE_UDF_MAP(XX) \ - XX(Normalize, NFC) \ - XX(NormalizeNFD, NFD) \ - XX(NormalizeNFC, NFC) \ - XX(NormalizeNFKD, NFKD) \ - XX(NormalizeNFKC, NFKC) - -#define IS_CATEGORY_UDF_MAP(XX) \ - XX(IsAscii, IsAscii) \ - XX(IsSpace, IsSpace) \ - XX(IsUpper, IsUpper) \ - XX(IsLower, IsLower) \ - XX(IsDigit, IsDigit) \ - XX(IsAlpha, IsAlpha) \ - XX(IsAlnum, IsAlnum) \ - XX(IsHex, IsHexdigit) - -#define NORMALIZE_UDF(name, mode) \ - SIMPLE_UDF(T##name, TUtf8(TAutoMap<TUtf8>)) { \ - const auto& inputRef = args[0].AsStringRef(); \ - const TUtf16String& input = UTF8ToWide(inputRef.Data(), inputRef.Size()); \ - const TString& output = WideToUTF8(Normalize<mode>(input)); \ - return valueBuilder->NewString(output); \ - } + struct TNoChangesTag {}; -#define IS_CATEGORY_UDF(udfName, function) \ - SIMPLE_UDF(T##udfName, bool(TAutoMap<TUtf8>)) { \ - Y_UNUSED(valueBuilder); \ - const TStringBuf input(args[0].AsStringRef()); \ - bool result = true; \ - wchar32 rune; \ - const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); \ - const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); \ - while (cur != last) { \ - ReadUTF8CharAndAdvance(rune, cur, last); \ - if (!function(rune)) { \ - result = false; \ - break; \ - } \ - } \ - return TUnboxedValuePod(result); \ - } + template <typename TDerived> + struct TScalarOperationMixin { + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) { + Y_DEBUG_ABORT_UNLESS(IsUtf8(args[0].AsStringRef())); + auto&& executeResult = TDerived::Execute(args[0].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); + } + + private: + static TUnboxedValue ProcessResult(const IValueBuilder* builder, TString&& newString, const TUnboxedValuePod*) { + return builder->NewString(std::move(newString)); + } + + template <typename T> + static TUnboxedValue ProcessResult(const IValueBuilder* builder, std::variant<TNoChangesTag, T> newValue, const TUnboxedValuePod* initialArg) { + if (std::holds_alternative<T>(newValue)) { + return ProcessResult(builder, std::move(std::get<T>(newValue)), initialArg); + } else { + return initialArg[0]; + } + } + + static TUnboxedValue ProcessResult(const IValueBuilder* builder, bool result, const TUnboxedValuePod*) { + Y_UNUSED(builder); + return TUnboxedValuePod(result); + } + }; + + template <typename TDerived> + struct TBlockOperationMixin { + template <typename Sync> + static void DoExecute(const TBlockItem arg, const Sync& sync) { + Y_DEBUG_ABORT_UNLESS(IsUtf8(arg.AsStringRef())); + auto&& executeResult = TDerived::Execute(arg.AsStringRef()); + TBlockItem boxedValue = ProcessResult(std::move(executeResult), arg); + sync(boxedValue); + } + + private: + static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(std::move(newString)); + } + + template <typename T> + static TBlockItem ProcessResult(const std::variant<TNoChangesTag, T>& newValue, const TBlockItem arg) { + if (std::holds_alternative<T>(newValue)) { + return ProcessResult(std::get<T>(newValue), arg); + } else { + return arg; + } + } + + static TBlockItem ProcessResult(bool result, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(result); + } + }; - NORMALIZE_UDF_MAP(NORMALIZE_UDF) - IS_CATEGORY_UDF_MAP(IS_CATEGORY_UDF) + template <typename TDerived> + struct TOperationMixin: public TBlockOperationMixin<TDerived>, public TScalarOperationMixin<TDerived> { + using TBlockOperationMixin<TDerived>::DoExecute; + using TScalarOperationMixin<TDerived>::DoExecute; + }; + + template <auto mode> + struct TNormalizeUTF8: public TOperationMixin<TNormalizeUTF8<mode>> { + static TString Execute(TStringRef arg) { + const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); + return WideToUTF8(Normalize<mode>(input)); + } + }; + + template <bool (*Function)(wchar32)> + struct TCheckAllChars: public TOperationMixin<TCheckAllChars<Function>> { + static bool Execute(TStringRef arg) { + const TStringBuf input(arg); + wchar32 rune; + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); + while (cur != last) { + ReadUTF8CharAndAdvance(rune, cur, last); + if (!static_cast<bool (*)(wchar32)>(Function)(rune)) { + return false; + } + } + return true; + } + }; + + template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> + struct TStringToStringMapper: public TOperationMixin<TStringToStringMapper<Function>> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef arg) { + if (auto wide = UTF8ToWide(arg); + static_cast<bool (*)(TUtf16String&, size_t pos, size_t count)>(Function)(wide, 0, TUtf16String::npos)) { + return WideToUTF8(std::move(wide)); + } else { + return TNoChangesTag{}; + } + } + }; + +#define DEFINE_UTF8_OPERATION(udfName, Executor, signature) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, signature) { \ + return Executor::DoExecute(valueBuilder, args); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec<T##udfName##KernelExec> { \ + template <typename TSink> \ + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, const TSink& sink) { \ + Y_UNUSED(valueBuilder); \ + Executor::DoExecute(arg1, sink); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + + DEFINE_UTF8_OPERATION(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(NormalizeNFKD, TNormalizeUTF8<NFKD>, TUtf8(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(NormalizeNFKC, TNormalizeUTF8<NFKC>, TUtf8(TAutoMap<TUtf8>)); + + DEFINE_UTF8_OPERATION(IsAscii, TCheckAllChars<IsAscii>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsSpace, TCheckAllChars<IsSpace>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsUpper, TCheckAllChars<IsUpper>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsLower, TCheckAllChars<IsLower>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsDigit, TCheckAllChars<IsDigit>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsAlpha, TCheckAllChars<IsAlpha>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsAlnum, TCheckAllChars<IsAlnum>, bool(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(IsHex, TCheckAllChars<IsHexdigit>, bool(TAutoMap<TUtf8>)); + + DEFINE_UTF8_OPERATION(ToTitle, TStringToStringMapper<ToTitle>, TUtf8(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(ToUpper, TStringToStringMapper<ToUpper>, TUtf8(TAutoMap<TUtf8>)); + DEFINE_UTF8_OPERATION(ToLower, TStringToStringMapper<ToLower>, TUtf8(TAutoMap<TUtf8>)); SIMPLE_UDF(TIsUtf, bool(TOptional<char*>)) { Y_UNUSED(valueBuilder); @@ -461,27 +558,6 @@ namespace { return valueBuilder->NewString(WideToUTF8(wide)); } - SIMPLE_UDF(TToLower, TUtf8(TAutoMap<TUtf8>)) { - if (auto wide = UTF8ToWide(args->AsStringRef()); ToLower(wide)) - return valueBuilder->NewString(WideToUTF8(wide)); - else - return *args; - } - - SIMPLE_UDF(TToUpper, TUtf8(TAutoMap<TUtf8>)) { - if (auto wide = UTF8ToWide(args->AsStringRef()); ToUpper(wide)) - return valueBuilder->NewString(WideToUTF8(wide)); - else - return *args; - } - - SIMPLE_UDF(TToTitle, TUtf8(TAutoMap<TUtf8>)) { - if (auto wide = UTF8ToWide(args->AsStringRef()); ToTitle(wide)) - return valueBuilder->NewString(WideToUTF8(wide)); - else - return *args; - } - SIMPLE_UDF(TStrip, TUtf8(TAutoMap<TUtf8>)) { const TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef()); const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); @@ -512,33 +588,42 @@ namespace { return TUnboxedValuePod(result); } -#define REGISTER_NORMALIZE_UDF(name, mode) T##name, -#define REGISTER_IS_CATEGORY_UDF(name, function) T##name, #define EXPORTED_UNICODE_BASE_UDF \ - NORMALIZE_UDF_MAP(REGISTER_NORMALIZE_UDF) \ - IS_CATEGORY_UDF_MAP(REGISTER_IS_CATEGORY_UDF) \ - TIsUtf, \ - TGetLength, \ - TSubstring, \ - TFind, \ - TRFind, \ - TSplitToList, \ - TJoinFromList, \ - TLevensteinDistance, \ - TReplaceAll, \ - TReplaceFirst, \ - TReplaceLast, \ - TRemoveAll, \ - TRemoveFirst, \ - TRemoveLast, \ - TToCodePointList, \ - TFromCodePointList, \ - TReverse, \ - TToLower, \ - TToUpper, \ - TToTitle, \ - TToUint64, \ - TTryToUint64, \ - TStrip, \ - TIsUnicodeSet + TIsUtf, \ + TGetLength, \ + TSubstring, \ + TFind, \ + TRFind, \ + TSplitToList, \ + TJoinFromList, \ + TLevensteinDistance, \ + TReplaceAll, \ + TReplaceFirst, \ + TReplaceLast, \ + TRemoveAll, \ + TRemoveFirst, \ + TRemoveLast, \ + TToCodePointList, \ + TFromCodePointList, \ + TReverse, \ + TToLower, \ + TToUpper, \ + TToTitle, \ + TToUint64, \ + TTryToUint64, \ + TStrip, \ + TIsUnicodeSet, \ + TNormalize, \ + TNormalizeNFD, \ + TNormalizeNFC, \ + TNormalizeNFKD, \ + TNormalizeNFKC, \ + TIsAscii, \ + TIsSpace, \ + TIsUpper, \ + TIsLower, \ + TIsDigit, \ + TIsAlpha, \ + TIsAlnum, \ + TIsHex } diff --git a/yql/essentials/udfs/common/unicode_base/lib/ya.make b/yql/essentials/udfs/common/unicode_base/lib/ya.make index f50858d02ae..2fda0829667 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/ya.make +++ b/yql/essentials/udfs/common/unicode_base/lib/ya.make @@ -2,7 +2,7 @@ LIBRARY() YQL_ABI_VERSION( 2 - 27 + 37 0 ) @@ -16,6 +16,7 @@ PEERDIR( library/cpp/unicode/normalization library/cpp/unicode/set yql/essentials/public/udf + yql/essentials/public/udf/arrow yql/essentials/utils ) diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json index 8d19afc4281..8189dd16e08 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json @@ -1,4 +1,19 @@ { + "test.test[BlockIsCategory]": [ + { + "uri": "file://test.test_BlockIsCategory_/results.txt" + } + ], + "test.test[BlockNormalize]": [ + { + "uri": "file://test.test_BlockNormalize_/results.txt" + } + ], + "test.test[BlockTo]": [ + { + "uri": "file://test.test_BlockTo_/results.txt" + } + ], "test.test[Find]": [ { "uri": "file://test.test_Find_/results.txt" @@ -19,6 +34,11 @@ "uri": "file://test.test_List_/results.txt" } ], + "test.test[Normalize]": [ + { + "uri": "file://test.test_Normalize_/results.txt" + } + ], "test.test[Remove]": [ { "uri": "file://test.test_Remove_/results.txt" diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockIsCategory_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockIsCategory_/results.txt new file mode 100644 index 00000000000..e95de9fe1d2 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockIsCategory_/results.txt @@ -0,0 +1,160 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column7"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column8"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column9"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0F3A4E"; + %true; + %false; + %false; + %false; + %false; + %false; + %true; + %true; + %false + ]; + [ + "\xD0\xB2\xD0\x92\xD0\xB0\xD0\x92\xD1\x8B\xD0\xB0"; + %false; + %false; + %false; + %false; + %false; + %true; + %true; + %false; + %false + ]; + [ + "\xD1\x84\xD1\x8B\xD0\xB2"; + %false; + %false; + %false; + %true; + %false; + %true; + %true; + %false; + %false + ]; + [ + "1234"; + %true; + %false; + %false; + %false; + %true; + %false; + %true; + %true; + %false + ]; + [ + "\xD0\xB2\xD1\2132\xD0\xB2-\xD0\xB0"; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + "\xD0\xB2\xD1\x8B\xD0\2601-!\xD1\x8B\xD0\xB2"; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockNormalize_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockNormalize_/results.txt new file mode 100644 index 00000000000..2fc20b07f1e --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockNormalize_/results.txt @@ -0,0 +1,92 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfd"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfc"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfkd"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfkc"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "\xC3\xA9"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9" + ]; + [ + "e\xCC\x81"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9" + ]; + [ + "\xC2\xB5"; + "\xC2\xB5"; + "\xC2\xB5"; + "\xC2\xB5"; + "\xCE\xBC"; + "\xCE\xBC" + ]; + [ + "\xE2\x84\x8C"; + "\xE2\x84\x8C"; + "\xE2\x84\x8C"; + "\xE2\x84\x8C"; + "H"; + "H" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTo_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTo_/results.txt new file mode 100644 index 00000000000..7f7b2525d78 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTo_/results.txt @@ -0,0 +1,102 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "lower"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "upper"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "title"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "reverse"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "test"; + "test"; + "TEST"; + "Test"; + "tset" + ]; + [ + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"; + "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD1\x81\xD0\xB5\xD1\x82" + ]; + [ + "TeSt"; + "test"; + "TEST"; + "Test"; + "tSeT" + ]; + [ + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"; + "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"; + "\xD0\xA2\xD1\x81\xD0\x95\xD1\x82" + ]; + [ + "Eyl\xC3\xBCl"; + "eyl\xC3\xBCl"; + "EYL\xC3\x9CL"; + "Eyl\xC3\xBCl"; + "l\xC3\xBClyE" + ]; + [ + "6"; + "6"; + "6"; + "6"; + "6" + ]; + [ + ""; + ""; + ""; + ""; + "" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt index a6fd861c645..e95de9fe1d2 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt @@ -8,10 +8,10 @@ "StructType"; [ [ - "column0"; + "value"; [ "DataType"; - "Bool" + "Utf8" ] ]; [ @@ -76,85 +76,81 @@ "DataType"; "Bool" ] - ]; - [ - "column10"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column11"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column12"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column13"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column14"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column15"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column16"; - [ - "DataType"; - "Bool" - ] - ]; - [ - "column17"; - [ - "DataType"; - "Bool" - ] ] ] ] ]; "Data" = [ [ + "0F3A4E"; %true; %false; - %true; %false; + %false; + %false; + %false; + %true; %true; + %false + ]; + [ + "\xD0\xB2\xD0\x92\xD0\xB0\xD0\x92\xD1\x8B\xD0\xB0"; + %false; + %false; %false; + %false; + %false; + %true; %true; %false; + %false + ]; + [ + "\xD1\x84\xD1\x8B\xD0\xB2"; + %false; + %false; + %false; %true; %false; %true; + %true; %false; + %false + ]; + [ + "1234"; %true; %false; + %false; + %false; %true; %false; %true; + %true; + %false + ]; + [ + "\xD0\xB2\xD1\2132\xD0\xB2-\xD0\xB0"; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + "\xD0\xB2\xD1\x8B\xD0\2601-!\xD1\x8B\xD0\xB2"; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false; %false ] ] diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Normalize_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Normalize_/results.txt new file mode 100644 index 00000000000..2fc20b07f1e --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Normalize_/results.txt @@ -0,0 +1,92 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfd"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfc"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfkd"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "normalize_nfkc"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "\xC3\xA9"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9" + ]; + [ + "e\xCC\x81"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9"; + "e\xCC\x81"; + "\xC3\xA9" + ]; + [ + "\xC2\xB5"; + "\xC2\xB5"; + "\xC2\xB5"; + "\xC2\xB5"; + "\xCE\xBC"; + "\xCE\xBC" + ]; + [ + "\xE2\x84\x8C"; + "\xE2\x84\x8C"; + "\xE2\x84\x8C"; + "\xE2\x84\x8C"; + "H"; + "H" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt index 465ad350553..502cea3fd0f 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt @@ -18,16 +18,6 @@ ] ]; [ - "normalize"; - [ - "OptionalType"; - [ - "DataType"; - "Utf8" - ] - ] - ]; - [ "is"; [ "DataType"; @@ -175,9 +165,6 @@ [ "Eyl\xC3\xBCl" ]; - [ - "Eyl\xC3\xBCl" - ]; %true; [ "5" @@ -221,9 +208,6 @@ [ "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" ]; - [ - "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" - ]; %true; [ "6" @@ -268,9 +252,6 @@ [ "\xC3\xBAnora" ]; - [ - "\xC3\xBAnora" - ]; %true; [ "5" @@ -314,9 +295,6 @@ [ "Ci\xD1\x87 Ci\xD1\x87" ]; - [ - "Ci\xD1\x87 Ci\xD1\x87" - ]; %true; [ "7" @@ -362,9 +340,6 @@ [ "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" ]; - [ - "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" - ]; %true; [ "13" @@ -424,9 +399,6 @@ [ "6" ]; - [ - "6" - ]; %true; [ "1" @@ -466,9 +438,6 @@ [ "" ]; - [ - "" - ]; %true; [ "0" diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.in new file mode 100644 index 00000000000..4aba89386b4 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.in @@ -0,0 +1,6 @@ +{"key"="1";"value"="0F3A4E"}; +{"key"="2";"value"="вВаВыа"}; +{"key"="3";"value"="фыв"}; +{"key"="4";"value"="1234"}; +{"key"="5";"value"="вы2в-а"}; +{"key"="6";"value"="выа1-!ыв"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.sql new file mode 100644 index 00000000000..3a2b3d0c214 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockIsCategory.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::IsAscii(value), + Unicode::IsSpace(value), + Unicode::IsUpper(value), + Unicode::IsLower(value), + Unicode::IsDigit(value), + Unicode::IsAlpha(value), + Unicode::IsAlnum(value), + Unicode::IsHex(value), + Unicode::IsUnicodeSet(value, "[вао]"u) +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.in new file mode 100644 index 00000000000..2e56f171a4b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.in @@ -0,0 +1,4 @@ +{"key"="1";"value"="\xC3\xA9"}; +{"key"="2";"value"="e\xCC\x81"}; +{"key"="3";"value"="\xC2\xB5"}; +{"key"="4";"value"="\xE2\x84\x8C"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.sql new file mode 100644 index 00000000000..c0e063acd6b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockNormalize.sql @@ -0,0 +1,13 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value AS value, + Unicode::Normalize(value) AS normalize, + Unicode::NormalizeNFD(value) AS normalize_nfd, + Unicode::NormalizeNFC(value) AS normalize_nfc, + Unicode::NormalizeNFKD(value) AS normalize_nfkd, + Unicode::NormalizeNFKC(value) AS normalize_nfkc +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.in new file mode 100644 index 00000000000..82d72f16711 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.in @@ -0,0 +1,7 @@ +{"key"="1";"value"="test"}; +{"key"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"}; +{"key"="3";"value"="TeSt"}; +{"key"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"}; +{"key"="5";"value"="Eyl\xC3\xBCl"}; +{"key"="6";"value"="6"}; +{"key"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.sql new file mode 100644 index 00000000000..a4d546ca6dd --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTo.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value, + Unicode::ToLower(value) AS lower, + Unicode::ToUpper(value) AS upper, + Unicode::ToTitle(value) AS title, + Unicode::Reverse(value) AS reverse, +FROM Input; + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.in b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.in new file mode 100644 index 00000000000..4aba89386b4 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.in @@ -0,0 +1,6 @@ +{"key"="1";"value"="0F3A4E"}; +{"key"="2";"value"="вВаВыа"}; +{"key"="3";"value"="фыв"}; +{"key"="4";"value"="1234"}; +{"key"="5";"value"="вы2в-а"}; +{"key"="6";"value"="выа1-!ыв"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql index 2effa23221e..bd933f911bf 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql @@ -1,21 +1,13 @@ /* syntax version 1 */ SELECT - Unicode::IsAscii("sdf"u), - Unicode::IsAscii("выавыа"u), - Unicode::IsSpace(" \u2002\u200a"u), - Unicode::IsSpace("выавыа"u), - Unicode::IsUpper("ФЫВ"u), - Unicode::IsUpper("вВаВыа"u), - Unicode::IsLower("фыв"u), - Unicode::IsLower("вВаВыа"u), - Unicode::IsDigit("1234"u), - Unicode::IsDigit("выавыа"u), - Unicode::IsAlpha("фвфы"u), - Unicode::IsAlpha("вы2в-а"u), - Unicode::IsAlnum("фыв13в"u), - Unicode::IsAlnum("выа1-}ыв"u), - Unicode::IsHex("0F3A4E"u), - Unicode::IsHex("ваоао"u), - Unicode::IsUnicodeSet("ваоао"u, "[вао]"u), - Unicode::IsUnicodeSet("ваоао"u, "[ваб]"u) - + value as value, + Unicode::IsAscii(value), + Unicode::IsSpace(value), + Unicode::IsUpper(value), + Unicode::IsLower(value), + Unicode::IsDigit(value), + Unicode::IsAlpha(value), + Unicode::IsAlnum(value), + Unicode::IsHex(value), + Unicode::IsUnicodeSet(value, "[вао]"u) +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.in b/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.in new file mode 100644 index 00000000000..2e56f171a4b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.in @@ -0,0 +1,4 @@ +{"key"="1";"value"="\xC3\xA9"}; +{"key"="2";"value"="e\xCC\x81"}; +{"key"="3";"value"="\xC2\xB5"}; +{"key"="4";"value"="\xE2\x84\x8C"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.sql new file mode 100644 index 00000000000..c0c8b053894 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Normalize.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +SELECT + value AS value, + Unicode::Normalize(value) AS normalize, + Unicode::NormalizeNFD(value) AS normalize_nfd, + Unicode::NormalizeNFC(value) AS normalize_nfc, + Unicode::NormalizeNFKD(value) AS normalize_nfkd, + Unicode::NormalizeNFKC(value) AS normalize_nfkc +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.in b/yql/essentials/udfs/common/unicode_base/test/cases/To.in index 5effdb9971b..82d72f16711 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/To.in +++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.in @@ -1,8 +1,7 @@ -{"key"="1";"subkey"="1";"value"="test"}; -{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"}; -{"key"="3";"subkey"="3";"value"="TeSt"}; -{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"}; -{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"}; -{"key"="6";"subkey"="6";"value"="6"}; -{"key"="4";"subkey"="4";"value"=""}; - +{"key"="1";"value"="test"}; +{"key"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"}; +{"key"="3";"value"="TeSt"}; +{"key"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"}; +{"key"="5";"value"="Eyl\xC3\xBCl"}; +{"key"="6";"value"="6"}; +{"key"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr index 990efb1ff2c..d5e5b2ca484 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr +++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr @@ -1,12 +1,8 @@ -{"_yql_row_spec"={ - "Type"=["StructType";[ - ["key";["DataType";"Utf8"]]; - ["subkey";["DataType";"Utf8"]]; - ["value";["DataType";"Utf8"]] - ]]; - "SortDirections"=[1;1;]; - "SortedBy"=["key";"subkey";]; - "SortedByTypes"=[["DataType";"Utf8";];["DataType";"Utf8";];]; - "SortMembers"=["key";"subkey";]; -}} - +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql index b330682b6ed..cdff12f352b 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql @@ -1,7 +1,6 @@ /* syntax version 1 */ SELECT value AS value, - Unicode::Normalize(value) AS normalize, Unicode::IsUtf(value) AS is, Unicode::GetLength(value) AS length, Unicode::Substring(value, 1) AS one_end_substring, diff --git a/yql/essentials/udfs/common/unicode_base/ya.make b/yql/essentials/udfs/common/unicode_base/ya.make index 53a8f3af45b..4ec872e2495 100644 --- a/yql/essentials/udfs/common/unicode_base/ya.make +++ b/yql/essentials/udfs/common/unicode_base/ya.make @@ -2,7 +2,7 @@ YQL_UDF_CONTRIB(unicode_udf) YQL_ABI_VERSION( 2 - 27 + 37 0 ) |