diff options
| author | atarasov5 <[email protected]> | 2025-05-13 12:28:18 +0300 |
|---|---|---|
| committer | atarasov5 <[email protected]> | 2025-05-13 12:45:42 +0300 |
| commit | 4c2a6c359aaf5e8cc986d7733a46fa7aa709d8ce (patch) | |
| tree | 6958a07a11cf8ed8473059ed445e22a8e2d8ab84 /yql/essentials | |
| parent | ec46fad581b7467a2ddbf3328e344ae30fe505c0 (diff) | |
YQL-19748: Add ReverseBytes and ReverseBits
commit_hash:8ea4f04ae478bcbe0bb1d77cf5d6d1575bbc16f1
Diffstat (limited to 'yql/essentials')
24 files changed, 409 insertions, 34 deletions
diff --git a/yql/essentials/docs/en/changelog/2025.02.md b/yql/essentials/docs/en/changelog/2025.02.md index 54b7f5c0587..cb6040a988b 100644 --- a/yql/essentials/docs/en/changelog/2025.02.md +++ b/yql/essentials/docs/en/changelog/2025.02.md @@ -32,3 +32,4 @@ Removed String::Reverse function, use Unicode::Reverse. Removed String::HasPrefixIgnoreCase and String::StartsWithIgnoreCase functions, use String::AsciiStartsWithIgnoreCase. Removed String::HasSuffixIgnoreCase and String::EndsWithIgnoreCase functions, use String::AsciiEndsWithIgnoreCase. Added String::AsciiContainsIgnoreCase and String::AsciiEqualsIgnoreCase functions. +Added String::ReverseBytes and String::ReverseBits functions. diff --git a/yql/essentials/docs/en/udf/list/string.md b/yql/essentials/docs/en/udf/list/string.md index c010814d870..6b8e000c765 100644 --- a/yql/essentials/docs/en/udf/list/string.md +++ b/yql/essentials/docs/en/udf/list/string.md @@ -98,6 +98,12 @@ Functions for ASCII strings: * `String::RemoveLast(String{Flags:AutoMap}, String) -> String`: An unordered set of characters in the second argument, only the last encountered character from the set is deleted +* `String::ReverseBytes(String{Flags:AutoMap}) -> String` - Added in the version [2025.02](../../changelog/2025.02.md#string-module) + Reverses a string, treating it as a byte sequence. + +* `String::ReverseBits(String{Flags:AutoMap}) -> String` - Added in the version [2025.02](../../changelog/2025.02.md#string-module) + Reverses a string, treating it as a bit sequence. 
+ * `String::IsAscii(String{Flags:AutoMap}) -> Bool` * `String::IsAsciiSpace(String{Flags:AutoMap}) -> Bool` @@ -153,4 +159,3 @@ SELECT String::Base64Encode("YQL"); -- "WVFM" SELECT String::Strip("YQL "); -- "YQL" SELECT String::SplitToList("1,2,3,4,5,6,7", ",", 3 as Limit); -- ["1", "2", "3", "4,5,6,7"] ``` - diff --git a/yql/essentials/docs/ru/changelog/2025.02.md b/yql/essentials/docs/ru/changelog/2025.02.md index ba5f08c8146..6fc85009362 100644 --- a/yql/essentials/docs/ru/changelog/2025.02.md +++ b/yql/essentials/docs/ru/changelog/2025.02.md @@ -32,3 +32,4 @@ SELECT foo_new, ... WHERE foo = 1 GROUP BY expr AS foo_new Удалены функции String::HasPrefixIgnoreCase и String::StartsWithIgnoreCase, используйте String::AsciiStartsWithIgnoreCase. Удалены функции String::HasSuffixIgnoreCase и String::EndsWithIgnoreCase, используйте String::AsciiEndsWithIgnoreCase. Добавлены функции String::AsciiContainsIgnoreCase и String::AsciiEqualsIgnoreCase. +Добавлены функции String::ReverseBytes и String::ReverseBits. diff --git a/yql/essentials/docs/ru/udf/list/string.md b/yql/essentials/docs/ru/udf/list/string.md index 4f41cd4b6eb..38152a483e1 100644 --- a/yql/essentials/docs/ru/udf/list/string.md +++ b/yql/essentials/docs/ru/udf/list/string.md @@ -116,6 +116,12 @@ SELECT String::SplitToList("1,2,3,4,5,6,7", ",", 3 as Limit); -- ["1", "2", "3", Удаляют все/первое/последнее вхождения(е) символа в наборе `symbols` из `input`. Второй аргумент интерпретируется как неупорядоченный набор символов для удаления. +* `String::ReverseBytes(input:String{Flags:AutoMap}) -> String` - добавлена в версии [2025.02](../../changelog/2025.02.md#string-module) + Разворачивает строку, рассматривая ее как байтовую последовательность. + +* `String::ReverseBits(input:String{Flags:AutoMap}) -> String` - добавлена в версии [2025.02](../../changelog/2025.02.md#string-module) + Разворачивает строку, рассматривая ее как битовую последовательность. 
+ * `String::IsAscii(string:String{Flags:AutoMap}) -> Bool` Проверяет, является ли строка валидной ascii последовательностью. diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp index 34b7fdc6a53..273e7617d5b 100644 --- a/yql/essentials/udfs/common/string/string_udf.cpp +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -25,32 +25,50 @@ #include <util/string/subst.h> #include <util/string/util.h> #include <util/string/vector.h> +#include <util/generic/bitops.h> + +#include <bit> using namespace NKikimr; using namespace NUdf; namespace { -#define STRING_UDF(udfName, function) \ - BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \ - const TString input(args[0].AsStringRef()); \ - const auto& result = function(input); \ - return valueBuilder->NewString(result); \ - } \ - \ - struct T##udfName##KernelExec \ - : public TUnaryKernelExec<T##udfName##KernelExec> \ - { \ - template <typename TSink> \ - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ - const TString input(arg1.AsStringRef()); \ - const auto& result = function(input); \ - sink(TBlockItem(result)); \ - } \ - }; \ - \ - END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \ +TString ReverseBytes(const TStringRef input) { + TString result; + result.ReserveAndResize(input.Size()); + for (size_t i = 0; i < input.Size(); ++i) { + result[i] = input.Data()[input.Size() - 1 - i]; + } + return result; +} +TString ReverseBits(const TStringRef input) { + TString result; + result.ReserveAndResize(input.Size()); + for (size_t i = 0; i < input.Size(); ++i) { + result[i] = std::bit_cast<char>(::ReverseBits(std::bit_cast<ui8>(input.Data()[input.Size() - 1 - i]))); + } + return result; +} + +#define STRING_UDF(udfName, function, minVersion) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, char*(TAutoMap<char*>), builder.SetMinLangVer(minVersion)) { \ + const TString input(args[0].AsStringRef()); 
\ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } \ + \ + struct T##udfName##KernelExec: public TUnaryKernelExec<T##udfName##KernelExec> { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + const TString input(arg1.AsStringRef()); \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) // 'unsafe' udf is actually strict - it returns null on any exception #define STRING_UNSAFE_UDF(udfName, function) \ @@ -354,19 +372,21 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) -#define STRING_UDF_MAP(XX) \ - XX(Base32Encode, Base32Encode) \ - XX(Base64Encode, Base64Encode) \ - XX(Base64EncodeUrl, Base64EncodeUrl) \ - XX(EscapeC, EscapeC) \ - XX(UnescapeC, UnescapeC) \ - XX(HexEncode, HexEncode) \ - XX(EncodeHtml, EncodeHtmlPcdata) \ - XX(DecodeHtml, DecodeHtmlPcdata) \ - XX(CgiEscape, CGIEscapeRet) \ - XX(CgiUnescape, CGIUnescapeRet) \ - XX(Strip, Strip) \ - XX(Collapse, Collapse) +#define STRING_UDF_MAP(XX) \ + XX(Base32Encode, Base32Encode, NYql::UnknownLangVersion) \ + XX(Base64Encode, Base64Encode, NYql::UnknownLangVersion) \ + XX(Base64EncodeUrl, Base64EncodeUrl, NYql::UnknownLangVersion) \ + XX(EscapeC, EscapeC, NYql::UnknownLangVersion) \ + XX(UnescapeC, UnescapeC, NYql::UnknownLangVersion) \ + XX(HexEncode, HexEncode, NYql::UnknownLangVersion) \ + XX(EncodeHtml, EncodeHtmlPcdata, NYql::UnknownLangVersion) \ + XX(DecodeHtml, DecodeHtmlPcdata, NYql::UnknownLangVersion) \ + XX(CgiEscape, CGIEscapeRet, NYql::UnknownLangVersion) \ + XX(CgiUnescape, CGIUnescapeRet, NYql::UnknownLangVersion) \ + XX(Strip, Strip, NYql::UnknownLangVersion) \ + XX(Collapse, Collapse, NYql::UnknownLangVersion) \ + XX(ReverseBytes, ReverseBytes, NYql::MakeLangVersion(2025, 2)) \ + XX(ReverseBits, 
ReverseBits, NYql::MakeLangVersion(2025, 2)) #define STRING_UNSAFE_UDF_MAP(XX) \ XX(Base32Decode, Base32Decode) \ @@ -1011,6 +1031,6 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), TPrec, TToByteList, TFromByteList) -} + } // namespace REGISTER_MODULES(TStringModule) diff --git a/yql/essentials/udfs/common/string/test/canondata/result.json b/yql/essentials/udfs/common/string/test/canondata/result.json index e47e0bc7c64..aff883c405e 100644 --- a/yql/essentials/udfs/common/string/test/canondata/result.json +++ b/yql/essentials/udfs/common/string/test/canondata/result.json @@ -49,6 +49,16 @@ "uri": "file://test.test_BlockReplace_/results.txt" } ], + "test.test[BlockReverseBits]": [ + { + "uri": "file://test.test_BlockReverseBits_/results.txt" + } + ], + "test.test[BlockReverseBytes]": [ + { + "uri": "file://test.test_BlockReverseBytes_/results.txt" + } + ], "test.test[BlockStreamFormat]": [ { "uri": "file://test.test_BlockStreamFormat_/results.txt" @@ -104,6 +114,26 @@ "uri": "file://test.test_Replace_/results.txt" } ], + "test.test[ReverseBits]": [ + { + "uri": "file://test.test_ReverseBits_/results.txt" + } + ], + "test.test[ReverseBits_2025_01]": [ + { + "uri": "file://test.test_ReverseBits_2025_01_/extracted" + } + ], + "test.test[ReverseBytes]": [ + { + "uri": "file://test.test_ReverseBytes_/results.txt" + } + ], + "test.test[ReverseBytes_2025_01]": [ + { + "uri": "file://test.test_ReverseBytes_2025_01_/extracted" + } + ], "test.test[Reverse_2025.02]": [ { "uri": "file://test.test_Reverse_2025.02_/extracted" diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReverseBits_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReverseBits_/results.txt new file mode 100644 index 00000000000..51574f08ed4 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReverseBits_/results.txt @@ -0,0 +1,56 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + 
"StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "MixAsciiAndUnicode"; + %true + ]; + [ + "Empty"; + %true + ]; + [ + "Ascii"; + %true + ]; + [ + "Unicode"; + %true + ]; + [ + "Hex"; + %true + ]; + [ + "SingleChar"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReverseBytes_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReverseBytes_/results.txt new file mode 100644 index 00000000000..d9750f7e437 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReverseBytes_/results.txt @@ -0,0 +1,60 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "MixAsciiAndUnicode"; + %true + ]; + [ + "Empty"; + %true + ]; + [ + "Ascii"; + %true + ]; + [ + "Unicode"; + %true + ]; + [ + "Hex"; + %true + ]; + [ + "Polindrome"; + %true + ]; + [ + "SingleChar"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBits_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBits_/results.txt new file mode 100644 index 00000000000..51574f08ed4 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBits_/results.txt @@ -0,0 +1,56 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "MixAsciiAndUnicode"; + %true + ]; + [ + "Empty"; + %true + ]; + [ + "Ascii"; + %true + ]; + [ + "Unicode"; + %true + ]; + [ + "Hex"; + %true + ]; + [ + "SingleChar"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBits_2025_01_/extracted b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBits_2025_01_/extracted new file mode 100644 index 00000000000..a783318ac04 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBits_2025_01_/extracted @@ -0,0 +1,11 @@ +<tmp_path>/program.sql:<main>: Error: Type annotation + + <tmp_path>/program.sql:<main>:2:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At tuple, At function: SqlProjectItem, At lambda + select + ^ + <tmp_path>/program.sql:<main>:4:13: Error: At function: Apply, At function: Udf + String::ReverseBits("Simple строка long enough\xfe\xff"); + ^ + <tmp_path>/program.sql:<main>:4:13: Error: UDF 'String.ReverseBits' is not available before version 2025.02 + String::ReverseBits("Simple строка long enough\xfe\xff"); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBytes_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBytes_/results.txt new file mode 100644 index 00000000000..d9750f7e437 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBytes_/results.txt @@ -0,0 +1,60 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "MixAsciiAndUnicode"; + %true + ]; + [ + "Empty"; + %true + ]; + [ + "Ascii"; + %true + ]; + [ + "Unicode"; + %true + ]; + [ + "Hex"; + %true + ]; + [ + "Polindrome"; + %true + ]; + [ + "SingleChar"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBytes_2025_01_/extracted b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBytes_2025_01_/extracted new file mode 100644 index 00000000000..16b624bccb9 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReverseBytes_2025_01_/extracted @@ -0,0 +1,11 @@ +<tmp_path>/program.sql:<main>: Error: Type annotation + + <tmp_path>/program.sql:<main>:2:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At tuple, At function: SqlProjectItem, At lambda + select + ^ + <tmp_path>/program.sql:<main>:4:13: Error: At function: Apply, At function: Udf + String::ReverseBytes("Simple строка long enough\xfe\xff"); + ^ + <tmp_path>/program.sql:<main>:4:13: Error: UDF 'String.ReverseBytes' is not available before version 2025.02 + String::ReverseBytes("Simple строка long enough\xfe\xff"); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReverseBits.in b/yql/essentials/udfs/common/string/test/cases/BlockReverseBits.in new file mode 100644 index 00000000000..a427e1c1c10 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockReverseBits.in @@ -0,0 +1,6 @@ +{"key"="MixAsciiAndUnicode";"subkey"="Simple строка long enough";"value"="\x16\xe6\xae\xf6v\xa6\x04\xe6v\xf66\x04\r\x0b\x5D\x0b\x7D\x0b\x01\x8bA\x8b\x81\x8b\x04\xa66\x0e\xb6\x96\xca"}; +{"key"="Empty";"subkey"="";"value"=""}; +{"key"="Ascii";"subkey"="ABCDEFG";"value"="\xe2b\xa2\x22\xc2B\x82"}; +{"key"="Unicode";"subkey"="строка";"value"="\r\x0b\x5D\x0b\x7D\x0b\x01\x8bA\x8b\x81\x8b"}; +{"key"="Hex";"subkey"="\x00\x01\x02\x03\x04\x05\x06\x07\x11\x22\x33\x53\xac\xfe\xfc";"value"="?\x7f5\xca\xccD\x88\xe0`\xa0 \xc0@\x80\x00"}; +{"key"="SingleChar";"subkey"="A";"value"="\x82"}; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReverseBits.sql b/yql/essentials/udfs/common/string/test/cases/BlockReverseBits.sql new file mode 100644 index 00000000000..b89064681ae --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockReverseBits.sql @@ -0,0 +1,7 @@ +PRAGMA UseBlocks; + +SELECT + -- Use explicit comparison instead of canonization to produce more human readable test input data. + -- Canonization of binary data produces encoded bytes. 
+ key, + String::ReverseBits(subkey) == value FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReverseBytes.in b/yql/essentials/udfs/common/string/test/cases/BlockReverseBytes.in new file mode 100644 index 00000000000..9ffc4ea3d05 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockReverseBytes.in @@ -0,0 +1,7 @@ +{"key"="MixAsciiAndUnicode";"subkey"="Simple строка long enough";"value"="hguone gnol \xb0\xd0\xba\xd0\xbe\xd0\x80\xd1\x82\xd1\x81\xd1 elpmiS"}; +{"key"="Empty";"subkey"="";"value"=""}; +{"key"="Ascii";"subkey"="ABCDEFG";"value"="GFEDCBA"}; +{"key"="Unicode";"subkey"="строка";"value"="\xb0\xd0\xba\xd0\xbe\xd0\x80\xd1\x82\xd1\x81\xd1"}; +{"key"="Hex";"subkey"="\xff\xae\x00\x01\x02\x03\x04\x05\x06\x07";"value"="\x07\x06\x05\x04\x03\x02\x01\x00\xae\xff"}; +{"key"="Polindrome";"subkey"="radar";"value"="radar"}; +{"key"="SingleChar";"subkey"="A";"value"="A"}; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReverseBytes.sql b/yql/essentials/udfs/common/string/test/cases/BlockReverseBytes.sql new file mode 100644 index 00000000000..0263ad3eec2 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockReverseBytes.sql @@ -0,0 +1,7 @@ +PRAGMA UseBlocks; + +SELECT + -- Use explicit comparison instead of canonization to produce more human readable test input data. + -- Canonization of binary data produces encoded bytes. 
+ key, + String::ReverseBytes(subkey) == value FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBits.in b/yql/essentials/udfs/common/string/test/cases/ReverseBits.in new file mode 100644 index 00000000000..a427e1c1c10 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBits.in @@ -0,0 +1,6 @@ +{"key"="MixAsciiAndUnicode";"subkey"="Simple строка long enough";"value"="\x16\xe6\xae\xf6v\xa6\x04\xe6v\xf66\x04\r\x0b\x5D\x0b\x7D\x0b\x01\x8bA\x8b\x81\x8b\x04\xa66\x0e\xb6\x96\xca"}; +{"key"="Empty";"subkey"="";"value"=""}; +{"key"="Ascii";"subkey"="ABCDEFG";"value"="\xe2b\xa2\x22\xc2B\x82"}; +{"key"="Unicode";"subkey"="строка";"value"="\r\x0b\x5D\x0b\x7D\x0b\x01\x8bA\x8b\x81\x8b"}; +{"key"="Hex";"subkey"="\x00\x01\x02\x03\x04\x05\x06\x07\x11\x22\x33\x53\xac\xfe\xfc";"value"="?\x7f5\xca\xccD\x88\xe0`\xa0 \xc0@\x80\x00"}; +{"key"="SingleChar";"subkey"="A";"value"="\x82"}; diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBits.sql b/yql/essentials/udfs/common/string/test/cases/ReverseBits.sql new file mode 100644 index 00000000000..2b970eba8aa --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBits.sql @@ -0,0 +1,5 @@ +SELECT + -- Use explicit comparison instead of canonization to produce more human readable test input data. + -- Canonization of binary data produces encoded bytes. 
+ key, + String::ReverseBits(subkey) == value FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBits_2025_01.cfg b/yql/essentials/udfs/common/string/test/cases/ReverseBits_2025_01.cfg new file mode 100644 index 00000000000..b92b7c1caef --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBits_2025_01.cfg @@ -0,0 +1,2 @@ +xfail +langver 2025.01 diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBits_2025_01.sql b/yql/essentials/udfs/common/string/test/cases/ReverseBits_2025_01.sql new file mode 100644 index 00000000000..8c9a8cca58b --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBits_2025_01.sql @@ -0,0 +1,2 @@ +select + String::ReverseBits("Simple строка long enough\xfe\xff"); diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBytes.in b/yql/essentials/udfs/common/string/test/cases/ReverseBytes.in new file mode 100644 index 00000000000..9ffc4ea3d05 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBytes.in @@ -0,0 +1,7 @@ +{"key"="MixAsciiAndUnicode";"subkey"="Simple строка long enough";"value"="hguone gnol \xb0\xd0\xba\xd0\xbe\xd0\x80\xd1\x82\xd1\x81\xd1 elpmiS"}; +{"key"="Empty";"subkey"="";"value"=""}; +{"key"="Ascii";"subkey"="ABCDEFG";"value"="GFEDCBA"}; +{"key"="Unicode";"subkey"="строка";"value"="\xb0\xd0\xba\xd0\xbe\xd0\x80\xd1\x82\xd1\x81\xd1"}; +{"key"="Hex";"subkey"="\xff\xae\x00\x01\x02\x03\x04\x05\x06\x07";"value"="\x07\x06\x05\x04\x03\x02\x01\x00\xae\xff"}; +{"key"="Polindrome";"subkey"="radar";"value"="radar"}; +{"key"="SingleChar";"subkey"="A";"value"="A"}; diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBytes.sql b/yql/essentials/udfs/common/string/test/cases/ReverseBytes.sql new file mode 100644 index 00000000000..81922fbaea6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBytes.sql @@ -0,0 +1,5 @@ +SELECT + -- Use explicit comparison instead of canonization to produce more human readable 
test input data. + -- Canonization of binary data produces encoded bytes. + key, + String::ReverseBytes(subkey) == value FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBytes_2025_01.cfg b/yql/essentials/udfs/common/string/test/cases/ReverseBytes_2025_01.cfg new file mode 100644 index 00000000000..b92b7c1caef --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBytes_2025_01.cfg @@ -0,0 +1,2 @@ +xfail +langver 2025.01 diff --git a/yql/essentials/udfs/common/string/test/cases/ReverseBytes_2025_01.sql b/yql/essentials/udfs/common/string/test/cases/ReverseBytes_2025_01.sql new file mode 100644 index 00000000000..55efba8c2d6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReverseBytes_2025_01.sql @@ -0,0 +1,2 @@ +select + String::ReverseBytes("Simple строка long enough\xfe\xff"); |
