summaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs/common/string
diff options
context:
space:
mode:
authorimunkin <[email protected]>2024-11-08 10:00:23 +0300
committerimunkin <[email protected]>2024-11-08 10:12:13 +0300
commita784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch)
tree05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs/common/string
parentd70137a7b530ccaa52834274913bbb5a3d1ca06e (diff)
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories: * clickhouse/client * datetime * knn * roaring commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs/common/string')
-rw-r--r--yql/essentials/udfs/common/string/string_udf.cpp926
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/result.json112
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt124
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt79
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt44
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt124
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt69
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt173
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt134
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt208
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt169
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt158
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt88
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt60
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt147
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt265
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt125
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt173
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt84
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt134
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt208
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt169
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt158
-rw-r--r--yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt294
-rw-r--r--yql/essentials/udfs/common/string/test/cases/AsciiChecks.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Decode.in4
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Decode.sql6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Encode.in3
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Base32Encode.sql5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql13
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockFind.sql7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockRemove.sql16
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockReplace.sql13
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr9
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql20
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in4
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql18
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockTo.in7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/BlockTo.sql9
-rw-r--r--yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in3
-rw-r--r--yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Find.sql12
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List.sql12
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List_v0.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/List_v0.sql27
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Remove.sql14
-rw-r--r--yql/essentials/udfs/common/string/test/cases/Replace.sql11
-rw-r--r--yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql10
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StreamFormat.in5
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr9
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StreamFormat.sql19
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUDF.in4
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUDF.sql15
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in6
-rw-r--r--yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/To.in7
-rw-r--r--yql/essentials/udfs/common/string/test/cases/To.sql14
-rw-r--r--yql/essentials/udfs/common/string/test/cases/default.in6
-rw-r--r--yql/essentials/udfs/common/string/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/string/ya.make38
66 files changed, 4654 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp
new file mode 100644
index 00000000000..d621e92582d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/string_udf.cpp
@@ -0,0 +1,926 @@
+#include <yql/essentials/public/udf/udf_allocator.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+
+#include <library/cpp/charset/codepage.h>
+#include <library/cpp/deprecated/split/split_iterator.h>
+#include <library/cpp/html/pcdata/pcdata.h>
+#include <library/cpp/string_utils/base32/base32.h>
+#include <library/cpp/string_utils/base64/base64.h>
+#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h>
+#include <library/cpp/string_utils/quote/quote.h>
+
+#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h>
+
+#include <util/charset/wide.h>
+#include <util/generic/vector.h>
+#include <util/stream/format.h>
+#include <util/string/ascii.h>
+#include <util/string/escape.h>
+#include <util/string/hex.h>
+#include <util/string/join.h>
+#include <util/string/reverse.h>
+#include <util/string/split.h>
+#include <util/string/strip.h>
+#include <util/string/subst.h>
+#include <util/string/util.h>
+#include <util/string/vector.h>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+
+// Generates a strict, Arrow-capable UDF class T<udfName> that applies
+// `function` to a String argument and returns the outcome as a String.
+//
+// Two entry points are emitted:
+//   * the scalar body, which wraps the result with valueBuilder->NewString();
+//   * a unary block kernel, T<udfName>KernelExec, which hands the result to
+//     `sink` as a TBlockItem.
+// In both paths the argument is copied into a TString before `function` runs.
+//
+// The final line carries no line continuation, so the definition ends there
+// regardless of what follows it in the file.
+#define STRING_UDF(udfName, function) \
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \
+        const TString input(args[0].AsStringRef()); \
+        const auto& result = function(input); \
+        return valueBuilder->NewString(result); \
+    } \
+    \
+    struct T##udfName##KernelExec \
+        : public TUnaryKernelExec<T##udfName##KernelExec> \
+    { \
+        template <typename TSink> \
+        static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+            const TString input(arg1.AsStringRef()); \
+            const auto& result = function(input); \
+            sink(TBlockItem(result)); \
+        } \
+    }; \
+    \
+    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+
+// Despite the 'unsafe' name the generated UDF is declared strict: when
+// `function` throws a yexception, the exception is caught and an empty (null)
+// result is returned instead. Only yexception is caught; other exception
+// types are not handled here.
+//
+// Scalar path: EMPTY_RESULT_ON_EMPTY_ARG(0) runs first (presumably returning
+// an empty value for a null argument - macro body is not visible here).
+// Block path: a null item is forwarded to `sink` as an empty TBlockItem.
+#define STRING_UNSAFE_UDF(udfName, function) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \
+ EMPTY_RESULT_ON_EMPTY_ARG(0); \
+ const TString input(args[0].AsStringRef()); \
+ try { \
+ const auto& result = function(input); \
+ return valueBuilder->NewString(result); \
+ } catch (yexception&) { \
+ return TUnboxedValue(); \
+ } \
+ } \
+ \
+ struct T##udfName##KernelExec \
+ : public TUnaryKernelExec<T##udfName##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+ if (!arg1) { \
+ return sink(TBlockItem()); \
+ } \
+ \
+ const TString input(arg1.AsStringRef()); \
+ try { \
+ const auto& result = function(input); \
+ sink(TBlockItem(result)); \
+ } catch (yexception&) { \
+ return sink(TBlockItem()); \
+ } \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+// Generates a scalar-only strict UDF T<udfName> over an optional String.
+// The argument is decoded with UTF8ToWide(), `function` is called on the wide
+// string (its return value is ignored, so it has to modify the string in
+// place), and the wide string is re-encoded with WideToUTF8().
+// A yexception thrown along the way yields an empty TUnboxedValue.
+#define STROKA_UDF(udfName, function) \
+ SIMPLE_STRICT_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \
+ EMPTY_RESULT_ON_EMPTY_ARG(0) \
+ const TString input(args[0].AsStringRef()); \
+ try { \
+ TUtf16String wide = UTF8ToWide(input); \
+ function(wide); \
+ return valueBuilder->NewString(WideToUTF8(wide)); \
+ } catch (yexception&) { \
+ return TUnboxedValue(); \
+ } \
+ }
+
+// Same shape as STROKA_UDF, but `function` receives the wide buffer as a
+// (begin, size) pair instead of the string object: the argument is decoded
+// with UTF8ToWide(), `function(wide.begin(), wide.size())` is called for its
+// side effect on the buffer, and the buffer is re-encoded with WideToUTF8().
+// A yexception thrown along the way yields an empty TUnboxedValue.
+#define STROKA_CASE_UDF(udfName, function) \
+ SIMPLE_STRICT_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \
+ EMPTY_RESULT_ON_EMPTY_ARG(0) \
+ const TString input(args[0].AsStringRef()); \
+ try { \
+ TUtf16String wide = UTF8ToWide(input); \
+ function(wide.begin(), wide.size()); \
+ return valueBuilder->NewString(WideToUTF8(wide)); \
+ } catch (yexception&) { \
+ return TUnboxedValue(); \
+ } \
+ }
+
+// Generates a strict, Arrow-capable UDF T<udfName> for case conversion.
+// `function` is the NAME of a TString member function: it is invoked as
+// input.function() on a private copy of the argument. When that call returns
+// a true value the modified copy is returned; otherwise the original
+// argument is passed through unchanged (no new string is built).
+// The scalar body and the block kernel follow the same logic.
+#define STROKA_ASCII_CASE_UDF(udfName, function) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \
+ TString input(args[0].AsStringRef()); \
+ if (input.function()) { \
+ return valueBuilder->NewString(input); \
+ } else { \
+ return args[0]; \
+ } \
+ } \
+ \
+ struct T##udfName##KernelExec \
+ : public TUnaryKernelExec<T##udfName##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+ TString input(arg1.AsStringRef()); \
+ if (input.function()) { \
+ sink(TBlockItem(input)); \
+ } else { \
+ sink(arg1); \
+ } \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+
+// Generates a scalar-only strict predicate T<udfName>(haystack, needle).
+// `function` is the NAME of a TString member function, invoked as
+// haystack.function(needle); its result is returned as the boolean value.
+// A null haystack (first argument) yields false.
+#define STROKA_FIND_UDF(udfName, function) \
+ SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
+ Y_UNUSED(valueBuilder); \
+ if (args[0]) { \
+ const TString haystack(args[0].AsStringRef()); \
+ const TString needle(args[1].AsStringRef()); \
+ return TUnboxedValuePod(haystack.function(needle)); \
+ } else { \
+ return TUnboxedValuePod(false); \
+ } \
+ }
+
+// Generates a scalar-only strict predicate T<udfName>(haystack, needle).
+// Unlike STROKA_FIND_UDF, `function` is a free callable invoked as
+// function(haystack, needle). A null haystack (first argument) yields false.
+#define STRING_TWO_ARGS_UDF(udfName, function) \
+ SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
+ Y_UNUSED(valueBuilder); \
+ if (args[0]) { \
+ const TString haystack(args[0].AsStringRef()); \
+ const TString needle(args[1].AsStringRef()); \
+ return TUnboxedValuePod(function(haystack, needle)); \
+ } else { \
+ return TUnboxedValuePod(false); \
+ } \
+ }
+
+// Generates a strict, Arrow-capable predicate T<function> over an optional
+// String. `function` is a per-character predicate and also supplies the UDF
+// class name. The result is true only if function(c) holds for every
+// character of the input; the scan stops at the first failure.
+// Edge cases visible in the code:
+//   * empty string -> true (the loop body never runs);
+//   * null input   -> false.
+// The scalar body and the block kernel implement the same loop.
+#define IS_ASCII_UDF(function) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \
+ Y_UNUSED(valueBuilder); \
+ if (args[0]) { \
+ const TStringBuf input(args[0].AsStringRef()); \
+ bool result = true; \
+ for (auto c : input) { \
+ if (!function(c)) { \
+ result = false; \
+ break; \
+ } \
+ } \
+ return TUnboxedValuePod(result); \
+ } else { \
+ return TUnboxedValuePod(false); \
+ } \
+ } \
+ \
+ struct T##function##KernelExec \
+ : public TUnaryKernelExec<T##function##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+ if (arg1) { \
+ const TStringBuf input(arg1.AsStringRef()); \
+ bool result = true; \
+ for (auto c : input) { \
+ if (!function(c)) { \
+ result = false; \
+ break; \
+ } \
+ } \
+ sink(TBlockItem(result)); \
+ } else { \
+ sink(TBlockItem(false)); \
+ } \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+
+
+// Generates an Arrow-capable padding UDF T<function>(input, padLen[, symbol]).
+// `function` is a stream formatter invoked as
+// function(input, padLen, paddingSymbol); its output is written to a
+// TStringStream and returned as a String.
+//
+// Unlike most generators in this file this one is NOT declared strict: it
+// throws yexception when
+//   * the padding symbol argument is present but is not exactly one byte, or
+//   * padLen exceeds `padLim`.
+// `padLim` is not defined inside this macro; it must be visible at the
+// expansion site. When the third argument is absent the padding symbol
+// defaults to a space.
+//
+// The block kernel (TGenericKernelExec with 3 arguments) repeats the same
+// validation, reading its inputs through args.GetElement(i).
+#define STRING_STREAM_PAD_FORMATTER_UDF(function) \
+ BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \
+ char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) \
+ { \
+ TStringStream result; \
+ const TStringBuf input(args[0].AsStringRef()); \
+ char paddingSymbol = ' '; \
+ if (args[2]) { \
+ if (args[2].AsStringRef().Size() != 1) { \
+ ythrow yexception() << "Not 1 symbol in paddingSymbol"; \
+ } \
+ paddingSymbol = TString(args[2].AsStringRef())[0]; \
+ } \
+ const ui64 padLen = args[1].Get<ui64>(); \
+ if (padLen > padLim) { \
+ ythrow yexception() << "Padding length (" << padLen << ") exceeds maximum: " << padLim; \
+ } \
+ result << function(input, padLen, paddingSymbol); \
+ return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \
+ } \
+ \
+ struct T##function##KernelExec \
+ : public TGenericKernelExec<T##function##KernelExec, 3> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \
+ TStringStream result; \
+ const TStringBuf input(args.GetElement(0).AsStringRef()); \
+ char paddingSymbol = ' '; \
+ if (args.GetElement(2)) { \
+ if (args.GetElement(2).AsStringRef().Size() != 1) { \
+ ythrow yexception() << "Not 1 symbol in paddingSymbol"; \
+ } \
+ paddingSymbol = TString(args.GetElement(2).AsStringRef())[0]; \
+ } \
+ const ui64 padLen = args.GetElement(1).Get<ui64>(); \
+ if (padLen > padLim) { \
+ ythrow yexception() << "Padding length (" << padLen \
+ << ") exceeds maximum: " << padLim; \
+ } \
+ result << function(input, padLen, paddingSymbol); \
+ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+// Generates a strict, Arrow-capable UDF T<function> that formats a number.
+// The argument is read as `argType`, passed to the stream formatter
+// `function`, written to a TStringStream, and the accumulated text is
+// returned as a String. `function` also supplies the UDF class name.
+#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<argType>)) { \
+ TStringStream result; \
+ result << function(args[0].Get<argType>()); \
+ return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \
+ } \
+ \
+ struct T##function##KernelExec \
+ : public TUnaryKernelExec<T##function##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+ TStringStream result; \
+ result << function(arg1.Get<argType>()); \
+ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+// Generates a strict, Arrow-capable UDF T<function> that formats a String.
+// The argument is viewed as a TStringBuf (no copy), passed to the stream
+// formatter `function`, written to a TStringStream, and the accumulated text
+// is returned as a String. `function` also supplies the UDF class name.
+#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<char*>)) { \
+ TStringStream result; \
+ const TStringBuf input(args[0].AsStringRef()); \
+ result << function(input); \
+ return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \
+ } \
+ \
+ struct T##function##KernelExec \
+ : public TUnaryKernelExec<T##function##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+ TStringStream result; \
+ const TStringBuf input(arg1.AsStringRef()); \
+ result << function(input); \
+ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do)
+
+
+// Generates a strict, Arrow-capable UDF T<udfName> that renders a ui64 via
+// HumanReadableSize(value, hrSize). `hrSize` is forwarded verbatim as the
+// second argument of HumanReadableSize(). The formatted text is collected in
+// a TStringStream and returned as a String.
+#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<ui64>)) { \
+ TStringStream result; \
+ result << HumanReadableSize(args[0].Get<ui64>(), hrSize); \
+ return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \
+ } \
+ \
+ struct T##udfName##KernelExec \
+ : public TUnaryKernelExec<T##udfName##KernelExec> \
+ { \
+ template <typename TSink> \
+ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \
+ TStringStream result; \
+ result << HumanReadableSize(arg1.Get<ui64>(), hrSize); \
+ sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \
+ } \
+ }; \
+ \
+ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
+
+// X-macro tables of (udfName, function) pairs. XX is the generator macro
+// applied to every row; the two-column shape matches the
+// (udfName, function) parameters of STRING_UDF / STRING_UNSAFE_UDF.
+// NOTE(review): the expansion site is not visible in this part of the file -
+// presumably STRING_UDF_MAP(STRING_UDF) and
+// STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF); confirm further down.
+#define STRING_UDF_MAP(XX) \
+ XX(Base32Encode, Base32Encode) \
+ XX(Base64Encode, Base64Encode) \
+ XX(Base64EncodeUrl, Base64EncodeUrl) \
+ XX(EscapeC, EscapeC) \
+ XX(UnescapeC, UnescapeC) \
+ XX(HexEncode, HexEncode) \
+ XX(EncodeHtml, EncodeHtmlPcdata) \
+ XX(DecodeHtml, DecodeHtmlPcdata) \
+ XX(CgiEscape, CGIEscapeRet) \
+ XX(CgiUnescape, CGIUnescapeRet) \
+ XX(Strip, Strip) \
+ XX(Collapse, Collapse)
+
+// Decoders: listed separately from the table above.
+#define STRING_UNSAFE_UDF_MAP(XX) \
+ XX(Base32Decode, Base32Decode) \
+ XX(Base32StrictDecode, Base32StrictDecode) \
+ XX(Base64Decode, Base64Decode) \
+ XX(Base64StrictDecode, Base64StrictDecode) \
+ XX(HexDecode, HexDecode)
+
+// NOTE: The functions below are marked as deprecated, so a block
+// implementation is not required for them. Hence, STROKA_CASE_UDF provides
+// only the scalar one at the moment.
+#define STROKA_CASE_UDF_MAP(XX) \
+ XX(ToLower, ToLower) \
+ XX(ToUpper, ToUpper) \
+ XX(ToTitle, ToTitle)
+
+// (udfName, TString member name) pairs; the second column is called as
+// input.<member>() by STROKA_ASCII_CASE_UDF.
+#define STROKA_ASCII_CASE_UDF_MAP(XX) \
+ XX(AsciiToLower, to_lower) \
+ XX(AsciiToUpper, to_upper) \
+ XX(AsciiToTitle, to_title)
+
+// NOTE: The functions below are marked as deprecated, so a block
+// implementation is not required for them. Hence, STROKA_FIND_UDF provides
+// only the scalar one at the moment.
+// HasPrefix/HasSuffix map to the same members as StartsWith/EndsWith.
+#define STROKA_FIND_UDF_MAP(XX) \
+ XX(StartsWith, StartsWith) \
+ XX(EndsWith, EndsWith) \
+ XX(HasPrefix, StartsWith) \
+ XX(HasSuffix, EndsWith)
+
+// NOTE: The functions below are marked as deprecated, so a block
+// implementation is not required for them. Hence, STRING_TWO_ARGS_UDF
+// provides only the scalar one at the moment.
+// The *IgnoreCase names map pairwise to the same two Ascii helpers.
+#define STRING_TWO_ARGS_UDF_MAP(XX) \
+ XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
+ XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
+ XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \
+ XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase)
+
+// NOTE: The function below is marked as deprecated, so a block
+// implementation is not required for it. Hence, STROKA_UDF provides only the
+// scalar one at the moment.
+#define STROKA_UDF_MAP(XX) \
+ XX(Reverse, ReverseInPlace)
+
+// Single-column table: each entry is a per-character predicate whose name
+// also becomes the UDF class suffix (shape matches IS_ASCII_UDF(function)).
+#define IS_ASCII_UDF_MAP(XX) \
+ XX(IsAscii) \
+ XX(IsAsciiSpace) \
+ XX(IsAsciiUpper) \
+ XX(IsAsciiLower) \
+ XX(IsAsciiDigit) \
+ XX(IsAsciiAlpha) \
+ XX(IsAsciiAlnum) \
+ XX(IsAsciiHex)
+
+// Single-column table matching STRING_STREAM_PAD_FORMATTER_UDF(function).
+#define STRING_STREAM_PAD_FORMATTER_UDF_MAP(XX) \
+ XX(LeftPad) \
+ XX(RightPad)
+
+// (function, argType) pairs matching STRING_STREAM_NUM_FORMATTER_UDF:
+// the second column is the numeric type the argument is read as.
+#define STRING_STREAM_NUM_FORMATTER_UDF_MAP(XX) \
+ XX(Hex, ui64) \
+ XX(SHex, i64) \
+ XX(Bin, ui64) \
+ XX(SBin, i64)
+
+// Single-column table matching STRING_STREAM_TEXT_FORMATTER_UDF(function).
+#define STRING_STREAM_TEXT_FORMATTER_UDF_MAP(XX) \
+ XX(HexText) \
+ XX(BinText)
+
+// (udfName, hrSize) pairs matching STRING_STREAM_HRSZ_FORMATTER_UDF: the
+// second column is passed as the second argument of HumanReadableSize().
+#define STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(XX) \
+ XX(HumanReadableQuantity, SF_QUANTITY) \
+ XX(HumanReadableBytes, SF_BYTES)
+
+
+    // TCollapseText(text, maxLength): runs CollapseText() on a private copy
+    // of the first argument and returns that copy as a new String.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, char*(TAutoMap<char*>, ui64)) {
+        const ui64 limit = args[1].Get<ui64>();
+        TString text(args[0].AsStringRef());
+        CollapseText(text, limit);
+        return valueBuilder->NewString(text);
+    }
+
+    // Block kernel counterpart: same steps per row, result goes to the sink.
+    struct TCollapseTextKernelExec
+        : public TBinaryKernelExec<TCollapseTextKernelExec>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+            const ui64 limit = arg2.Get<ui64>();
+            TString text(arg1.AsStringRef());
+            CollapseText(text, limit);
+            sink(TBlockItem(text));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do);
+
+
+    // TContains(haystack, needle) -> bool.
+    // Returns whether `needle` occurs in `haystack`; a null haystack yields
+    // false. Both arguments are only read, so they are inspected through
+    // non-owning TStringBuf views rather than being copied into TString.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) {
+        Y_UNUSED(valueBuilder);
+        if (!args[0]) {
+            return TUnboxedValuePod(false);
+        }
+
+        const TStringBuf haystack(args[0].AsStringRef());
+        const TStringBuf needle(args[1].AsStringRef());
+        return TUnboxedValuePod(haystack.Contains(needle));
+    }
+
+    // Block kernel counterpart: identical logic per row.
+    struct TContainsKernelExec : public TBinaryKernelExec<TContainsKernelExec> {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+            if (!arg1) {
+                return sink(TBlockItem(false));
+            }
+
+            const TStringBuf haystack(arg1.AsStringRef());
+            const TStringBuf needle(arg2.AsStringRef());
+            sink(TBlockItem(haystack.Contains(needle)));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do);
+
+
+    // TReplaceAll(input, what, with): applies SubstGlobal() to a copy of the
+    // input. If SubstGlobal() reports that nothing was substituted, the
+    // original argument is returned as is; otherwise the modified copy.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) {
+        TString text(args[0].AsStringRef());
+        const auto substituted = SubstGlobal(text, args[1].AsStringRef(), args[2].AsStringRef());
+        if (!substituted) {
+            return args[0];
+        }
+        return valueBuilder->NewString(text);
+    }
+
+    // Block kernel counterpart; the three inputs arrive as one tuple item.
+    struct TReplaceAllKernelExec
+        : public TGenericKernelExec<TReplaceAllKernelExec, 3>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) {
+            const TStringBuf pattern(args.GetElement(1).AsStringRef());
+            const TStringBuf replacement(args.GetElement(2).AsStringRef());
+            TString text(args.GetElement(0).AsStringRef());
+            if (!SubstGlobal(text, pattern, replacement)) {
+                return sink(args.GetElement(0));
+            }
+            return sink(TBlockItem(text));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do)
+
+
+    // TReplaceFirst(input, what, with): replaces the first occurrence of
+    // `what` (located with std::string::find) by `with`. When `what` is not
+    // found the original argument is returned unchanged.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) {
+        std::string text(args[0].AsStringRef());
+        const std::string_view pattern(args[1].AsStringRef());
+        const auto at = text.find(pattern);
+        if (at == std::string::npos) {
+            return args[0];
+        }
+        const std::string_view replacement(args[2].AsStringRef());
+        text.replace(at, pattern.size(), replacement);
+        return valueBuilder->NewString(text);
+    }
+
+    // Block kernel counterpart; the three inputs arrive as one tuple item.
+    struct TReplaceFirstKernelExec
+        : public TGenericKernelExec<TReplaceFirstKernelExec, 3>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) {
+            std::string text(args.GetElement(0).AsStringRef());
+            const std::string_view pattern(args.GetElement(1).AsStringRef());
+            const std::string_view replacement(args.GetElement(2).AsStringRef());
+            const auto at = text.find(pattern);
+            if (at == std::string::npos) {
+                return sink(args.GetElement(0));
+            }
+            text.replace(at, pattern.size(), replacement);
+            return sink(TBlockItem(text));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do)
+
+
+    // TReplaceLast(input, what, with): replaces the last occurrence of
+    // `what` (located with std::string::rfind) by `with`. When `what` is not
+    // found the original argument is returned unchanged.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) {
+        std::string text(args[0].AsStringRef());
+        const std::string_view pattern(args[1].AsStringRef());
+        const auto at = text.rfind(pattern);
+        if (at == std::string::npos) {
+            return args[0];
+        }
+        const std::string_view replacement(args[2].AsStringRef());
+        text.replace(at, pattern.size(), replacement);
+        return valueBuilder->NewString(text);
+    }
+
+    // Block kernel counterpart; the three inputs arrive as one tuple item.
+    struct TReplaceLastKernelExec
+        : public TGenericKernelExec<TReplaceLastKernelExec, 3>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) {
+            std::string text(args.GetElement(0).AsStringRef());
+            const std::string_view pattern(args.GetElement(1).AsStringRef());
+            const std::string_view replacement(args.GetElement(2).AsStringRef());
+            const auto at = text.rfind(pattern);
+            if (at == std::string::npos) {
+                return sink(args.GetElement(0));
+            }
+            text.replace(at, pattern.size(), replacement);
+            return sink(TBlockItem(text));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do)
+
+
+    // TRemoveAll(input, remove): drops every byte of `input` that appears in
+    // the byte set `remove`. The set is a 256-entry lookup table; surviving
+    // bytes are compacted towards the front of a working copy. When nothing
+    // was dropped the original argument is returned unchanged.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) {
+        std::string text(args[0].AsStringRef());
+        const std::string_view unwanted(args[1].AsStringRef());
+        std::array<bool, 256> drop{};
+        for (const ui8 byte : unwanted) {
+            drop[byte] = true;
+        }
+        size_t kept = 0;
+        for (size_t i = 0; i < text.size(); ++i) {
+            if (drop[static_cast<ui8>(text[i])]) {
+                continue;
+            }
+            text[kept++] = text[i];
+        }
+        if (kept == text.size()) {
+            return args[0];
+        }
+        text.resize(kept);
+        return valueBuilder->NewString(text);
+    }
+
+    // Block kernel counterpart: identical logic per row.
+    struct TRemoveAllKernelExec
+        : public TBinaryKernelExec<TRemoveAllKernelExec>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+            std::string text(arg1.AsStringRef());
+            const std::string_view unwanted(arg2.AsStringRef());
+            std::array<bool, 256> drop{};
+            for (const ui8 byte : unwanted) {
+                drop[byte] = true;
+            }
+            size_t kept = 0;
+            for (size_t i = 0; i < text.size(); ++i) {
+                if (drop[static_cast<ui8>(text[i])]) {
+                    continue;
+                }
+                text[kept++] = text[i];
+            }
+            if (kept == text.size()) {
+                return sink(arg1);
+            }
+            text.resize(kept);
+            return sink(TBlockItem(text));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do)
+
+
+    // TRemoveFirst(input, remove): erases the first byte of `input` that
+    // belongs to the byte set `remove`. When no byte matches, the original
+    // argument is returned unchanged.
+    //
+    // The search runs over a non-owning view of the input; an owning copy is
+    // made only once a byte to erase has actually been found, so the no-match
+    // path performs no string allocation.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) {
+        const std::string_view input(args[0].AsStringRef());
+        const std::string_view remove(args[1].AsStringRef());
+        // 256-entry membership table: chars[b] is true when byte b is in `remove`.
+        std::array<bool, 256> chars{};
+        for (const ui8 c : remove) {
+            chars[c] = true;
+        }
+        for (size_t pos = 0; pos < input.size(); ++pos) {
+            if (chars[static_cast<ui8>(input[pos])]) {
+                std::string result(input);
+                result.erase(pos, 1);
+                return valueBuilder->NewString(result);
+            }
+        }
+        return args[0];
+    }
+
+    // Block kernel counterpart: identical logic per row.
+    struct TRemoveFirstKernelExec
+        : public TBinaryKernelExec<TRemoveFirstKernelExec>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+            const std::string_view input(arg1.AsStringRef());
+            const std::string_view remove(arg2.AsStringRef());
+            std::array<bool, 256> chars{};
+            for (const ui8 c : remove) {
+                chars[c] = true;
+            }
+            for (size_t pos = 0; pos < input.size(); ++pos) {
+                if (chars[static_cast<ui8>(input[pos])]) {
+                    std::string result(input);
+                    result.erase(pos, 1);
+                    return sink(TBlockItem(result));
+                }
+            }
+            sink(arg1);
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do)
+
+
+    // TRemoveLast(input, remove): erases the last byte of `input` that
+    // belongs to the byte set `remove`. When no byte matches, the original
+    // argument is returned unchanged.
+    //
+    // The search walks a non-owning view of the input from the back using a
+    // plain index; an owning copy is made only once a byte to erase has been
+    // found, so the no-match path performs no string allocation.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) {
+        const std::string_view input(args[0].AsStringRef());
+        const std::string_view remove(args[1].AsStringRef());
+        // 256-entry membership table: chars[b] is true when byte b is in `remove`.
+        std::array<bool, 256> chars{};
+        for (const ui8 c : remove) {
+            chars[c] = true;
+        }
+        // `pos` is decremented in the condition, so the body sees size-1 .. 0.
+        for (size_t pos = input.size(); pos-- > 0;) {
+            if (chars[static_cast<ui8>(input[pos])]) {
+                std::string result(input);
+                result.erase(pos, 1);
+                return valueBuilder->NewString(result);
+            }
+        }
+        return args[0];
+    }
+
+    // Block kernel counterpart: identical logic per row.
+    struct TRemoveLastKernelExec
+        : public TBinaryKernelExec<TRemoveLastKernelExec>
+    {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+            const std::string_view input(arg1.AsStringRef());
+            const std::string_view remove(arg2.AsStringRef());
+            std::array<bool, 256> chars{};
+            for (const ui8 c : remove) {
+                chars[c] = true;
+            }
+            for (size_t pos = input.size(); pos-- > 0;) {
+                if (chars[static_cast<ui8>(input[pos])]) {
+                    std::string result(input);
+                    result.erase(pos, 1);
+                    return sink(TBlockItem(result));
+                }
+            }
+            sink(arg1);
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do)
+
+
+    // NOTE: String::Find is marked as deprecated, so a block implementation
+    // is not required for it. Hence, only the scalar one is provided.
+    //
+    // TFind(haystack, needle[, pos]): forwards to TString::find(), starting
+    // at `pos` (0 when omitted), and returns whatever find() reports.
+    SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) {
+        Y_UNUSED(valueBuilder);
+        const ui64 startAt = args[2].GetOrDefault<ui64>(0);
+        const TString text(args[0].AsStringRef());
+        const TString pattern(args[1].AsStringRef());
+        const auto found = text.find(pattern, startAt);
+        return TUnboxedValuePod(found);
+    }
+
+    // NOTE: String::ReverseFind is marked as deprecated, so a block
+    // implementation is not required for it. Hence, only the scalar one is
+    // provided.
+    //
+    // TReverseFind(haystack, needle[, pos]): forwards to TString::rfind(),
+    // starting at `pos` (TString::npos when omitted), and returns whatever
+    // rfind() reports.
+    SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) {
+        Y_UNUSED(valueBuilder);
+        const ui64 startAt = args[2].GetOrDefault<ui64>(TString::npos);
+        const TString text(args[0].AsStringRef());
+        const TString pattern(args[1].AsStringRef());
+        const auto found = text.rfind(pattern, startAt);
+        return TUnboxedValuePod(found);
+    }
+
+    // NOTE: String::Substring is marked as deprecated, so a block
+    // implementation is not required for it. Hence, only the scalar one is
+    // provided.
+    //
+    // TSubstring(input[, from[, count]]): forwards to TString::substr() with
+    // `from` defaulting to 0 and `count` defaulting to TString::npos.
+    SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) {
+        const ui64 offset = args[1].GetOrDefault<ui64>(0);
+        const ui64 length = args[2].GetOrDefault<ui64>(TString::npos);
+        const TString text(args[0].AsStringRef());
+        return valueBuilder->NewString(text.substr(offset, length));
+    }
+
+    // Scratch container used to collect list items before the list is built.
+    using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>;
+
+    // Appends one SubString(input, offset, length) value per token produced
+    // by `it`. `from` is the start of the character range that `input`
+    // represents; the offset is measured from it and the length spans
+    // TokenStart()..TokenDelim().
+    template <typename TIt>
+    static void SplitToListImpl(
+        const IValueBuilder* valueBuilder,
+        const TUnboxedValue& input,
+        const std::string_view::const_iterator from,
+        const TIt& it,
+        TTmpVector& result) {
+        for (const auto& token : it) {
+            const auto offset = std::distance(from, token.TokenStart());
+            const auto length = std::distance(token.TokenStart(), token.TokenDelim());
+            result.emplace_back(valueBuilder->SubString(input, offset, length));
+        }
+    }
+
+    // Dispatching overload: applies it.SkipEmpty() when `skipEmpty` is set,
+    // then delegates to the token-walking overload.
+    template <typename TIt>
+    static void SplitToListImpl(
+        const IValueBuilder* valueBuilder,
+        const TUnboxedValue& input,
+        const std::string_view::const_iterator from,
+        TIt& it,
+        bool skipEmpty,
+        TTmpVector& result) {
+        if (!skipEmpty) {
+            SplitToListImpl(valueBuilder, input, from, it, result);
+            return;
+        }
+        SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result);
+    }
+
+ // Names and types of the named arguments accepted by TSplitToList.
+ // The string literals are the argument names as exposed by the UDF, so the
+ // "Delimeter" spelling is part of the interface and must stay as is.
+ constexpr char delimeterStringName[] = "DelimeterString";
+ constexpr char skipEmptyName[] = "SkipEmpty";
+ constexpr char limitName[] = "Limit";
+ // bool: selects between splitting by the whole delimiter string and
+ // splitting by a set of characters (see TSplitToList).
+ using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>;
+ // bool: when set, SkipEmpty() is applied to the splitter.
+ using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>;
+ // ui64: a non-zero value is passed to the splitter as Limit(limit + 1).
+ using TLimitArg = TNamedArg<ui64, limitName>;
+
+
+    // TSplitToList(input, delimeter, [DelimeterString], [SkipEmpty], [Limit])
+    // Splits `input` and returns the pieces as a list of strings; every piece
+    // is produced via SubString() on the original argument.
+    //
+    //   * input           - optional; a null input yields an empty list.
+    //   * DelimeterString - default true: `delimeter` is matched as a whole
+    //                       string; false: it is handed to SplitBySet().
+    //   * SkipEmpty       - default false; applies SkipEmpty() to the splitter.
+    //   * Limit           - default 0 (no limit); a non-zero value is passed
+    //                       to the splitter as Limit(limit + 1).
+    SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>(
+                            TOptional<char*>,
+                            char*,
+                            TDelimeterStringArg,
+                            TSkipEmptyArg,
+                            TLimitArg
+                       ),
+                       3) {
+        TTmpVector result;
+        if (args[0]) {
+            const std::string_view input(args[0].AsStringRef());
+            const std::string_view delimeter(args[1].AsStringRef());
+            const bool delimiterString = args[2].GetOrDefault<bool>(true);
+            const bool skipEmpty = args[3].GetOrDefault<bool>(false);
+            const auto limit = args[4].GetOrDefault<ui64>(0);
+            if (delimiterString) {
+                if (limit) {
+                    auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1);
+                    SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+                } else {
+                    auto it = StringSplitter(input).SplitByString(delimeter);
+                    SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+                }
+            } else {
+                // SplitBySet() receives a NUL-terminated C string. The backing
+                // TString is a named local so that the pointer stays valid for
+                // as long as the splitter is iterated, instead of pointing
+                // into a temporary destroyed at the end of the declaration.
+                const TString delimeterSet(delimeter);
+                if (limit) {
+                    auto it = StringSplitter(input).SplitBySet(delimeterSet.c_str()).Limit(limit + 1);
+                    SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+                } else {
+                    auto it = StringSplitter(input).SplitBySet(delimeterSet.c_str());
+                    SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result);
+                }
+            }
+        }
+        return valueBuilder->NewList(result.data(), result.size());
+    }
+
+    // TJoinFromList(list, delimeter): concatenates the non-null items of the
+    // list with JoinSeq(), placing `delimeter` between them. Null items are
+    // skipped rather than contributing an empty piece.
+    SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) {
+        auto listIt = args[0].GetListIterator();
+        const TString separator(args[1].AsStringRef());
+        TVector<TString> parts;
+
+        TUnboxedValue element;
+        while (listIt.Next(element)) {
+            if (!element) {
+                continue;
+            }
+            parts.emplace_back(element.AsStringRef());
+        }
+
+        return valueBuilder->NewString(JoinSeq(separator, parts));
+    }
+
+    // TLevensteinDistance(left, right) -> ui64: the value computed by
+    // NLevenshtein::Distance() for the two argument strings.
+    BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) {
+        Y_UNUSED(valueBuilder);
+        const TStringBuf lhs(args[0].AsStringRef());
+        const TStringBuf rhs(args[1].AsStringRef());
+        const ui64 editDistance = NLevenshtein::Distance(lhs, rhs);
+        return TUnboxedValuePod(editDistance);
+    }
+
+    // Block kernel counterpart; operates on std::string_view views.
+    struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> {
+        template <typename TSink>
+        static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+            const std::string_view lhs(arg1.AsStringRef());
+            const std::string_view rhs(arg2.AsStringRef());
+            const ui64 editDistance = NLevenshtein::Distance(lhs, rhs);
+            sink(TBlockItem(editDistance));
+        }
+    };
+
+    END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do);
+
+
+
+ // THumanReadableDuration: treats the ui64 argument as a microsecond count
+ // (TDuration::MicroSeconds) and returns the text HumanReadable() writes for it.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) {
+     const auto duration = TDuration::MicroSeconds(args[0].Get<ui64>());
+     TStringStream out;
+     out << HumanReadable(duration);
+     return valueBuilder->NewString(TStringRef(out.Data(), out.Size()));
+ }
+
+ // Kernel counterpart working on a TBlockItem input: same formatting, with
+ // the text passed to the sink instead of being returned.
+ struct THumanReadableDurationKernelExec
+     : public TUnaryKernelExec<THumanReadableDurationKernelExec>
+ {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) {
+         const auto duration = TDuration::MicroSeconds(arg1.Get<ui64>());
+         TStringStream out;
+         out << HumanReadable(duration);
+         // The TStringRef is built from the stream's Data()/Size(); `out`
+         // stays alive until the sink call has returned.
+         sink(TBlockItem(TStringRef(out.Data(), out.Size())));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do)
+
+
+ // TPrec: formats the double in args[0] via Prec(), passing the ui64 in
+ // args[1] as Prec()'s second argument, and returns the resulting text.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) {
+     const double value = args[0].Get<double>();
+     const ui64 digits = args[1].Get<ui64>();
+     TStringStream out;
+     out << Prec(value, digits);
+     return valueBuilder->NewString(TStringRef(out.Data(), out.Size()));
+ }
+
+ // Kernel counterpart working on TBlockItem inputs: same formatting, with
+ // the text passed to the sink instead of being returned.
+ struct TPrecKernelExec : public TBinaryKernelExec<TPrecKernelExec> {
+     template <typename TSink>
+     static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
+         const double value = arg1.Get<double>();
+         const ui64 digits = arg2.Get<ui64>();
+         TStringStream out;
+         out << Prec(value, digits);
+         sink(TBlockItem(TStringRef(out.Data(), out.Size())));
+     }
+ };
+
+ END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do)
+
+
+ // TToByteList: turns the input string into a list holding one ui8 value per
+ // byte, in the same order as the bytes appear in the string.
+ SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) {
+     const TStringBuf bytes(args[0].AsStringRef());
+     TUnboxedValue* slots = nullptr;
+     TUnboxedValue list = valueBuilder->NewArray(bytes.size(), slots);
+     for (size_t i = 0; i < bytes.size(); ++i) {
+         // Convert through unsigned char so the stored value is the raw byte.
+         slots[i] = TUnboxedValuePod(static_cast<unsigned char>(bytes[i]));
+     }
+     return list;
+ }
+
+ // TFromByteList: builds a string whose bytes are the ui8 elements of the
+ // input list, in list order.
+ SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) {
+     auto list = args[0];
+
+     auto direct = list.GetElements();
+     if (!direct) {
+         // No direct element access: walk the list iterator and collect the
+         // bytes in a growable buffer first.
+         std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> bytes;
+         bytes.reserve(TUnboxedValuePod::InternalBufferSize);
+
+         const auto& listIt = list.GetListIterator();
+         for (NUdf::TUnboxedValue element; listIt.Next(element); ) {
+             bytes.push_back(element.Get<ui8>());
+         }
+
+         return valueBuilder->NewString(TStringRef(bytes.data(), bytes.size()));
+     }
+
+     // Elements are directly addressable: allocate the string at its final
+     // length and write every byte in place.
+     const auto length = list.GetListLength();
+     TUnboxedValue filled = valueBuilder->NewStringNotFilled(list.GetListLength());
+     auto dst = filled.AsStringRef().Data();
+     for (ui64 i = 0; i != length; ++i) {
+         dst[i] = direct[i].Get<ui8>();
+     }
+     return filled;
+ }
+
+#define STRING_REGISTER_UDF(udfName, ...) T##udfName, // turns one map entry into the token `T<udfName>,`; extra entry arguments are ignored
+ // Apply each *_MAP list to its generator macro. NOTE(review): the *_MAP lists and the generator macros are not visible here; presumably they are defined earlier in this file — confirm.
+ STRING_UDF_MAP(STRING_UDF)
+ STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF)
+ STROKA_UDF_MAP(STROKA_UDF)
+ STROKA_CASE_UDF_MAP(STROKA_CASE_UDF)
+ STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF)
+ STROKA_FIND_UDF_MAP(STROKA_FIND_UDF)
+ STRING_TWO_ARGS_UDF_MAP(STRING_TWO_ARGS_UDF)
+ IS_ASCII_UDF_MAP(IS_ASCII_UDF)
+ // NOTE(review): padLim is not referenced in the visible lines; presumably the stream formatter macros expanded below use it — confirm.
+ static constexpr ui64 padLim = 1000000;
+ STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF)
+ STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF)
+ STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF)
+ STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF)
+
+ SIMPLE_MODULE(TStringModule, // module declaration: the remaining arguments are the UDF class names it contains
+ STRING_UDF_MAP(STRING_REGISTER_UDF) // each *_MAP(STRING_REGISTER_UDF) line presumably contributes one "T<name>," token per map entry — confirm against the map definitions
+ STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF)
+ STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_TWO_ARGS_UDF_MAP(STRING_REGISTER_UDF)
+ IS_ASCII_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
+ TCollapseText, // from here on: UDF classes listed by name rather than through a *_MAP list
+ TReplaceAll,
+ TReplaceFirst,
+ TReplaceLast,
+ TRemoveAll,
+ TRemoveFirst,
+ TRemoveLast,
+ TContains,
+ TFind,
+ TReverseFind,
+ TSubstring,
+ TSplitToList,
+ TJoinFromList,
+ TLevensteinDistance,
+ THumanReadableDuration,
+ TPrec,
+ TToByteList,
+ TFromByteList) // last entry carries no trailing comma; it closes the SIMPLE_MODULE argument list
+}
+
+REGISTER_MODULES(TStringModule) // NOTE(review): presumably exposes TStringModule (declared with SIMPLE_MODULE above) to the UDF loader — confirm against the macro definition
diff --git a/yql/essentials/udfs/common/string/test/canondata/result.json b/yql/essentials/udfs/common/string/test/canondata/result.json
new file mode 100644
index 00000000000..f9e3a670c2c
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/result.json
@@ -0,0 +1,112 @@
+{
+ "test.test[AsciiChecks]": [
+ {
+ "uri": "file://test.test_AsciiChecks_/results.txt"
+ }
+ ],
+ "test.test[Base32Decode]": [
+ {
+ "uri": "file://test.test_Base32Decode_/results.txt"
+ }
+ ],
+ "test.test[Base32Encode]": [
+ {
+ "uri": "file://test.test_Base32Encode_/results.txt"
+ }
+ ],
+ "test.test[BlockAsciiChecks]": [
+ {
+ "uri": "file://test.test_BlockAsciiChecks_/results.txt"
+ }
+ ],
+ "test.test[BlockFind]": [
+ {
+ "uri": "file://test.test_BlockFind_/results.txt"
+ }
+ ],
+ "test.test[BlockRemove]": [
+ {
+ "uri": "file://test.test_BlockRemove_/results.txt"
+ }
+ ],
+ "test.test[BlockReplace]": [
+ {
+ "uri": "file://test.test_BlockReplace_/results.txt"
+ }
+ ],
+ "test.test[BlockStreamFormat]": [
+ {
+ "uri": "file://test.test_BlockStreamFormat_/results.txt"
+ }
+ ],
+ "test.test[BlockStringUDF]": [
+ {
+ "uri": "file://test.test_BlockStringUDF_/results.txt"
+ }
+ ],
+ "test.test[BlockStringUnsafeUDF]": [
+ {
+ "uri": "file://test.test_BlockStringUnsafeUDF_/results.txt"
+ }
+ ],
+ "test.test[BlockTo]": [
+ {
+ "uri": "file://test.test_BlockTo_/results.txt"
+ }
+ ],
+ "test.test[ExtendAndTake]": [
+ {
+ "uri": "file://test.test_ExtendAndTake_/results.txt"
+ }
+ ],
+ "test.test[Find]": [
+ {
+ "uri": "file://test.test_Find_/results.txt"
+ }
+ ],
+ "test.test[List]": [
+ {
+ "uri": "file://test.test_List_/results.txt"
+ }
+ ],
+ "test.test[List_v0]": [
+ {
+ "uri": "file://test.test_List_v0_/results.txt"
+ }
+ ],
+ "test.test[Remove]": [
+ {
+ "uri": "file://test.test_Remove_/results.txt"
+ }
+ ],
+ "test.test[ReplaceFirstLast]": [
+ {
+ "uri": "file://test.test_ReplaceFirstLast_/results.txt"
+ }
+ ],
+ "test.test[Replace]": [
+ {
+ "uri": "file://test.test_Replace_/results.txt"
+ }
+ ],
+ "test.test[StreamFormat]": [
+ {
+ "uri": "file://test.test_StreamFormat_/results.txt"
+ }
+ ],
+ "test.test[StringUDF]": [
+ {
+ "uri": "file://test.test_StringUDF_/results.txt"
+ }
+ ],
+ "test.test[StringUnsafeUDF]": [
+ {
+ "uri": "file://test.test_StringUnsafeUDF_/results.txt"
+ }
+ ],
+ "test.test[To]": [
+ {
+ "uri": "file://test.test_To_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt
new file mode 100644
index 00000000000..944b17d4c1e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "isascii";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isspace";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isupper";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "islower";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isdigit";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalpha";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalnum";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "ishex";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt
new file mode 100644
index 00000000000..bf4aa56fa93
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt
@@ -0,0 +1,79 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "strict_decoded";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "decoded";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "ORSXG5A=";
+ [
+ "test"
+ ];
+ [
+ "test"
+ ]
+ ];
+ [
+ "KRSXG5CUMVZXI===";
+ [
+ "TestTest"
+ ];
+ [
+ "TestTest"
+ ]
+ ];
+ [
+ "MFYHA3DF";
+ [
+ "apple"
+ ];
+ [
+ "apple"
+ ]
+ ];
+ [
+ "hmmmm===hmmmm";
+ #;
+ [
+ "\0\0\0"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt
new file mode 100644
index 00000000000..51c74759fc7
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt
@@ -0,0 +1,44 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "encoded";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "ORSXG5A="
+ ];
+ [
+ "TestTest";
+ "KRSXG5CUMVZXI==="
+ ];
+ [
+ "apple";
+ "MFYHA3DF"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt
new file mode 100644
index 00000000000..944b17d4c1e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt
@@ -0,0 +1,124 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "isascii";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isspace";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isupper";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "islower";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isdigit";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalpha";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "isalnum";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "ishex";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true;
+ %false
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %true;
+ %false;
+ %true;
+ %true
+ ];
+ [
+ %true;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ %false
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
new file mode 100644
index 00000000000..f6374e682e5
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt
@@ -0,0 +1,69 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "contains";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "levenstein";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ %false;
+ "3"
+ ];
+ [
+ "aswedfg";
+ %true;
+ "5"
+ ];
+ [
+ "asdadsaasd";
+ %true;
+ "8"
+ ];
+ [
+ "gdsfsassas";
+ %true;
+ "8"
+ ];
+ [
+ "";
+ %false;
+ "2"
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ %false;
+ "23"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt
new file mode 100644
index 00000000000..6fbf37a9f9b
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt
@@ -0,0 +1,173 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwruall";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrufirst";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrulast";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fd";
+ "fds";
+ "fds";
+ "fda";
+ "fds";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa"
+ ];
+ [
+ "aswedfg";
+ "wedfg";
+ "swedfg";
+ "swedfg";
+ "swedfg";
+ "awedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg"
+ ];
+ [
+ "asdadsaasd";
+ "ddd";
+ "sdadsaasd";
+ "asdadsasd";
+ "sdadsaasd";
+ "asdadsaad";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdf";
+ "gdsfsssas";
+ "gdsfsasss";
+ "gdfsassas";
+ "gdsfsassa";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt
new file mode 100644
index 00000000000..2ac3566c61d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt
@@ -0,0 +1,134 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fdsa";
+ "fdsz";
+ "fdsz";
+ "fdszz";
+ "fdszz";
+ "fds";
+ "fds"
+ ];
+ [
+ "aswedfg";
+ "zzzwedfg";
+ "zswedfg";
+ "zswedfg";
+ "zzswedfg";
+ "zzswedfg";
+ "swedfg";
+ "swedfg"
+ ];
+ [
+ "asdadsaasd";
+ "zzzdadsazzzd";
+ "zsdadsaasd";
+ "asdadsazsd";
+ "zzsdadsaasd";
+ "asdadsazzsd";
+ "sdadsaasd";
+ "asdadsasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdsfszzzszzz";
+ "gdsfszssas";
+ "gdsfsasszs";
+ "gdsfszzssas";
+ "gdsfsasszzs";
+ "gdsfsssas";
+ "gdsfsasss"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt
new file mode 100644
index 00000000000..b1bff8a57b8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt
@@ -0,0 +1,208 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "shex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sbin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "duration";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "quantity";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bytes";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "prec";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "qwertyui";
+ "qwertyui ";
+ " qwertyui";
+ "qwertyui000000000000";
+ "000000000000qwertyui";
+ "0x00000000499602D2";
+ "-0x000000000000007B";
+ "0b0000000000000000000000000000000001001001100101100000001011010010";
+ "-0b0000000000000000000000000000000000000000000000000000000001111011";
+ "71 77 65 72 74 79 75 69";
+ "01110001 01110111 01100101 01110010 01110100 01111001 01110101 01101001";
+ "20m 34s";
+ "1.23G";
+ "1.15GiB";
+ "-0.009963"
+ ];
+ [
+ "asdfghjl";
+ "asdfghjl ";
+ " asdfghjl";
+ "asdfghjl000000000000";
+ "000000000000asdfghjl";
+ "0x000000024CB016EA";
+ "-0x00000000000001C8";
+ "0b0000000000000000000000000000001001001100101100000001011011101010";
+ "-0b0000000000000000000000000000000000000000000000000000000111001000";
+ "61 73 64 66 67 68 6A 6C";
+ "01100001 01110011 01100100 01100110 01100111 01101000 01101010 01101100";
+ "2h 44m 36s";
+ "9.88G";
+ "9.2GiB";
+ "-0.03694"
+ ];
+ [
+ "zxcvbnm?";
+ "zxcvbnm? ";
+ " zxcvbnm?";
+ "zxcvbnm?000000000000";
+ "000000000000zxcvbnm?";
+ "0x00000002540BE3FF";
+ "-0x0000000000000315";
+ "0b0000000000000000000000000000001001010100000010111110001111111111";
+ "-0b0000000000000000000000000000000000000000000000000000001100010101";
+ "7A 78 63 76 62 6E 6D 3F";
+ "01111010 01111000 01100011 01110110 01100010 01101110 01101101 00111111";
+ "2h 46m 40s";
+ "10G";
+ "9.31GiB";
+ "-0.06391"
+ ];
+ [
+ "12345678";
+ "12345678 ";
+ " 12345678";
+ "12345678000000000000";
+ "00000000000012345678";
+ "0x0000000000000000";
+ "0x0000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "31 32 33 34 35 36 37 38";
+ "00110001 00110010 00110011 00110100 00110101 00110110 00110111 00111000";
+ "0us";
+ "0";
+ "0B";
+ "0"
+ ];
+ [
+ "!@#$%^&*";
+ "!@#$%^&* ";
+ " !@#$%^&*";
+ "!@#$%^&*000000000000";
+ "000000000000!@#$%^&*";
+ "0x0000000223557439";
+ "-0x00000000000003E7";
+ "0b0000000000000000000000000000001000100011010101010111010000111001";
+ "-0b0000000000000000000000000000000000000000000000000000001111100111";
+ "21 40 23 24 25 5E 26 2A";
+ "00100001 01000000 00100011 00100100 00100101 01011110 00100110 00101010";
+ "2h 33m 2s";
+ "9.18G";
+ "8.55GiB";
+ "-0.08092"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt
new file mode 100644
index 00000000000..a665105224f
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt
@@ -0,0 +1,169 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64encu";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "xenc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "henc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hdec";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clps";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "strp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clpst";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI===";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ=";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ,";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "202020217177652072747920202075696F70205B205D24";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "+++!qwe+rty+++uiop+%5B+%5D$";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "!qwe rty uiop [ ]$";
+ "!qwe ..."
+ ];
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA==";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,";
+ "@as dfgh jkl\\\\n;'% ";
+ "@as dfgh jkl\n;'% ";
+ "4061732020202020202064666768206A6B6C5C6E3B27252020";
+ "@as dfgh jkl\\n;&#39;% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as+++++++dfgh+jkl%5Cn;%27%25++";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'%";
+ "@as ..."
+ ];
+ [
+ "EAQCAI32PBRQS5TCNYQASCQIEBWSYLRPH5PCAIBA";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8/XiAgIA==";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8_XiAgIA,,";
+ " #zxc\\tvbn \\t\\n\\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "202020237A78630976626E20090A08206D2C2E2F3F5E202020";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "+++%23zxc%09vbn+%09%0A%08+m%2C./%3F%5E+++";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc vbn \x08 m,./?^ ";
+ "#zxc\tvbn \t\n\x08 m,./?^";
+ "#zxc ..."
+ ];
+ [
+ "GEQTEQBTEM2CINJFGZPDOJRYFI4SQMBJFVPT2KZMHQXD4===";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg==";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg,,";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "31213240332334243525365E3726382A392830292D5F3D2B2C3C2E3E";
+ "1!2@3#4$5%6^7&amp;8*9(0)-_=+,&lt;.&gt;";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3%234$5%256%5E7%268*9%280%29-_%3D%2B%2C%3C.%3E";
+ "1!2@3#4$5%6^7&8*9(0)-_= ,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@ ..."
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt
new file mode 100644
index 00000000000..26b182f9343
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt
@@ -0,0 +1,158 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b32sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "xdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOS"
+ ]
+ ];
+ #;
+ #
+ ];
+ [
+ [
+ [
+ "QIAEXLvMggAcAECCAFgAQUALyg=="
+ ]
+ ];
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "0DQNA0D4P/93QP6/z4NA0DQP98Dxfg0DodA6PQ=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ]
+ ];
+ [
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "gYoECABAgAQaIM6AAAAAubn0goBAAA=="
+ ]
+ ];
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "4DwP70DQNA0DQNA0D3Pe9/wNA8DwfC6LxNh1/XdA0A=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt
new file mode 100644
index 00000000000..143cfb76417
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt
@@ -0,0 +1,88 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_lower";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_upper";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_title";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "test";
+ "TEST";
+ "Test"
+ ];
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "TeSt";
+ "test";
+ "TEST";
+ "Test"
+ ];
+ [
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"
+ ];
+ [
+ "Eyl\xC3\xBCl";
+ "eyl\xC3\xBCl";
+ "EYL\xC3\xBCL";
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "6";
+ "6";
+ "6";
+ "6"
+ ];
+ [
+ "";
+ "";
+ "";
+ ""
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt
new file mode 100644
index 00000000000..81269c68153
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt
@@ -0,0 +1,60 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "b"
+ ]
+ ];
+ [
+ [
+ "d"
+ ];
+ [
+ "d"
+ ]
+ ];
+ [
+ [];
+ #
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
new file mode 100644
index 00000000000..cec53212501
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt
@@ -0,0 +1,147 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "contains";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "prefix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "starts";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "suffix";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "ends";
+ [
+ "DataType";
+ "Bool"
+ ]
+ ];
+ [
+ "find";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "rfind";
+ [
+ "DataType";
+ "Int64"
+ ]
+ ];
+ [
+ "levenstein";
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ "-1";
+ "-1";
+ "3"
+ ];
+ [
+ "aswedfg";
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ "0";
+ "0";
+ "5"
+ ];
+ [
+ "asdadsaasd";
+ %true;
+ %true;
+ %true;
+ %false;
+ %false;
+ "0";
+ "7";
+ "8"
+ ];
+ [
+ "gdsfsassas";
+ %true;
+ %false;
+ %false;
+ %true;
+ %true;
+ "5";
+ "8";
+ "8"
+ ];
+ [
+ "";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ "-1";
+ "-1";
+ "2"
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ %false;
+ %false;
+ %false;
+ %false;
+ %false;
+ "-1";
+ "-1";
+ "23"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt
new file mode 100644
index 00000000000..dac9a135756
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt
@@ -0,0 +1,265 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "equals_to_original";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "replace_delimeter";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "just_split";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "first";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "skip_empty";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "multichar_delim_set";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "multichar_delim_string";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "limited";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "a@b@c";
+ "a@b@c";
+ "a#b#c";
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "a"
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "a";
+ "";
+ "";
+ "c"
+ ];
+ [
+ "a@";
+ "c"
+ ];
+ [
+ "a";
+ "b@c"
+ ]
+ ];
+ [
+ "@a@b@c";
+ "@a@b@c";
+ "#a#b#c";
+ [
+ "";
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ ""
+ ];
+ [
+ "a";
+ "b";
+ "c"
+ ];
+ [
+ "";
+ "a";
+ "";
+ "";
+ "c"
+ ];
+ [
+ "@a@";
+ "c"
+ ];
+ [
+ "";
+ "a@b@c"
+ ]
+ ];
+ [
+ "@@@a@a";
+ "@@@a@a";
+ "###a#a";
+ [
+ "";
+ "";
+ "";
+ "a";
+ "a"
+ ];
+ [
+ ""
+ ];
+ [
+ "a";
+ "a"
+ ];
+ [
+ "";
+ "";
+ "";
+ "a";
+ "a"
+ ];
+ [
+ "@@@a@a"
+ ];
+ [
+ "";
+ "@@a@a"
+ ]
+ ];
+ [
+ "d#e#f";
+ "d#e#f";
+ "d#e#f";
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ];
+ [
+ "d#e#f"
+ ]
+ ];
+ [
+ "d";
+ "d";
+ "d";
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ];
+ [
+ "d"
+ ]
+ ];
+ [
+ "";
+ "";
+ "";
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt
new file mode 100644
index 00000000000..b149ad38a60
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt
@@ -0,0 +1,125 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "not_equals_to_original";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "not_equals_to_original_skip_empty";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "equals_to_original";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "multichar";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "a@b@c";
+ #;
+ #;
+ "a@b@c";
+ [
+ "a";
+ "b";
+ "c"
+ ]
+ ];
+ [
+ "@a@b@c";
+ #;
+ #;
+ "@a@b@c";
+ [
+ "a";
+ "b";
+ "c"
+ ]
+ ];
+ [
+ "@@@a@a";
+ [
+ "@@@a@a"
+ ];
+ [
+ "@@@a@a"
+ ];
+ "@@@a@a";
+ [
+ "a";
+ "a"
+ ]
+ ];
+ [
+ "d#e#f";
+ #;
+ #;
+ "d#e#f";
+ [
+ "d";
+ "e";
+ "f"
+ ]
+ ];
+ [
+ "d";
+ #;
+ #;
+ "d";
+ [
+ "d"
+ ]
+ ];
+ [
+ "";
+ #;
+ #;
+ "";
+ []
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt
new file mode 100644
index 00000000000..6fbf37a9f9b
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt
@@ -0,0 +1,173 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwruall";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrufirst";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hwrulast";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fd";
+ "fds";
+ "fds";
+ "fda";
+ "fds";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa";
+ "fdsa"
+ ];
+ [
+ "aswedfg";
+ "wedfg";
+ "swedfg";
+ "swedfg";
+ "swedfg";
+ "awedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg";
+ "aswedfg"
+ ];
+ [
+ "asdadsaasd";
+ "ddd";
+ "sdadsaasd";
+ "asdadsasd";
+ "sdadsaasd";
+ "asdadsaad";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd";
+ "asdadsaasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdf";
+ "gdsfsssas";
+ "gdsfsasss";
+ "gdfsassas";
+ "gdsfsassa";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas";
+ "gdsfsassas"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
+ "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt
new file mode 100644
index 00000000000..9320ac1c18a
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt
@@ -0,0 +1,84 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column4";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column5";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column6";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column7";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "gzas";
+ "gzzzsas";
+ "gsas";
+ "gasas";
+ "gasz";
+ "gaszzzs";
+ "gass";
+ "gasas"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt
new file mode 100644
index 00000000000..2ac3566c61d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt
@@ -0,0 +1,134 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "all";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last2";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "first3";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "last3";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "fdsa";
+ "fdsa";
+ "fdsz";
+ "fdsz";
+ "fdszz";
+ "fdszz";
+ "fds";
+ "fds"
+ ];
+ [
+ "aswedfg";
+ "zzzwedfg";
+ "zswedfg";
+ "zswedfg";
+ "zzswedfg";
+ "zzswedfg";
+ "swedfg";
+ "swedfg"
+ ];
+ [
+ "asdadsaasd";
+ "zzzdadsazzzd";
+ "zsdadsaasd";
+ "asdadsazsd";
+ "zzsdadsaasd";
+ "asdadsazzsd";
+ "sdadsaasd";
+ "asdadsasd"
+ ];
+ [
+ "gdsfsassas";
+ "gdsfszzzszzz";
+ "gdsfszssas";
+ "gdsfsasszs";
+ "gdsfszzssas";
+ "gdsfsasszzs";
+ "gdsfsssas";
+ "gdsfsasss"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ "";
+ ""
+ ];
+ [
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
+ "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt
new file mode 100644
index 00000000000..b1bff8a57b8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt
@@ -0,0 +1,208 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "right_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "left_pad_zero";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "shex";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "sbin";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hex_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bin_text";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "duration";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "quantity";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "bytes";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "prec";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "qwertyui";
+ "qwertyui ";
+ " qwertyui";
+ "qwertyui000000000000";
+ "000000000000qwertyui";
+ "0x00000000499602D2";
+ "-0x000000000000007B";
+ "0b0000000000000000000000000000000001001001100101100000001011010010";
+ "-0b0000000000000000000000000000000000000000000000000000000001111011";
+ "71 77 65 72 74 79 75 69";
+ "01110001 01110111 01100101 01110010 01110100 01111001 01110101 01101001";
+ "20m 34s";
+ "1.23G";
+ "1.15GiB";
+ "-0.009963"
+ ];
+ [
+ "asdfghjl";
+ "asdfghjl ";
+ " asdfghjl";
+ "asdfghjl000000000000";
+ "000000000000asdfghjl";
+ "0x000000024CB016EA";
+ "-0x00000000000001C8";
+ "0b0000000000000000000000000000001001001100101100000001011011101010";
+ "-0b0000000000000000000000000000000000000000000000000000000111001000";
+ "61 73 64 66 67 68 6A 6C";
+ "01100001 01110011 01100100 01100110 01100111 01101000 01101010 01101100";
+ "2h 44m 36s";
+ "9.88G";
+ "9.2GiB";
+ "-0.03694"
+ ];
+ [
+ "zxcvbnm?";
+ "zxcvbnm? ";
+ " zxcvbnm?";
+ "zxcvbnm?000000000000";
+ "000000000000zxcvbnm?";
+ "0x00000002540BE3FF";
+ "-0x0000000000000315";
+ "0b0000000000000000000000000000001001010100000010111110001111111111";
+ "-0b0000000000000000000000000000000000000000000000000000001100010101";
+ "7A 78 63 76 62 6E 6D 3F";
+ "01111010 01111000 01100011 01110110 01100010 01101110 01101101 00111111";
+ "2h 46m 40s";
+ "10G";
+ "9.31GiB";
+ "-0.06391"
+ ];
+ [
+ "12345678";
+ "12345678 ";
+ " 12345678";
+ "12345678000000000000";
+ "00000000000012345678";
+ "0x0000000000000000";
+ "0x0000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "0b0000000000000000000000000000000000000000000000000000000000000000";
+ "31 32 33 34 35 36 37 38";
+ "00110001 00110010 00110011 00110100 00110101 00110110 00110111 00111000";
+ "0us";
+ "0";
+ "0B";
+ "0"
+ ];
+ [
+ "!@#$%^&*";
+ "!@#$%^&* ";
+ " !@#$%^&*";
+ "!@#$%^&*000000000000";
+ "000000000000!@#$%^&*";
+ "0x0000000223557439";
+ "-0x00000000000003E7";
+ "0b0000000000000000000000000000001000100011010101010111010000111001";
+ "-0b0000000000000000000000000000000000000000000000000000001111100111";
+ "21 40 23 24 25 5E 26 2A";
+ "00100001 01000000 00100011 00100100 00100101 01011110 00100110 00101010";
+ "2h 33m 2s";
+ "9.18G";
+ "8.55GiB";
+ "-0.08092"
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt
new file mode 100644
index 00000000000..a665105224f
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt
@@ -0,0 +1,169 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64enc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "b64encu";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "xenc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "henc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "hdec";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "cgunesc";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clps";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "strp";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "clpst";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI===";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ=";
+ "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ,";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "202020217177652072747920202075696F70205B205D24";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "+++!qwe+rty+++uiop+%5B+%5D$";
+ " !qwe rty uiop [ ]$";
+ " !qwe rty uiop [ ]$";
+ "!qwe rty uiop [ ]$";
+ "!qwe ..."
+ ];
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA==";
+ "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,";
+ "@as dfgh jkl\\\\n;'% ";
+ "@as dfgh jkl\n;'% ";
+ "4061732020202020202064666768206A6B6C5C6E3B27252020";
+ "@as dfgh jkl\\n;&#39;% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as+++++++dfgh+jkl%5Cn;%27%25++";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'% ";
+ "@as dfgh jkl\\n;'%";
+ "@as ..."
+ ];
+ [
+ "EAQCAI32PBRQS5TCNYQASCQIEBWSYLRPH5PCAIBA";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8/XiAgIA==";
+ "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8_XiAgIA,,";
+ " #zxc\\tvbn \\t\\n\\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "202020237A78630976626E20090A08206D2C2E2F3F5E202020";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ "+++%23zxc%09vbn+%09%0A%08+m%2C./%3F%5E+++";
+ " #zxc\tvbn \t\n\x08 m,./?^ ";
+ " #zxc vbn \x08 m,./?^ ";
+ "#zxc\tvbn \t\n\x08 m,./?^";
+ "#zxc ..."
+ ];
+ [
+ "GEQTEQBTEM2CINJFGZPDOJRYFI4SQMBJFVPT2KZMHQXD4===";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg==";
+ "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg,,";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "31213240332334243525365E3726382A392830292D5F3D2B2C3C2E3E";
+ "1!2@3#4$5%6^7&amp;8*9(0)-_=+,&lt;.&gt;";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3%234$5%256%5E7%268*9%280%29-_%3D%2B%2C%3C.%3E";
+ "1!2@3#4$5%6^7&8*9(0)-_= ,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@3#4$5%6^7&8*9(0)-_=+,<.>";
+ "1!2@ ..."
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt
new file mode 100644
index 00000000000..26b182f9343
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt
@@ -0,0 +1,158 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "b32dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b32sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64dec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "b64sdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "xdec";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ [
+ "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOS"
+ ]
+ ];
+ #;
+ #
+ ];
+ [
+ [
+ [
+ "QIAEXLvMggAcAECCAFgAQUALyg=="
+ ]
+ ];
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ [
+ " !qwe rty uiop [ ]$"
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "0DQNA0D4P/93QP6/z4NA0DQP98Dxfg0DodA6PQ=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ " !qwe rty uiop [ ]$"
+ ]
+ ];
+ [
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ [
+ [
+ "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"
+ ]
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "gYoECABAgAQaIM6AAAAAubn0goBAAA=="
+ ]
+ ];
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ [
+ "@as dfgh jkl\\n;'% "
+ ];
+ #
+ ];
+ [
+ [
+ [
+ "4DwP70DQNA0DQNA0D3Pe9/wNA8DwfC6LxNh1/XdA0A=="
+ ]
+ ];
+ #;
+ #;
+ #;
+ [
+ "@as dfgh jkl\\n;'% "
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt
new file mode 100644
index 00000000000..441e62fd21b
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt
@@ -0,0 +1,294 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_lower";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_upper";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "ascii_title";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "lower";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "upper";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "title";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "reverse";
+ [
+ "OptionalType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "byte_list";
+ [
+ "ListType";
+ [
+ "DataType";
+ "Uint8"
+ ]
+ ]
+ ];
+ [
+ "from_byte_list";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "from_lazy_byte_list";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "test";
+ "test";
+ "TEST";
+ "Test";
+ [
+ "test"
+ ];
+ [
+ "TEST"
+ ];
+ [
+ "Test"
+ ];
+ [
+ "tset"
+ ];
+ [
+ "116";
+ "101";
+ "115";
+ "116"
+ ];
+ "test";
+ "test"
+ ];
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"
+ ];
+ [
+ "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD1\x82\xD1\x81\xD0\xB5\xD1\x82"
+ ];
+ [
+ "209";
+ "130";
+ "208";
+ "181";
+ "209";
+ "129";
+ "209";
+ "130"
+ ];
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "TeSt";
+ "test";
+ "TEST";
+ "Test";
+ [
+ "test"
+ ];
+ [
+ "TEST"
+ ];
+ [
+ "Test"
+ ];
+ [
+ "tSeT"
+ ];
+ [
+ "84";
+ "101";
+ "83";
+ "116"
+ ];
+ "TeSt";
+ "TeSt"
+ ];
+ [
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ [
+ "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"
+ ];
+ [
+ "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"
+ ];
+ [
+ "\xD0\xA2\xD1\x81\xD0\x95\xD1\x82"
+ ];
+ [
+ "209";
+ "130";
+ "208";
+ "149";
+ "209";
+ "129";
+ "208";
+ "162"
+ ];
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2";
+ "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"
+ ];
+ [
+ "Eyl\xC3\xBCl";
+ "eyl\xC3\xBCl";
+ "EYL\xC3\xBCL";
+ "Eyl\xC3\xBCl";
+ [
+ "eyl\xC3\xBCl"
+ ];
+ [
+ "EYL\xC3\x9CL"
+ ];
+ [
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "l\xC3\xBClyE"
+ ];
+ [
+ "69";
+ "121";
+ "108";
+ "195";
+ "188";
+ "108"
+ ];
+ "Eyl\xC3\xBCl";
+ "Eyl\xC3\xBCl"
+ ];
+ [
+ "6";
+ "6";
+ "6";
+ "6";
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "6"
+ ];
+ [
+ "54"
+ ];
+ "6";
+ "6"
+ ];
+ [
+ "";
+ "";
+ "";
+ "";
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [
+ ""
+ ];
+ [];
+ "";
+ ""
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in
new file mode 100644
index 00000000000..26a46b0f6c6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in
@@ -0,0 +1,5 @@
+{"value"="qweRTY123$%?"};
+{"value"="asdFGHjkl:'|"};
+{"value"="zxcvbnm"};
+{"value"="1234567890"};
+{"value"="!@#$%^&*()_+{}"};
diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql
new file mode 100644
index 00000000000..f6e74d87462
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql
@@ -0,0 +1,10 @@
+SELECT
+ String::IsAscii(value) as isascii,
+ String::IsAsciiSpace(value) as isspace,
+ String::IsAsciiUpper(value) as isupper,
+ String::IsAsciiLower(value) as islower,
+ String::IsAsciiDigit(value) as isdigit,
+ String::IsAsciiAlpha(value) as isalpha,
+ String::IsAsciiAlnum(value) as isalnum,
+ String::IsAsciiHex(value) as ishex
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Decode.in b/yql/essentials/udfs/common/string/test/cases/Base32Decode.in
new file mode 100644
index 00000000000..34af8b23d47
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Decode.in
@@ -0,0 +1,4 @@
+{"key"="1";subkey="";"value"="ORSXG5A="};
+{"key"="2";subkey="";"value"="KRSXG5CUMVZXI==="};
+{"key"="3";subkey="";"value"="MFYHA3DF"};
+{"key"="4";subkey="";"value"="hmmmm===hmmmm"};
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql b/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql
new file mode 100644
index 00000000000..51b47ec1665
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql
@@ -0,0 +1,6 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::Base32StrictDecode(value) AS strict_decoded,
+ String::Base32Decode(value) AS decoded
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Encode.in b/yql/essentials/udfs/common/string/test/cases/Base32Encode.in
new file mode 100644
index 00000000000..c0051d04efd
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Encode.in
@@ -0,0 +1,3 @@
+{"key"="1";subkey="";"value"="test"};
+{"key"="2";subkey="";"value"="TestTest"};
+{"key"="3";subkey="";"value"="apple"};
diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql b/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql
new file mode 100644
index 00000000000..1ff9e3e4078
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql
@@ -0,0 +1,5 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::Base32Encode(value) AS encoded
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in
new file mode 100644
index 00000000000..26a46b0f6c6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in
@@ -0,0 +1,5 @@
+{"value"="qweRTY123$%?"};
+{"value"="asdFGHjkl:'|"};
+{"value"="zxcvbnm"};
+{"value"="1234567890"};
+{"value"="!@#$%^&*()_+{}"};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql
new file mode 100644
index 00000000000..d8bf9e942be
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql
@@ -0,0 +1,13 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ String::IsAscii(value) as isascii,
+ String::IsAsciiSpace(value) as isspace,
+ String::IsAsciiUpper(value) as isupper,
+ String::IsAsciiLower(value) as islower,
+ String::IsAsciiDigit(value) as isdigit,
+ String::IsAsciiAlpha(value) as isalpha,
+ String::IsAsciiAlnum(value) as isalnum,
+ String::IsAsciiHex(value) as ishex
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
new file mode 100644
index 00000000000..f1c855bcc11
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql
@@ -0,0 +1,7 @@
+/* syntax version 1 */
+pragma UseBlocks;
+SELECT
+ value,
+ String::Contains(value, "as") AS contains,
+ String::LevensteinDistance(value, "as") AS levenstein
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql b/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql
new file mode 100644
index 00000000000..4c285b78d07
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql
@@ -0,0 +1,16 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::RemoveAll(value, "as") AS all,
+ String::RemoveFirst(value, "a") AS first,
+ String::RemoveLast(value, "a") AS last,
+ String::RemoveFirst(value, "as") AS first2,
+ String::RemoveLast(value, "as") AS last2,
+ String::RemoveFirst(value, "") AS first3,
+ String::RemoveLast(value, "") AS last3,
+ String::RemoveAll(value, "`") AS hwruall,
+ String::RemoveFirst(value, "`") AS hwrufirst,
+ String::RemoveLast(value, "`") AS hwrulast,
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql b/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql
new file mode 100644
index 00000000000..030e36050cd
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql
@@ -0,0 +1,13 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::ReplaceAll(value, "as", "zzz") AS all,
+ String::ReplaceFirst(value, "a", "z") AS first,
+ String::ReplaceLast(value, "a", "z") AS last,
+ String::ReplaceFirst(value, "a", "zz") AS first2,
+ String::ReplaceLast(value, "a", "zz") AS last2,
+ String::ReplaceFirst(value, "a", "") AS first3,
+ String::ReplaceLast(value, "a", "") AS last3
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in
new file mode 100644
index 00000000000..1a446c4e488
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in
@@ -0,0 +1,5 @@
+{"key"="1";"subkey"="1";"value"="qwertyui";"biguint"=1234567890u;"negint"=-123};
+{"key"="2";"subkey"="2";"value"="asdfghjl";"biguint"=9876543210u;"negint"=-456};
+{"key"="3";"subkey"="3";"value"="zxcvbnm?";"biguint"=9999999999u;"negint"=-789};
+{"key"="4";"subkey"="4";"value"="12345678";"biguint"=0000000000u;"negint"=-000};
+{"key"="5";"subkey"="5";"value"="!@#$%^&*";"biguint"=9182737465u;"negint"=-999};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr
new file mode 100644
index 00000000000..bbc040040c8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr
@@ -0,0 +1,9 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"String"]];
+ ["subkey";["DataType";"String"]];
+ ["value";["DataType";"String"]];
+ ["biguint";["DataType";"Uint64"]];
+ ["negint";["DataType";"Int64"]]
+ ]];
+}}
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql
new file mode 100644
index 00000000000..8b61758a964
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql
@@ -0,0 +1,20 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::RightPad(value, 20) AS right_pad,
+ String::LeftPad(value, 20) AS left_pad,
+ String::RightPad(value, 20, "0") AS right_pad_zero,
+ String::LeftPad(value, 20, "0") AS left_pad_zero,
+ String::Hex(biguint) AS hex,
+ String::SHex(negint) AS shex,
+ String::Bin(biguint) AS bin,
+ String::SBin(negint) AS sbin,
+ String::HexText(value) AS hex_text,
+ String::BinText(value) AS bin_text,
+ String::HumanReadableDuration(biguint) AS duration,
+ String::HumanReadableQuantity(biguint) AS quantity,
+ String::HumanReadableBytes(biguint) AS bytes,
+ String::Prec(negint / 12345.6789, 4) AS prec
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in
new file mode 100644
index 00000000000..a9d378e0590
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in
@@ -0,0 +1,4 @@
+{"value"=" !qwe rty uiop [ ]$"};
+{"value"="@as dfgh jkl\\n;'\% "};
+{"value"=" #zxc\tvbn \t\n\b m,./?^ "};
+{"value"="1!2@3#4$5%6^7&8*9(0)-_=+,<.>"};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql
new file mode 100644
index 00000000000..1f96f5d62b0
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql
@@ -0,0 +1,18 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ String::Base32Encode(value) as b32enc,
+ String::Base64Encode(value) as b64enc,
+ String::Base64EncodeUrl(value) as b64encu,
+ String::EscapeC(value) as cesc,
+ String::UnescapeC(value) as cunesc,
+ String::HexEncode(value) as xenc,
+ String::EncodeHtml(value) as henc,
+ String::DecodeHtml(value) as hdec,
+ String::CgiEscape(value) as cgesc,
+ String::CgiUnescape(value) as cgunesc,
+ String::Collapse(value) as clps,
+ String::Strip(value) as strp,
+ String::CollapseText(value, 9) as clpst,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in
new file mode 100644
index 00000000000..2c15dd67ac6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in
@@ -0,0 +1,6 @@
+{"value"="EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="};
+{"value"="ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="};
+{"value"="202020217177652072747920202075696F70205B205D24"};
+{"value"="IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"};
+{"value"="QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"};
+{"value"="4061732020202020202064666768206A6B6C5C6E3B27252020"};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql
new file mode 100644
index 00000000000..82f82f50d9d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql
@@ -0,0 +1,10 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ String::Base32Decode(value) as b32dec,
+ String::Base32StrictDecode(value) AS b32sdec,
+ String::Base64Decode(value) as b64dec,
+ String::Base64StrictDecode(value) AS b64sdec,
+ String::HexDecode(value) as xdec,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockTo.in b/yql/essentials/udfs/common/string/test/cases/BlockTo.in
new file mode 100644
index 00000000000..93a00f7db8d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockTo.in
@@ -0,0 +1,7 @@
+{"key"="1";"subkey"="1";"value"="test"};
+{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"};
+{"key"="3";"subkey"="3";"value"="TeSt"};
+{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"};
+{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"};
+{"key"="6";"subkey"="6";"value"="6"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/BlockTo.sql b/yql/essentials/udfs/common/string/test/cases/BlockTo.sql
new file mode 100644
index 00000000000..628febe899e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/BlockTo.sql
@@ -0,0 +1,9 @@
+/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */
+PRAGMA UseBlocks;
+
+SELECT
+ value,
+ String::AsciiToLower(value) AS ascii_lower,
+ String::AsciiToUpper(value) AS ascii_upper,
+ String::AsciiToTitle(value) AS ascii_title,
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in
new file mode 100644
index 00000000000..27fc322b1ae
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in
@@ -0,0 +1,3 @@
+{"key"="1";"subkey"="1";"value"="a b c"};
+{"key"="2";"subkey"="2";"value"="d"};
+{"key"="3";"subkey"="3";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql
new file mode 100644
index 00000000000..2dab551eb1c
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql
@@ -0,0 +1,10 @@
+/* syntax version 1 */
+
+$split = ($row) -> {
+ return String::SplitToList($row.value, " ", true AS SkipEmpty, false AS DelimeterString);
+};
+
+SELECT
+ $split(TableRow()),
+ ListExtend($split(TableRow()), $split(TableRow()))[1]
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Find.sql b/yql/essentials/udfs/common/string/test/cases/Find.sql
new file mode 100644
index 00000000000..273553dcf9e
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Find.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::Contains(value, "as") AS contains,
+ String::HasPrefix(value, "as") AS prefix,
+ String::StartsWith(value, "as") AS starts,
+ String::HasSuffix(value, "as") AS suffix,
+ String::EndsWith(value, "as") AS ends,
+ String::Find(value, "as") AS find,
+ String::ReverseFind(value, "as") AS rfind,
+ String::LevensteinDistance(value, "as") AS levenstein
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/List.in b/yql/essentials/udfs/common/string/test/cases/List.in
new file mode 100644
index 00000000000..949cf26c776
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="a@b@c"};
+{"key"="1";"subkey"="1";"value"="@a@b@c"};
+{"key"="1";"subkey"="1";"value"="@@@a@a"};
+{"key"="2";"subkey"="2";"value"="d#e#f"};
+{"key"="3";"subkey"="3";"value"="d"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/List.sql b/yql/essentials/udfs/common/string/test/cases/List.sql
new file mode 100644
index 00000000000..42b983074e5
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List.sql
@@ -0,0 +1,12 @@
+/* syntax version 1 */
+SELECT
+ value,
+ Ensure(value, String::JoinFromList(String::SplitToList(value, "@"), "@") == value) AS equals_to_original,
+ String::JoinFromList(String::SplitToList(value, "@"), "#") AS replace_delimeter,
+ String::SplitToList(value, "@") AS just_split,
+ String::SplitToList(value, "@")[0] as first,
+ String::SplitToList(value, "@", true AS SkipEmpty) AS skip_empty,
+ String::SplitToList(value, "b@", false AS DelimeterString) AS multichar_delim_set,
+ String::SplitToList(value, "b@", true AS DelimeterString) AS multichar_delim_string,
+ String::SplitToList(value, "@", 1 AS Limit) AS limited
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/List_v0.in b/yql/essentials/udfs/common/string/test/cases/List_v0.in
new file mode 100644
index 00000000000..949cf26c776
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List_v0.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="a@b@c"};
+{"key"="1";"subkey"="1";"value"="@a@b@c"};
+{"key"="1";"subkey"="1";"value"="@@@a@a"};
+{"key"="2";"subkey"="2";"value"="d#e#f"};
+{"key"="3";"subkey"="3";"value"="d"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/List_v0.sql b/yql/essentials/udfs/common/string/test/cases/List_v0.sql
new file mode 100644
index 00000000000..36d984dc6a8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/List_v0.sql
@@ -0,0 +1,27 @@
+/* syntax version 1 */
+-- Pass explicitly the SplitToList settings that v0 syntax used as defaults.
+SELECT
+ value,
+ IF ( -- two-argument IF: the Ensure below is evaluated only for values containing "@@"
+ String::Contains(value, "@@"),
+ Ensure(
+ value,
+ String::JoinFromList(String::SplitToList(value, "@", true AS SkipEmpty, false AS DelimeterString), "@") != value, -- with SkipEmpty the round trip must NOT reproduce value
+ value -- third Ensure argument; presumably the failure message, confirm against YQL docs
+ )
+ ) AS not_equals_to_original,
+ IF ( -- NOTE(review): expression is identical to not_equals_to_original above; only the alias differs
+ String::Contains(value, "@@"),
+ Ensure(
+ value,
+ String::JoinFromList(String::SplitToList(value, "@", true AS SkipEmpty, false AS DelimeterString), "@") != value,
+ value
+ )
+ ) AS not_equals_to_original_skip_empty,
+ Ensure( -- unconditional check, applied to every row
+ value,
+ String::JoinFromList(String::SplitToList(value, "@", false AS SkipEmpty, false AS DelimeterString), "@") == value, -- without SkipEmpty the round trip must reproduce value
+ value
+ ) AS equals_to_original,
+ String::SplitToList(value, "@#", true AS SkipEmpty, false AS DelimeterString) AS multichar -- two delimiter characters, "@" and "#", with DelimeterString = false
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Remove.sql b/yql/essentials/udfs/common/string/test/cases/Remove.sql
new file mode 100644
index 00000000000..8bfe2c92e26
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Remove.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::RemoveAll(value, "as") AS all,
+ String::RemoveFirst(value, "a") AS first, -- one-character argument
+ String::RemoveLast(value, "a") AS last,
+ String::RemoveFirst(value, "as") AS first2, -- two-character argument
+ String::RemoveLast(value, "as") AS last2,
+ String::RemoveFirst(value, "") AS first3, -- edge case: empty second argument
+ String::RemoveLast(value, "") AS last3, -- edge case: empty second argument
+ String::RemoveAll(value, "`") AS hwruall, -- backtick argument; presumably aimed at the backtick-wrapped non-ASCII row of default.in (confirm which input this case reads)
+ String::RemoveFirst(value, "`") AS hwrufirst,
+ String::RemoveLast(value, "`") AS hwrulast, -- trailing comma before FROM is present in the original
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/Replace.sql b/yql/essentials/udfs/common/string/test/cases/Replace.sql
new file mode 100644
index 00000000000..0eea32a3e41
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/Replace.sql
@@ -0,0 +1,11 @@
+/* syntax version 1 */
+SELECT
+ value,
+ String::ReplaceAll(value, "as", "zzz") AS all, -- replacement longer than the pattern
+ String::ReplaceFirst(value, "a", "z") AS first, -- replacement same length as the pattern
+ String::ReplaceLast(value, "a", "z") AS last,
+ String::ReplaceFirst(value, "a", "zz") AS first2, -- replacement longer than the pattern
+ String::ReplaceLast(value, "a", "zz") AS last2,
+ String::ReplaceFirst(value, "a", "") AS first3, -- empty replacement
+ String::ReplaceLast(value, "a", "") AS last3 -- empty replacement
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql b/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql
new file mode 100644
index 00000000000..6a83400d424
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql
@@ -0,0 +1,10 @@
+SELECT -- literal-only query: no FROM clause, every call uses the constant "gasas"
+ String::ReplaceFirst("gasas", "as", "z"), -- pattern occurs twice; replacement shorter than pattern
+ String::ReplaceFirst("gasas", "a", "zzz"), -- replacement longer than pattern
+ String::ReplaceFirst("gasas", "a", ""), -- empty replacement
+ String::ReplaceFirst("gasas", "e", "z"), -- pattern "e" does not occur in "gasas"
+ String::ReplaceLast("gasas", "as", "z"), -- same four shapes, for ReplaceLast
+ String::ReplaceLast("gasas", "a", "zzz"),
+ String::ReplaceLast("gasas", "a", ""),
+ String::ReplaceLast("gasas", "k", "ey"); -- pattern "k" does not occur in "gasas"
+
diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.in b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in
new file mode 100644
index 00000000000..1a446c4e488
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in
@@ -0,0 +1,5 @@
+{"key"="1";"subkey"="1";"value"="qwertyui";"biguint"=1234567890u;"negint"=-123};
+{"key"="2";"subkey"="2";"value"="asdfghjl";"biguint"=9876543210u;"negint"=-456};
+{"key"="3";"subkey"="3";"value"="zxcvbnm?";"biguint"=9999999999u;"negint"=-789};
+{"key"="4";"subkey"="4";"value"="12345678";"biguint"=0000000000u;"negint"=-000};
+{"key"="5";"subkey"="5";"value"="!@#$%^&*";"biguint"=9182737465u;"negint"=-999};
diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr
new file mode 100644
index 00000000000..bbc040040c8
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr
@@ -0,0 +1,9 @@
+{"_yql_row_spec"={
+ "Type"=["StructType";[
+ ["key";["DataType";"String"]];
+ ["subkey";["DataType";"String"]];
+ ["value";["DataType";"String"]];
+ ["biguint";["DataType";"Uint64"]];
+ ["negint";["DataType";"Int64"]]
+ ]];
+}}
diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql b/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql
new file mode 100644
index 00000000000..46ee9a7c688
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql
@@ -0,0 +1,19 @@
+/* syntax version 1 */
+
+SELECT
+ value,
+ String::RightPad(value, 20) AS right_pad, -- second argument 20, no explicit fill argument
+ String::LeftPad(value, 20) AS left_pad,
+ String::RightPad(value, 20, "0") AS right_pad_zero, -- explicit third argument "0"
+ String::LeftPad(value, 20, "0") AS left_pad_zero,
+ String::Hex(biguint) AS hex, -- biguint is declared Uint64 in StreamFormat.in.attr
+ String::SHex(negint) AS shex, -- negint is declared Int64 in StreamFormat.in.attr
+ String::Bin(biguint) AS bin,
+ String::SBin(negint) AS sbin,
+ String::HexText(value) AS hex_text, -- takes the String column, unlike Hex/SHex above
+ String::BinText(value) AS bin_text,
+ String::HumanReadableDuration(biguint) AS duration, -- NOTE(review): unit of the argument is not visible here; confirm in the UDF docs
+ String::HumanReadableQuantity(biguint) AS quantity,
+ String::HumanReadableBytes(biguint) AS bytes,
+ String::Prec(negint / 12345.6789, 4) AS prec -- Int64 column divided by a fractional literal, second argument 4
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUDF.in b/yql/essentials/udfs/common/string/test/cases/StringUDF.in
new file mode 100644
index 00000000000..a9d378e0590
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUDF.in
@@ -0,0 +1,4 @@
+{"value"=" !qwe rty uiop [ ]$"};
+{"value"="@as dfgh jkl\\n;'\% "};
+{"value"=" #zxc\tvbn \t\n\b m,./?^ "};
+{"value"="1!2@3#4$5%6^7&8*9(0)-_=+,<.>"};
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUDF.sql b/yql/essentials/udfs/common/string/test/cases/StringUDF.sql
new file mode 100644
index 00000000000..77af707acb0
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUDF.sql
@@ -0,0 +1,15 @@
+SELECT -- applies each function below to the value column of Input
+ String::Base32Encode(value) as b32enc,
+ String::Base64Encode(value) as b64enc,
+ String::Base64EncodeUrl(value) as b64encu,
+ String::EscapeC(value) as cesc,
+ String::UnescapeC(value) as cunesc, -- StringUDF.in contains backslash sequences such as \\n, \t and \b
+ String::HexEncode(value) as xenc,
+ String::EncodeHtml(value) as henc,
+ String::DecodeHtml(value) as hdec,
+ String::CgiEscape(value) as cgesc,
+ String::CgiUnescape(value) as cgunesc,
+ String::Collapse(value) as clps, -- StringUDF.in rows contain runs of spaces and other whitespace
+ String::Strip(value) as strp,
+ String::CollapseText(value, 9) as clpst, -- second argument 9; its meaning is not visible here, see the UDF docs
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in
new file mode 100644
index 00000000000..2c15dd67ac6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in
@@ -0,0 +1,6 @@
+{"value"="EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="};
+{"value"="ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="};
+{"value"="202020217177652072747920202075696F70205B205D24"};
+{"value"="IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"};
+{"value"="QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"};
+{"value"="4061732020202020202064666768206A6B6C5C6E3B27252020"};
diff --git a/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql
new file mode 100644
index 00000000000..dab39cbd391
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql
@@ -0,0 +1,7 @@
+SELECT -- runs every decoder over every row; presumably each row of StringUnsafeUDF.in is valid for only some of them (confirm against canondata)
+ String::Base32Decode(value) as b32dec,
+ String::Base32StrictDecode(value) AS b32sdec, -- Strict variant of the decoder on the previous line
+ String::Base64Decode(value) as b64dec,
+ String::Base64StrictDecode(value) AS b64sdec, -- Strict variant of the decoder on the previous line
+ String::HexDecode(value) as xdec,
+FROM Input
diff --git a/yql/essentials/udfs/common/string/test/cases/To.in b/yql/essentials/udfs/common/string/test/cases/To.in
new file mode 100644
index 00000000000..93a00f7db8d
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/To.in
@@ -0,0 +1,7 @@
+{"key"="1";"subkey"="1";"value"="test"};
+{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"};
+{"key"="3";"subkey"="3";"value"="TeSt"};
+{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"};
+{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"};
+{"key"="6";"subkey"="6";"value"="6"};
+{"key"="4";"subkey"="4";"value"=""};
diff --git a/yql/essentials/udfs/common/string/test/cases/To.sql b/yql/essentials/udfs/common/string/test/cases/To.sql
new file mode 100644
index 00000000000..a7faf41efe6
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/To.sql
@@ -0,0 +1,14 @@
+/* syntax version 1 */
+SELECT
+ value, -- To.in holds plain ASCII rows, rows with \x-escaped non-ASCII bytes, and an empty string
+ String::AsciiToLower(value) AS ascii_lower, -- Ascii* variants
+ String::AsciiToUpper(value) AS ascii_upper,
+ String::AsciiToTitle(value) AS ascii_title,
+ String::ToLower(value) AS lower, -- non-Ascii-prefixed counterparts of the three calls above
+ String::ToUpper(value) AS upper,
+ String::ToTitle(value) AS title,
+ String::Reverse(value) AS reverse,
+ String::ToByteList(value) AS byte_list,
+ String::FromByteList(String::ToByteList(value)) AS from_byte_list, -- ToByteList followed by FromByteList
+ String::FromByteList(YQL::LazyList(String::ToByteList(value))) AS from_lazy_byte_list -- same round trip, with the list wrapped in YQL::LazyList first
+FROM Input;
diff --git a/yql/essentials/udfs/common/string/test/cases/default.in b/yql/essentials/udfs/common/string/test/cases/default.in
new file mode 100644
index 00000000000..182158fdf67
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/cases/default.in
@@ -0,0 +1,6 @@
+{"key"="1";"subkey"="1";"value"="fdsa"};
+{"key"="2";"subkey"="2";"value"="aswedfg"};
+{"key"="3";"subkey"="3";"value"="asdadsaasd"};
+{"key"="4";"subkey"="4";"value"="gdsfsassas"};
+{"key"="5";"subkey"="5";"value"=""};
+{"key"="6";"subkey"="6";"value"="`Привет, мир!`"};
diff --git a/yql/essentials/udfs/common/string/test/ya.make b/yql/essentials/udfs/common/string/test/ya.make
new file mode 100644
index 00000000000..87d8b667780
--- /dev/null
+++ b/yql/essentials/udfs/common/string/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/string) # the string UDF module one directory up
+
+TIMEOUT(300) # presumably seconds; confirm against the ya.make docs
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory") # branch taken only when SANITIZER_TYPE equals "memory"
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/string/ya.make b/yql/essentials/udfs/common/string/ya.make
new file mode 100644
index 00000000000..12ae827ad17
--- /dev/null
+++ b/yql/essentials/udfs/common/string/ya.make
@@ -0,0 +1,38 @@
+IF (YQL_PACKAGED) # packaged branch: no SRCS here, the library comes from a sandbox resource
+ PACKAGE()
+ FROM_SANDBOX(FILE 7319905679 OUT_NOAUTO libstring_udf.so
+ ) # sandbox resource 7319905679 yields libstring_udf.so (presumably a prebuilt binary; confirm)
+ END()
+ELSE () # source branch: module string_udf built from string_udf.cpp
+YQL_UDF_CONTRIB(string_udf)
+
+ YQL_ABI_VERSION(
+ 2
+ 37
+ 0
+ ) # three components 2, 37, 0 (presumably major, minor, patch; confirm)
+
+ SRCS(
+ string_udf.cpp
+ )
+
+ PEERDIR(
+ yql/essentials/public/udf/arrow
+ library/cpp/charset
+ library/cpp/deprecated/split
+ library/cpp/html/pcdata
+ library/cpp/string_utils/base32
+ library/cpp/string_utils/base64
+ library/cpp/string_utils/levenshtein_diff
+ library/cpp/string_utils/quote
+ ) # libraries this module is declared to depend on
+
+ END()
+ENDIF ()
+
+
+RECURSE_FOR_TESTS(
+ test
+) # the test/ subdirectory added by this same diff
+
+