diff options
| author | atarasov5 <[email protected]> | 2025-02-13 15:57:47 +0300 |
|---|---|---|
| committer | atarasov5 <[email protected]> | 2025-02-13 16:10:25 +0300 |
| commit | ee1586dbe7089790d721f00e52026c958098e9cd (patch) | |
| tree | b4bc422bdfacc83b786301c249f5d3e534ecff98 /yql/essentials/udfs | |
| parent | a8b10ce3de933ec55e006347e27da8f08e9cc598 (diff) | |
YQL-19535: Provide block implementation for some utf8 udf
commit_hash:30371c936e006647e6aee38a0c95980d2d1af49e
Diffstat (limited to 'yql/essentials/udfs')
58 files changed, 990 insertions, 342 deletions
diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h index 6982dbe162d..d27abcb9f65 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h @@ -16,6 +16,7 @@ #include <util/string/subst.h> #include <util/charset/wide.h> #include <util/charset/utf8.h> +#include <util/generic/scope.h> #include <util/string/strip.h> #include <util/string/ascii.h> #include <util/charset/unidata.h> @@ -25,6 +26,10 @@ using namespace NUdf; using namespace NUnicode; namespace { +#define DISABLE_IMPICT_ARGUMENT_CAST \ + template <typename... Args> \ + static auto Execute(Args&&... args) = delete; + inline constexpr bool IsAscii(wchar32 c) noexcept { return ::IsAscii(c); } @@ -54,6 +59,13 @@ namespace { } static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(args[0] ? TMaybe<TStringRef>(args[0].AsStringRef()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } + + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) requires requires { TDerived::Execute(TStringRef(), TStringRef()); } { auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef()); @@ -61,6 +73,13 @@ namespace { } static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1] ? TMaybe<ui16>(args[1].Get<ui16>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } + + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) requires requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); } { auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2].AsStringRef()); @@ -74,11 +93,24 @@ namespace { return ProcessResult(builder, std::move(executeResult), args); } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(), TMaybe<ui64>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), + args[1] ? TMaybe<ui64>(args[1].Get<ui64>()) : Nothing(), + args[2] ? TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } + private: static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TString& newString, const TUnboxedValuePod*) { return builder->NewString(newString); } + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TStringBuf newString, const TUnboxedValuePod*) { + return builder->NewString(newString); + } + template <typename T> static TUnboxedValue ProcessResult(const IValueBuilder* builder, const std::variant<TNoChangesTag, T>& newValue, const TUnboxedValuePod* initialArg) { if (std::holds_alternative<T>(newValue)) { @@ -117,6 +149,15 @@ namespace { } template <typename TSink> + static void BlockDoExecute(const TBlockItem arg, const TSink& sink) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(arg ? TMaybe<TStringRef>(arg.AsStringRef()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg); + sink(boxedValue); + } + + template <typename TSink> static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) requires requires { TDerived::Execute(TStringRef(), TStringRef()); } { @@ -127,6 +168,15 @@ namespace { } template <typename TSink> + static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(arg1.AsStringRef(), arg2 ? TMaybe<ui16>(arg2.Get<ui16>()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg1); + sink(boxedValue); + } + + template <typename TSink> static void BlockDoExecute(const TBlockItem args, const TSink& sink) requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); }) { @@ -148,12 +198,28 @@ namespace { sink(boxedValue); } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(0ULL), TMaybe<ui64>(0ULL)); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + (args.GetElement(1) ? TMaybe<ui64>(args.GetElement(1).Get<ui64>()) : Nothing()), + (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } + private: static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { Y_UNUSED(arg); return TBlockItem(newString); } + static TBlockItem ProcessResult(const TStringBuf newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(newString); + } + template <typename T> static TBlockItem ProcessResult(const TMaybe<T>& newValue, const TBlockItem arg) { if (newValue.Defined()) { @@ -188,6 +254,7 @@ namespace { const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); return WideToUTF8(Normalize<mode>(input)); } + DISABLE_IMPICT_ARGUMENT_CAST; }; template <bool (*Function)(wchar32)> @@ -205,6 +272,7 @@ namespace { } return true; } + DISABLE_IMPICT_ARGUMENT_CAST; }; template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> @@ -217,6 +285,7 @@ namespace { return TNoChangesTag{}; } } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLengthGetter: public TOperationMixin<TLengthGetter> { @@ -225,6 +294,7 @@ namespace { GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); return static_cast<ui64>(result); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TReverser: public TOperationMixin<TReverser> { @@ -233,6 +303,7 @@ namespace { ReverseInPlace(wide); return WideToUTF8(wide); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TStripper: public TOperationMixin<TStripper> { @@ -241,6 +312,7 @@ namespace { const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); return WideToUTF8(result); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TAllRemover: public TOperationMixin<TAllRemover> { @@ -260,6 +332,7 @@ namespace { } return TNoChangesTag{}; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TFirstRemover: public TOperationMixin<TFirstRemover> { @@ -275,6 +348,7 @@ namespace { } return TNoChangesTag{}; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TUnicodeSetMatcher: public TOperationMixin<TUnicodeSetMatcher> { @@ -298,6 +372,7 @@ namespace { } return true; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLevensteinDistanceFinder: public TOperationMixin<TLevensteinDistanceFinder> { @@ -308,6 +383,7 @@ namespace { const auto& rightUtf32 = UTF8ToUTF32<true>(right); return NLevenshtein::Distance(leftUtf32, rightUtf32); } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLastRemoval: public TOperationMixin<TLastRemoval> { @@ -323,6 +399,7 @@ namespace { } return TNoChangesTag{}; } + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TAllReplacer: public TOperationMixin<TAllReplacer> { @@ -333,9 +410,7 @@ namespace { return TNoChangesTag{}; } } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TFirstReplacer: public TOperationMixin<TFirstReplacer> { @@ -348,9 +423,7 @@ namespace { } return TNoChangesTag{}; } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TLastReplacer: public TOperationMixin<TLastReplacer> { @@ -363,9 +436,7 @@ namespace { } return TNoChangesTag{}; } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TFinder: public TOperationMixin<TFinder> { @@ -389,9 +460,7 @@ namespace { } return Nothing(); } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; }; struct TRFinder: public TOperationMixin<TRFinder> { @@ -416,9 +485,65 @@ namespace { } return Nothing(); } - // Disable implict casts for arguments. - template <typename... Args> - static auto Execute(Args&&... args) = delete; + DISABLE_IMPICT_ARGUMENT_CAST; + }; + + template <bool strict> + struct TToUint64Converter: public TOperationMixin<TToUint64Converter<strict>> { + static TNothing Terminate(const char* message) { + if constexpr (strict) { + return Nothing(); + } else { + UdfTerminate(message); + } + }; + + static TMaybe<ui64> Execute(TStringRef inputRef, TMaybe<ui16> inputBase) { + const TString inputStr(inputRef); + const char* input = inputStr.data(); + const int base = inputBase.GetOrElse(0); + char* pos = nullptr; + auto prevErrno = errno; + errno = 0; + Y_DEFER { + errno = prevErrno; + }; + unsigned long long res = std::strtoull(input, &pos, base); + if (!res && errno == EINVAL) { + return Terminate("Incorrect base"); + } + + ui64 ret = static_cast<ui64>(res); + if (!res && pos == input) { + return Terminate("Input string is not a number"); + } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { + return Terminate("Converted value falls out of Uint64 range"); + } else if (*pos) { + return Terminate("Input string contains junk after the number"); + } + return ret; + } + DISABLE_IMPICT_ARGUMENT_CAST; + }; + + struct TUtf8Checker: public TOperationMixin<TUtf8Checker> { + static bool Execute(TMaybe<TStringRef> inputRef) { + if (!inputRef.Defined()) { + return false; + } + return IsUtf8(*inputRef); + } + DISABLE_IMPICT_ARGUMENT_CAST; + }; + + struct TSubstringGetter: public TOperationMixin<TSubstringGetter> { + static TStringBuf Execute(TStringRef inputRef Y_LIFETIME_BOUND, TMaybe<ui64> inputFrom, TMaybe<ui64> inputLen) { + const TStringBuf input(inputRef); + size_t from = inputFrom.GetOrElse(0); + size_t len = inputLen.GetOrElse(TStringBuf::npos); + return SubstrUTF8(input, from, len); + } + DISABLE_IMPICT_ARGUMENT_CAST; }; #define DEFINE_UTF8_OPERATION_STRICT(udfName, Executor, signature, optArgs) \ @@ -475,6 +600,8 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + DEFINE_UTF8_OPERATION_STRICT(IsUtf, TUtf8Checker, bool(TOptional<char*>), /*optArgs=*/1); + DEFINE_UTF8_OPERATION_STRICT(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); DEFINE_UTF8_OPERATION_STRICT(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); DEFINE_UTF8_OPERATION_STRICT(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); @@ -498,6 +625,7 @@ namespace { DEFINE_UTF8_OPERATION_STRICT(Reverse, TReverser, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); DEFINE_UTF8_OPERATION_STRICT(Strip, TStripper, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); + DEFINE_UTF8_OPERATION_MANY_STRICT(Substring, TSubstringGetter, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), /*argsCount=*/3, /*optArgs=*/1); DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveAll, TAllRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveFirst, TFirstRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); @@ -512,69 +640,8 @@ namespace { DEFINE_UTF8_OPERATION_MANY_STRICT(Find, TFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); DEFINE_UTF8_OPERATION_MANY_STRICT(RFind, TRFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); - SIMPLE_UDF(TIsUtf, bool(TOptional<char*>)) { - Y_UNUSED(valueBuilder); - if (args[0]) { - return TUnboxedValuePod(IsUtf8(args[0].AsStringRef())); - } else { - return TUnboxedValuePod(false); - } - } - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TToUint64, ui64(TAutoMap<TUtf8>, TOptional<ui16>), 1) { - Y_UNUSED(valueBuilder); - const TString inputStr(args[0].AsStringRef()); - const char* input = inputStr.data(); - const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0)); - char* pos = nullptr; - errno = 0; - unsigned long long res = std::strtoull(input, &pos, base); - if (!res && errno == EINVAL) { - UdfTerminate("Incorrect base"); - } - - ui64 ret = static_cast<ui64>(res); - if (!res && pos == input) { - UdfTerminate("Input string is not a number"); - } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { - UdfTerminate("Converted value falls out of Uint64 range"); - } else if (*pos) { - UdfTerminate("Input string contains junk after the number"); - } - return TUnboxedValuePod(ret); - } - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTryToUint64, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), 1) { - Y_UNUSED(valueBuilder); - const TString inputStr(args[0].AsStringRef()); - const char* input = inputStr.data(); - const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0)); - char* pos = nullptr; - errno = 0; - unsigned long long res = std::strtoull(input, &pos, base); - if (!res && errno == EINVAL) { - return TUnboxedValuePod(); - } - - ui64 ret = static_cast<ui64>(res); - if (!res && pos == input) { - return TUnboxedValuePod(); - } - if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { - return TUnboxedValuePod(); - } - if (*pos) { - return TUnboxedValuePod(); - } - return TUnboxedValuePod(ret); - } - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSubstring, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), 1) { - const TStringBuf input(args[0].AsStringRef()); - size_t from = args[1].GetOrDefault<ui64>(0); - size_t len = !args[2] ? TStringBuf::npos : size_t(args[2].Get<ui64>()); - return valueBuilder->NewString(SubstrUTF8(input, from, len)); - } + DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(ToUint64, TToUint64Converter</*strict=*/false>, ui64(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); + DEFINE_UTF8_OPERATION_BIN_STRICT(TryToUint64, TToUint64Converter</*strict=*/true>, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json index bac6e1ebc46..15b8b4b473f 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json @@ -29,11 +29,41 @@ "uri": "file://test.test_BlockStrip_/results.txt" } ], + "test.test[BlockToUint64F0]": [ + { + "uri": "file://test.test_BlockToUint64F0_/extracted" + } + ], + "test.test[BlockToUint64F1]": [ + { + "uri": "file://test.test_BlockToUint64F1_/extracted" + } + ], + "test.test[BlockToUint64F2]": [ + { + "uri": "file://test.test_BlockToUint64F2_/extracted" + } + ], + "test.test[BlockToUint64F3]": [ + { + "uri": "file://test.test_BlockToUint64F3_/extracted" + } + ], + "test.test[BlockToUint64]": [ + { + "uri": "file://test.test_BlockToUint64_/results.txt" + } + ], "test.test[BlockTo]": [ { "uri": "file://test.test_BlockTo_/results.txt" } ], + "test.test[BlockTryToUint64]": [ + { + "uri": "file://test.test_BlockTryToUint64_/results.txt" + } + ], "test.test[BlockUnicode]": [ { "uri": "file://test.test_BlockUnicode_/results.txt" diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F0_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F0_/extracted new file mode 100644 index 00000000000..f8d4992f9e1 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F0_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Input string is not a number + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F1_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F1_/extracted new file mode 100644 index 00000000000..bbdeae1af46 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F1_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Input string contains junk after the number + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F2_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F2_/extracted new file mode 100644 index 00000000000..e15ed0de7a4 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F2_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Converted value falls out of Uint64 range + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F3_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F3_/extracted new file mode 100644 index 00000000000..aff4bb4c226 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64F3_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:10:1: Error: Execution of node: YtMap! + SELECT + ^ + <tmp_path>/program.sql:<main>:10:1: Error: Incorrect base + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64_/results.txt new file mode 100644 index 00000000000..ecd6e5bddbf --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockToUint64_/results.txt @@ -0,0 +1,178 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "key"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0x1234abcd"; + "with_format_1"; + "305441741" + ]; + [ + "0X4"; + "with_format_2"; + "4" + ]; + [ + "0644"; + "with_format_3"; + "420" + ]; + [ + "0101010"; + "binary_1"; + "33288" + ]; + [ + "101"; + "binary_2"; + "101" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "key"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0101010"; + "binary_1"; + "42"; + "1052688" + ]; + [ + "101"; + "binary_2"; + "5"; + "257" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "key"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0"; + "zero"; + "0"; + "0"; + "0" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTryToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTryToUint64_/results.txt new file mode 100644 index 00000000000..8e4cedcd682 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_BlockTryToUint64_/results.txt @@ -0,0 +1,173 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0x1234abcd"; + #; + #; + #; + #; + [ + "305441741" + ] + ]; + [ + "0X4"; + #; + #; + #; + #; + [ + "4" + ] + ]; + [ + "0644"; + [ + "644" + ]; + #; + #; + [ + "420" + ]; + [ + "1604" + ] + ]; + [ + "0101010"; + [ + "101010" + ]; + #; + [ + "1092" + ]; + [ + "33288" + ]; + [ + "1052688" + ] + ]; + [ + "101"; + [ + "101" + ]; + #; + [ + "17" + ]; + [ + "65" + ]; + [ + "257" + ] + ]; + [ + "0"; + [ + "0" + ]; + #; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ] + ]; + [ + "hell"; + #; + #; + #; + #; + # + ]; + [ + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"; + #; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted index 7e9db6c109e..6c16e6c2e39 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Input string is not a number + <tmp_path>/program.sql:<main>:4:1: Error: Input string is not a number SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted index dbf47216702..4288cfcc4c7 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Input string contains junk after the number + <tmp_path>/program.sql:<main>:4:1: Error: Input string contains junk after the number SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted index f6b225d5618..7ebb531e661 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Converted value falls out of Uint64 range + <tmp_path>/program.sql:<main>:4:1: Error: Converted value falls out of Uint64 range SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted index 5b73d97b401..6ff53caa9a2 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F3_/extracted @@ -1,8 +1,8 @@ <tmp_path>/program.sql:<main>: Error: Execution - <tmp_path>/program.sql:<main>:2:1: Error: Execution of node: Result + <tmp_path>/program.sql:<main>:4:1: Error: Execution of node: YtMap! SELECT ^ - <tmp_path>/program.sql:<main>:2:1: Error: Incorrect base + <tmp_path>/program.sql:<main>:4:1: Error: Incorrect base SELECT ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt index 399ba781437..ecd6e5bddbf 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt @@ -8,17 +8,17 @@ "StructType"; [ [ - "column0"; + "value"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column1"; + "key"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ @@ -27,30 +27,71 @@ "DataType"; "Uint64" ] - ]; + ] + ] + ] + ]; + "Data" = [ + [ + "0x1234abcd"; + "with_format_1"; + "305441741" + ]; + [ + "0X4"; + "with_format_2"; + "4" + ]; + [ + "0644"; + "with_format_3"; + "420" + ]; + [ + "0101010"; + "binary_1"; + "33288" + ]; + [ + "101"; + "binary_2"; + "101" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ [ - "column3"; + "value"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column4"; + "key"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column5"; + "column2"; [ "DataType"; "Uint64" ] ]; [ - "column6"; + "column3"; [ "DataType"; "Uint64" @@ -61,13 +102,16 @@ ]; "Data" = [ [ - "305441741"; - "4"; - "420"; - "1052688"; + "0101010"; + "binary_1"; "42"; - "33288"; - "101" + "1052688" + ]; + [ + "101"; + "binary_2"; + "5"; + "257" ] ] } @@ -82,17 +126,17 @@ "StructType"; [ [ - "column0"; + "value"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ - "column1"; + "key"; [ "DataType"; - "Uint64" + "Utf8" ] ]; [ @@ -108,6 +152,13 @@ "DataType"; "Uint64" ] + ]; + [ + "column4"; + [ + "DataType"; + "Uint64" + ] ] ] ] @@ -115,6 +166,7 @@ "Data" = [ [ "0"; + "zero"; "0"; "0"; "0" diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt index 3b715cea08f..8e4cedcd682 100644 --- a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt @@ -8,129 +8,10 @@ "StructType"; [ [ - "column0"; + "value"; [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] - ] - ] - ]; - "Data" = [ - [ - # - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; - [ - "StructType"; - [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] + "DataType"; + "Utf8" ] ]; [ @@ -182,104 +63,83 @@ "Uint64" ] ] - ]; - [ - "column6"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] ] ] ] ]; "Data" = [ [ + "0x1234abcd"; + #; + #; + #; + #; [ "305441741" - ]; + ] + ]; + [ + "0X4"; + #; + #; + #; + #; [ "4" + ] + ]; + [ + "0644"; + [ + "644" ]; + #; + #; [ "420" ]; [ - "1052688" + "1604" + ] + ]; + [ + "0101010"; + [ + "101010" ]; + #; [ - "42" + "1092" ]; [ - "101010" + "33288" ]; [ - "101" + "1052688" ] - ] - ] - } - ] - }; - { - "Write" = [ - { - "Type" = [ - "ListType"; + ]; [ - "StructType"; + "101"; [ - [ - "column0"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ]; - [ - "column1"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ]; - [ - "column2"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ]; - [ - "column3"; - [ - "OptionalType"; - [ - "DataType"; - "Uint64" - ] - ] - ] + "101" + ]; + #; + [ + "17" + ]; + [ + "65" + ]; + [ + "257" ] - ] - ]; - "Data" = [ + ]; [ + "0"; [ "0" ]; + #; [ "0" ]; @@ -289,6 +149,22 @@ [ "0" ] + ]; + [ + "hell"; + #; + #; + #; + #; + # + ]; + [ + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"; + #; + #; + #; + #; + # ] ] } diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in new file mode 100644 index 00000000000..c9a2f32dfcc --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in @@ -0,0 +1,6 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.sql new file mode 100644 index 00000000000..0c794abdb36 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64.sql @@ -0,0 +1,32 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value) +FROM Input +WHERE key = "with_format_1" + OR key = "with_format_2" + OR key = "with_format_3" + OR key = "binary_1" + OR key = "binary_2"; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value, 2), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "binary_1" + OR key = "binary_2"; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value, 8), + Unicode::ToUint64(value, 10), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "zero"; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.cfg new file mode 100644 index 00000000000..1235ff042d2 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.cfg @@ -0,0 +1,2 @@ +in plato.Input BlockToUint64F0.in +xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in new file mode 100644 index 00000000000..c431fc6e9c2 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in @@ -0,0 +1,2 @@ +{"key"="not_a_number_1";"value"="hello"}; +{"key"="not_a_number_2";"value"="meow"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.sql new file mode 100644 index 00000000000..cffd41a7f75 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F0.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.cfg new file mode 100644 index 00000000000..5403234fabe --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.cfg @@ -0,0 +1,3 @@ +in plato.Input BlockToUint64F1.in +xfail + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in new file mode 100644 index 00000000000..eb40ad3bf12 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in @@ -0,0 +1,2 @@ +{"key"="error1";"value"="01238"}; +{"key"="error2";"value"="01239"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.sql new file mode 100644 index 00000000000..cffd41a7f75 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F1.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.cfg new file mode 100644 index 00000000000..8ee27e44977 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.cfg @@ -0,0 +1,2 @@ +in plato.Input BlockToUint64F2.in +xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in new file mode 100644 index 00000000000..3895f453988 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in @@ -0,0 +1,2 @@ +{"key"="very_big_1";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; +{"key"="very_big_2";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.sql new file mode 100644 index 00000000000..cffd41a7f75 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F2.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.cfg new file mode 100644 index 00000000000..af5392127c5 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.cfg @@ -0,0 +1,2 @@ +in plato.Input BlockToUint64F3.in +xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in new file mode 100644 index 00000000000..fde0f1b4a6a --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in @@ -0,0 +1,2 @@ +{"key"="0";"value"="0"}; +{"key"="1";"value"="1"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.sql new file mode 100644 index 00000000000..e1a781e917d --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockToUint64F3.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::ToUint64(value, 1), +FROM Input + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in new file mode 100644 index 00000000000..c3bbe804f87 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in @@ -0,0 +1,8 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; +{"key"="invalid";"value"="hell"}; +{"key"="very_long";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.sql new file mode 100644 index 00000000000..b2173249508 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/BlockTryToUint64.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ + +pragma UseBlocks; + +SELECT + value as value, + Unicode::TryToUint64(value, 10), + Unicode::TryToUint64(value, 1), + Unicode::TryToUint64(value, 4), + Unicode::TryToUint64(value, 8), + Unicode::TryToUint64(value, 16) +From Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in new file mode 100644 index 00000000000..c9a2f32dfcc --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in @@ -0,0 +1,6 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql index 1cad57a4fd6..531322f2d1d 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql @@ -1,14 +1,29 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("0x1234abcd"), - Unicode::ToUint64("0X4"), - Unicode::ToUint64("0644"), - Unicode::ToUint64("0101010", 16), - Unicode::ToUint64("0101010", 2), - Unicode::ToUint64("0101010"), - Unicode::ToUint64("101"); + value AS value, + key AS key, + Unicode::ToUint64(value) +FROM Input +WHERE key = "with_format_1" + OR key = "with_format_2" + OR key = "with_format_3" + OR key = "binary_1" + OR key = "binary_2"; SELECT - Unicode::ToUint64("0", 8), - Unicode::ToUint64("0", 10), - Unicode::ToUint64("0", 16), - Unicode::ToUint64("0"); + value AS value, + key AS key, + Unicode::ToUint64(value, 2), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "binary_1" + OR key = "binary_2"; + +SELECT + value AS value, + key AS key, + Unicode::ToUint64(value, 8), + Unicode::ToUint64(value, 10), + Unicode::ToUint64(value, 16) +FROM Input +WHERE key = "zero"; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg index 83cfd96179a..4900d749105 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg @@ -1,2 +1,2 @@ +in plato.Input ToUint64F0.in xfail - diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in new file mode 100644 index 00000000000..c431fc6e9c2 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in @@ -0,0 +1,2 @@ +{"key"="not_a_number_1";"value"="hello"}; +{"key"="not_a_number_2";"value"="meow"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql index dd1182a562d..b84e287c508 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("hell"); + value as value, + Unicode::ToUint64(value), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg index 83cfd96179a..218c06cc4a3 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg @@ -1,2 +1,3 @@ +in plato.Input ToUint64F1.in xfail diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in new file mode 100644 index 00000000000..eb40ad3bf12 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in @@ -0,0 +1,2 @@ +{"key"="error1";"value"="01238"}; +{"key"="error2";"value"="01239"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql index f42380ee803..b84e287c508 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("01238"); + value as value, + Unicode::ToUint64(value), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg index 83cfd96179a..e377f6a260f 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg @@ -1,2 +1,2 @@ +in plato.Input ToUint64F2.in xfail - diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in new file mode 100644 index 00000000000..3895f453988 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in @@ -0,0 +1,2 @@ +{"key"="very_big_1";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; +{"key"="very_big_2";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql index 1a9b7e2449f..b84e287c508 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + value as value, + Unicode::ToUint64(value), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg index 83cfd96179a..83322ea2160 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.cfg @@ -1,2 +1,2 @@ +in plato.Input ToUint64F3.in xfail - diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in new file mode 100644 index 00000000000..fde0f1b4a6a --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in @@ -0,0 +1,2 @@ +{"key"="0";"value"="0"}; +{"key"="1";"value"="1"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql index 527fb1da1bd..cc2e70d71a4 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F3.sql @@ -1,3 +1,6 @@ +/* syntax version 1 */ SELECT - Unicode::ToUint64("0",1); + value as value, + Unicode::ToUint64(value, 1), +FROM Input diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in new file mode 100644 index 00000000000..c3bbe804f87 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in @@ -0,0 +1,8 @@ +{"key"="with_format_1";"value"="0x1234abcd"}; +{"key"="with_format_2";"value"="0X4"}; +{"key"="with_format_3";"value"="0644"}; +{"key"="binary_1";"value"="0101010"}; +{"key"="binary_2";"value"="101"}; +{"key"="zero";"value"="0"}; +{"key"="invalid";"value"="hell"}; +{"key"="very_long";"value"="0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in.attr new file mode 100644 index 00000000000..d5e5b2ca484 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.in.attr @@ -0,0 +1,8 @@ +{ + "_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + } +} diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql index b51ce72e6f7..aa07de57e96 100644 --- a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql @@ -1,26 +1,9 @@ -SELECT - Unicode::TryToUint64("hell", 10); - -SELECT - Unicode::TryToUint64("01238", 8); - -SELECT - Unicode::TryToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); - -SELECT - Unicode::TryToUint64("0", 1); - -SELECT - Unicode::TryToUint64("0x1234abcd", 16), - Unicode::TryToUint64("0X4", 16), - Unicode::TryToUint64("0644", 8), - Unicode::TryToUint64("0101010", 16), - Unicode::TryToUint64("0101010", 2), - Unicode::TryToUint64("0101010", 10), - Unicode::TryToUint64("101", 10); - -SELECT - Unicode::TryToUint64("0", 8), - Unicode::TryToUint64("0", 10), - Unicode::TryToUint64("0", 16), - Unicode::TryToUint64("0"); +/* syntax version 1 */ +SELECT + value as value, + Unicode::TryToUint64(value, 10), + Unicode::TryToUint64(value, 1), + Unicode::TryToUint64(value, 4), + Unicode::TryToUint64(value, 8), + Unicode::TryToUint64(value, 16) +From Input |
