diff options
| author | imunkin <[email protected]> | 2024-11-08 10:00:23 +0300 |
|---|---|---|
| committer | imunkin <[email protected]> | 2024-11-08 10:12:13 +0300 |
| commit | a784a2f943d6e15caa6241e2e96d80aac6dbf375 (patch) | |
| tree | 05f1e5366c916b988a8afb75bdab8ddeee0f6e6d /yql/essentials/udfs | |
| parent | d70137a7b530ccaa52834274913bbb5a3d1ca06e (diff) | |
Move yql/udfs/common/ to /yql/essentials YQL-19206
Except the following directories:
* clickhouse/client
* datetime
* knn
* roaring
commit_hash:c7da95636144d28db109d6b17ddc762e9bacb59f
Diffstat (limited to 'yql/essentials/udfs')
781 files changed, 76503 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/compress_base/compress_udf.cpp b/yql/essentials/udfs/common/compress_base/compress_udf.cpp new file mode 100644 index 00000000000..efd2d0b3c54 --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/compress_udf.cpp @@ -0,0 +1,17 @@ +#include "lib/compress_base_udf.h" + +using namespace NYql::NUdf; + +namespace NCompress { + SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF); +} + +namespace NDecompress { + SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF); +} + +namespace NTryDecompress { + SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF); +} + +REGISTER_MODULES(NCompress::TCompressModule, NDecompress::TDecompressModule, NTryDecompress::TTryDecompressModule); diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp new file mode 100644 index 00000000000..237abe271eb --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp @@ -0,0 +1 @@ +#include "compress_base_udf.h"
\ No newline at end of file diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h new file mode 100644 index 00000000000..58709134d6a --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h @@ -0,0 +1,218 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <library/cpp/streams/brotli/brotli.h> +#include <library/cpp/streams/bzip2/bzip2.h> +#include <library/cpp/streams/zstd/zstd.h> +#include <library/cpp/streams/lzma/lzma.h> +#include <library/cpp/streams/xz/decompress.h> + +#include <util/stream/mem.h> +#include <util/stream/zlib.h> + +#include <contrib/libs/snappy/snappy.h> + +using namespace NYql::NUdf; + +namespace NCompress { + SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZLibCompress compress(&output, ZLib::GZip, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZLibCompress compress(&output, ZLib::ZLib, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TBrotliCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TLzmaCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TBZipCompress 
compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { + TString result; + const TStringRef& input = args[0].AsStringRef(); + snappy::Compress(input.Data(), input.Size(), &result); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZstdCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); + } +} + +namespace NDecompress { + SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } + + SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } + + SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBrotliDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } + + SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TLzmaDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } + + SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBZipDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } + + SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { + TString result; + const auto& value = args->AsStringRef(); + if (snappy::Uncompress(value.Data(), value.Size(), 
&result)) { + return valueBuilder->NewString(result); + } + + ythrow yexception() << "failed to decompress message with snappy"; + } + + SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZstdDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } + + SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TXzDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } +} + +namespace NTryDecompress { + SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBrotliDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TLzma, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TLzmaDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + 
TMemoryInput input(ref.Data(), ref.Size()); + TBZipDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) { + TString result; + const auto& value = args->AsStringRef(); + if (snappy::Uncompress(value.Data(), value.Size(), &result)) { + return valueBuilder->NewString(result); + } + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZstdDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } + + SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TXzDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); + } catch (const std::exception&) { + return TUnboxedValuePod(); + } +} + +#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd +#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz +#define EXPORTED_TRY_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz diff --git a/yql/essentials/udfs/common/compress_base/lib/ya.make b/yql/essentials/udfs/common/compress_base/lib/ya.make new file mode 100644 index 00000000000..ca606d244a0 --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/lib/ya.make @@ -0,0 +1,23 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 23 + 0 +) + +SRCS( + compress_base_udf.cpp +) + +PEERDIR( + yql/essentials/public/udf + contrib/libs/snappy + library/cpp/streams/brotli + library/cpp/streams/bzip2 + library/cpp/streams/lzma + library/cpp/streams/xz + library/cpp/streams/zstd +) + +END() diff --git 
a/yql/essentials/udfs/common/compress_base/test/canondata/result.json b/yql/essentials/udfs/common/compress_base/test/canondata/result.json new file mode 100644 index 00000000000..5323168bebe --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/canondata/result.json @@ -0,0 +1,12 @@ +{ + "test.test[RoundTrip]": [ + { + "uri": "file://test.test_RoundTrip_/results.txt" + } + ], + "test.test[TryDecompress]": [ + { + "uri": "file://test.test_TryDecompress_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt new file mode 100644 index 00000000000..2c0cefa419d --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_RoundTrip_/results.txt @@ -0,0 +1,124 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "gzip"; + [ + "DataType"; + "String" + ] + ]; + [ + "zlib"; + [ + "DataType"; + "String" + ] + ]; + [ + "brotli"; + [ + "DataType"; + "String" + ] + ]; + [ + "lzma"; + [ + "DataType"; + "String" + ] + ]; + [ + "bzip2"; + [ + "DataType"; + "String" + ] + ]; + [ + "zstd"; + [ + "DataType"; + "String" + ] + ]; + [ + "snappy"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "H4sIAAAAAAAAAwMAAAAAAAAAAAA=" + ]; + "x^\3\0\0\0\0\1"; + "k\0\3"; + [ + "XQAAAAEAg//7///AAAAA" + ]; + [ + "QlpoNRdyRThQkAAAAAA=" + ]; + [ + "KLUv/SAAAQAA" + ]; + "\0" + ]; + [ + [ + "H4sIAAAAAAAAAzMEALfv3IMBAAAA" + ]; + "x^3\4\0\0002\0002"; + [ + "CwCAMQM=" + ]; + [ + "XQAAAAEAGMH7////4AAAAA==" + ]; + [ + "QlpoNTFBWSZTWWEEMGwAAAAIACAAIAAhGEaC7kinChIMIIYNgA==" + ]; + [ + "KLUv/QBYCQAAMQ==" + ]; + "\1\0001" + ]; + [ + [ + "H4sIAAAAAAAAAzM0MjYxNTO3sDQAAOWuHSYKAAAA" + ]; + [ + "eF4zNDI2MTUzt7A0AAALLAIO" + ]; + [ + "iwSAMTIzNDU2Nzg5MAM=" + ]; + [ + "XQAAAAEAGIyCtsQRNFxO4dpOCbf//KPgAA==" + ]; + [ + 
"QlpoNTFBWSZTWVBoU7YAAACIAH/gIAAiAaaYQAwVXmjj6Yu5IpwoSCg0KdsA" + ]; + [ + "KLUv/QBYUQAAMTIzNDU2Nzg5MA==" + ]; + "\n$1234567890" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt new file mode 100644 index 00000000000..649a6670a93 --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/canondata/test.test_TryDecompress_/results.txt @@ -0,0 +1,188 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "ok_Gzip"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_Gzip"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ok_Zlib"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_Zlib"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ok_Brotli"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_Brotli"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ok_Lzma"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_Lzma"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ok_BZip2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_BZip2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ok_Snappy"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_Snappy"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ok_Zstd"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_Zstd"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ]; + #; + [ + %true + ]; + #; + [ + %true + ]; + #; + [ + %true + ]; + #; + [ + %true + ]; + #; + [ + %true + ]; + #; + [ + %true + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql b/yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql new file mode 100644 index 00000000000..4c8eba4aab3 --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/cases/RoundTrip.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +$level = 5; + +SELECT + Ensure(Compress::Gzip(value, $level), Decompress::Gzip(Compress::Gzip(value, $level)) == value, "gzip failed at: " || value) AS gzip, + Ensure(Compress::Zlib(value, $level), Decompress::Zlib(Compress::Zlib(value, $level)) == value, "zlib failed at: " || value) AS zlib, + Ensure(Compress::Brotli(value, $level), Decompress::Brotli(Compress::Brotli(value, $level)) == value, "brotli failed at: " || value) AS brotli, + Ensure(Compress::Lzma(value, $level), Decompress::Lzma(Compress::Lzma(value, $level)) == value, "lzma failed at: " || value) AS lzma, + Ensure(Compress::BZip2(value, $level), Decompress::BZip2(Compress::BZip2(value, $level)) == value, "bzip2 failed at: " || value) AS bzip2, + Ensure(Compress::Zstd(value, $level), Decompress::Zstd(Compress::Zstd(value, $level)) == value, "zstd failed at: " || value) AS zstd, + Ensure(Compress::Snappy(value), Decompress::Snappy(Compress::Snappy(value)) == value, "Snappy failed at: " || value) AS snappy, +FROM Input; diff --git a/yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql b/yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql new file mode 100644 index 00000000000..a3e612ab6d5 --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/cases/TryDecompress.sql @@ -0,0 +1,19 @@ +/* syntax version 1 */ +$bad = "Is not compressed!"; + +SELECT + TryDecompress::Gzip(Compress::Gzip($bad, 3)) = $bad AS ok_Gzip, + TryDecompress::Gzip($bad) AS bad_Gzip, + TryDecompress::Zlib(Compress::Zlib($bad, 3)) = $bad AS ok_Zlib, + TryDecompress::Zlib($bad) AS bad_Zlib, + TryDecompress::Brotli(Compress::Brotli($bad, 3)) = 
$bad AS ok_Brotli, + TryDecompress::Brotli($bad) AS bad_Brotli, + TryDecompress::Lzma(Compress::Lzma($bad, 3)) = $bad AS ok_Lzma, + TryDecompress::Lzma($bad) AS bad_Lzma, + TryDecompress::BZip2(Compress::BZip2($bad, 3)) = $bad AS ok_BZip2, + TryDecompress::BZip2($bad) AS bad_BZip2, + TryDecompress::Snappy(Compress::Snappy($bad)) = $bad AS ok_Snappy, + TryDecompress::Snappy($bad) AS bad_Snappy, + TryDecompress::Zstd(Compress::Zstd($bad, 3)) = $bad AS ok_Zstd, + TryDecompress::Zstd($bad) AS bad_Zstd; + diff --git a/yql/essentials/udfs/common/compress_base/test/cases/default.in b/yql/essentials/udfs/common/compress_base/test/cases/default.in new file mode 100644 index 00000000000..8fee3ddb782 --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/cases/default.in @@ -0,0 +1,3 @@ +{"key"="1";"subkey"="2";"value"=""}; +{"key"="2";"subkey"="2";"value"="1"}; +{"key"="3";"subkey"="3";"value"="1234567890"}; diff --git a/yql/essentials/udfs/common/compress_base/test/ya.make b/yql/essentials/udfs/common/compress_base/test/ya.make new file mode 100644 index 00000000000..7bc954ca08c --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/test/ya.make @@ -0,0 +1,11 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/compress_base) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +SIZE(MEDIUM) + +END() diff --git a/yql/essentials/udfs/common/compress_base/ya.make b/yql/essentials/udfs/common/compress_base/ya.make new file mode 100644 index 00000000000..4859a4e53cd --- /dev/null +++ b/yql/essentials/udfs/common/compress_base/ya.make @@ -0,0 +1,22 @@ +YQL_UDF_CONTRIB(compress_udf) + +YQL_ABI_VERSION( + 2 + 23 + 0 +) + +SRCS( + compress_udf.cpp +) + +PEERDIR( + yql/essentials/public/udf + yql/essentials/udfs/common/compress_base/lib +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp new file mode 100644 
index 00000000000..139890c9bd3 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -0,0 +1,2396 @@ +#include <yql/essentials/minikql/mkql_type_ops.h> +#include <yql/essentials/public/udf/tz/udf_tz.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/minikql/datetime/datetime.h> +#include <yql/essentials/minikql/datetime/datetime64.h> + +#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h> + +#include <util/datetime/base.h> + +using namespace NKikimr; +using namespace NUdf; +using namespace NYql::DateTime; + +extern const char SplitName[] = "Split"; +extern const char ToSecondsName[] = "ToSeconds"; +extern const char ToMillisecondsName[] = "ToMilliseconds"; +extern const char ToMicrosecondsName[] = "ToMicroseconds"; +extern const char GetHourName[] = "GetHour"; +extern const char GetMinuteName[] = "GetMinute"; +extern const char GetSecondName[] = "GetSecond"; +extern const char GetMillisecondOfSecondName[] = "GetMillisecondOfSecond"; +extern const char GetMicrosecondOfSecondName[] = "GetMicrosecondOfSecond"; + +extern const char TMResourceName[] = "DateTime2.TM"; +extern const char TM64ResourceName[] = "DateTime2.TM64"; + +const auto UsecondsInDay = 86400000000ll; +const auto UsecondsInHour = 3600000000ll; +const auto UsecondsInMinute = 60000000ll; +const auto UsecondsInSecond = 1000000ll; +const auto UsecondsInMilliseconds = 1000ll; + +template <const char* TFuncName, typename TResult, ui32 ScaleAfterSeconds> +class TToUnits { +public: + typedef bool TTypeAwareMarker; + using TSignedResult = typename std::make_signed<TResult>::type; + + static TResult DateCore(ui16 value) { + return value * ui32(86400) * TResult(ScaleAfterSeconds); + } + + template<typename TTzDate> + static TResult TzBlockCore(TBlockItem tzDate); + + template<> + static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) { + return DateCore(tzDate.Get<ui16>()); + } + + template<> + static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) 
{ + return DatetimeCore(tzDate.Get<ui32>()); + } + + template<> + static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) { + return TimestampCore(tzDate.Get<ui64>()); + } + + static TResult DatetimeCore(ui32 value) { + return value * TResult(ScaleAfterSeconds); + } + + static TResult TimestampCore(ui64 value) { + return TResult(value / (1000000u / ScaleAfterSeconds)); + } + + static TSignedResult IntervalCore(i64 value) { + return TSignedResult(value / (1000000u / ScaleAfterSeconds)); + } + + static const TStringRef& Name() { + static auto name = TStringRef(TFuncName, std::strlen(TFuncName)); + return name; + } + + template<typename TTzDate, typename TOutput> + static auto MakeTzBlockExec() { + using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>; + return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } + + try { + auto typeInfoHelper = builder.TypeInfoHelper(); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple); + Y_ENSURE(tuple.GetElementsCount() > 0); + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple); + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Expected one argument"); + return true; + } + + + auto argType = argsTuple.GetElementType(0); + TVector<const TType*> argBlockTypes; + argBlockTypes.push_back(argType); + + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + Y_ENSURE(!block.IsScalar()); + argType = block.GetItemType(); + } + + bool isOptional = false; + if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) { + argType = opt.GetItemType(); + isOptional = true; + } + + + TDataTypeInspector data(*typeInfoHelper, argType); + if (!data) { + builder.SetError("Expected data type"); + return true; + } + + auto typeId = 
data.GetTypeId(); + if (!(typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id || + typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id || + typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id || + typeId == TDataType<TInterval>::Id)) { + builder.SetError(TStringBuilder() << "Type " << GetDataTypeInfo(GetDataSlot(typeId)).Name << " is not supported"); + } + + builder.Args()->Add(argsTuple.GetElementType(0)).Done(); + const TType* retType; + if (typeId != TDataType<TInterval>::Id) { + retType = builder.SimpleType<TResult>(); + } else { + retType = builder.SimpleType<TSignedResult>(); + } + + if (isOptional) { + retType = builder.Optional()->Item(retType).Build(); + } + + auto outputType = retType; + if (block) { + retType = builder.Block(block.IsScalar())->Item(retType).Build(); + } + + builder.Returns(retType); + builder.SupportsBlocks(); + builder.IsStrict(); + + builder.UserType(userType); + if (!typesOnly) { + if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) { + if (block) { + const auto exec = (typeId == TDataType<TTzDate>::Id) + ? MakeTzBlockExec<TTzDate, TResult>() + : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>; + + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>()); + } + } + + if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) { + if (block) { + const auto exec = (typeId == TDataType<TTzDatetime>::Id) + ? 
MakeTzBlockExec<TTzDatetime, TResult>() + : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>; + + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>()); + } + } + + if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) { + if (block) { + const auto exec = (typeId == TDataType<TTzTimestamp>::Id) + ? MakeTzBlockExec<TTzTimestamp, TResult>() + : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>; + + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>()); + } + } + + if (typeId == TDataType<TInterval>::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>()); + } + } + } + } catch (const std::exception& e) { + builder.SetError(TStringBuf(e.what())); + } + + return true; + } +}; + +template <const char* TFuncName, typename TFieldStorage, TFieldStorage (*FieldFunc)(const TUnboxedValuePod&), ui32 Divisor, ui32 Scale, ui32 Limit, bool Fractional> +struct TGetTimeComponent { + typedef bool TTypeAwareMarker; + + template <typename TInput, bool AlwaysZero, bool InputFractional> + static TFieldStorage Core(TInput val) { + if constexpr (AlwaysZero) { + return 0; + } + + if constexpr (InputFractional) { + if constexpr (Fractional) { + return (val / Scale) % Limit; + } else { + return (val / 1000000u / Scale) % Limit; + } + } else { + if 
constexpr (Fractional) { + return 0; + } else { + return (val / Scale) % Limit; + } + } + } + + class TImpl : public TBoxedValue { + public: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + if (!args[0]) { + return {}; + } + + return TUnboxedValuePod(TFieldStorage((FieldFunc(args[0])) / Divisor)); + } + }; + + static const TStringRef& Name() { + static auto name = TStringRef(TFuncName, std::strlen(TFuncName)); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } + + try { + auto typeInfoHelper = builder.TypeInfoHelper(); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + if (tuple) { + Y_ENSURE(tuple.GetElementsCount() > 0); + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple); + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Expected one argument"); + return true; + } + + + auto argType = argsTuple.GetElementType(0); + TVector<const TType*> argBlockTypes; + argBlockTypes.push_back(argType); + + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + Y_ENSURE(!block.IsScalar()); + argType = block.GetItemType(); + } + + bool isOptional = false; + if (auto opt = TOptionalTypeInspector(*typeInfoHelper, argType)) { + argType = opt.GetItemType(); + isOptional = true; + } + + TResourceTypeInspector res(*typeInfoHelper, argType); + if (!res) { + TDataTypeInspector data(*typeInfoHelper, argType); + if (!data) { + builder.SetError("Expected data type"); + return true; + } + + auto typeId = data.GetTypeId(); + if (typeId == TDataType<TDate>::Id || + typeId == TDataType<TDatetime>::Id || + typeId == TDataType<TTimestamp>::Id) { + + builder.Args()->Add(argsTuple.GetElementType(0)).Done(); + const TType* retType = builder.SimpleType<TFieldStorage>(); + + if (isOptional) { + 
retType = builder.Optional()->Item(retType).Build(); + } + + auto outputType = retType; + if (block) { + retType = builder.Block(block.IsScalar())->Item(retType).Build(); + } + + builder.Returns(retType); + builder.SupportsBlocks(); + builder.IsStrict(); + + builder.UserType(userType); + if (!typesOnly) { + if (typeId == TDataType<TDate>::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>()); + } + } + + if (typeId == TDataType<TDatetime>::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>()); + } + } + + if (typeId == TDataType<TTimestamp>::Id) { + if (block) { + builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), + UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + } else { + builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>()); + } + } + } + + return true; + } + } else { + Y_ENSURE(!block); + if (res.GetTag() != TStringRef::Of(TMResourceName)) { + builder.SetError("Unexpected resource tag"); + return true; + } + } + } + + // default implementation + builder.Args()->Add<TResource<TMResourceName>>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Done(); + builder.Returns<TFieldStorage>(); + builder.IsStrict(); + if (!typesOnly) { + builder.Implementation(new 
TImpl()); + } + } catch (const std::exception& e) { + builder.SetError(TStringBuf(e.what())); + } + + return true; + } +}; + +namespace { + +const TTMStorage& Reference(const NUdf::TUnboxedValuePod& value) { + return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr()); +} + +TTMStorage& Reference(NUdf::TUnboxedValuePod& value) { + return *reinterpret_cast<TTMStorage*>(value.GetRawPtr()); +} + +const TTMStorage& Reference(const TBlockItem& value) { + return *reinterpret_cast<const TTMStorage*>(value.GetRawPtr()); +} + +Y_DECLARE_UNUSED TTMStorage& Reference(TBlockItem& value) { + return *reinterpret_cast<TTMStorage*>(value.GetRawPtr()); +} + +const TTM64Storage& Reference64(const NUdf::TUnboxedValuePod& value) { + return *reinterpret_cast<const TTM64Storage*>(value.GetRawPtr()); +} + +TTM64Storage& Reference64(NUdf::TUnboxedValuePod& value) { + return *reinterpret_cast<TTM64Storage*>(value.GetRawPtr()); +} + +template<typename TValue> +TValue DoAddMonths(const TValue& date, i64 months, const NUdf::IDateBuilder& builder) { + auto result = date; + auto& storage = Reference(result); + if (!NYql::DateTime::DoAddMonths(storage, months, builder)) { + return TValue{}; + } + return result; +} + +template<typename TValue> +TValue DoAddQuarters(const TValue& date, i64 quarters, const NUdf::IDateBuilder& builder) { + return DoAddMonths(date, quarters * 3ll, builder); +} + +template<typename TValue> +TValue DoAddYears(const TValue& date, i64 years, const NUdf::IDateBuilder& builder) { + auto result = date; + auto& storage = Reference(result); + if (!NYql::DateTime::DoAddYears(storage, years, builder)) { + return TValue{}; + } + return result; +} + +#define ACCESSORS(field, type) \ + template<typename TValue> \ + inline type Get##field(const TValue& tm) { \ + return (type)Reference(tm).field; \ + } \ + template<typename TValue> \ + Y_DECLARE_UNUSED inline void Set##field(TValue& tm, type value) { \ + Reference(tm).field = value; \ + } + + ACCESSORS(Year, ui16) + 
ACCESSORS(DayOfYear, ui16) + ACCESSORS(WeekOfYear, ui8) + ACCESSORS(WeekOfYearIso8601, ui8) + ACCESSORS(DayOfWeek, ui8) + ACCESSORS(Month, ui8) + ACCESSORS(Day, ui8) + ACCESSORS(Hour, ui8) + ACCESSORS(Minute, ui8) + ACCESSORS(Second, ui8) + ACCESSORS(Microsecond, ui32) + ACCESSORS(TimezoneId, ui16) + +#undef ACCESSORS + + inline bool ValidateYear(ui16 year) { + return year >= NUdf::MIN_YEAR - 1 || year <= NUdf::MAX_YEAR + 1; + } + + inline bool ValidateMonth(ui8 month) { + return month >= 1 && month <= 12; + } + + inline bool ValidateDay(ui8 day) { + return day >= 1 && day <= 31; + } + + inline bool ValidateHour(ui8 hour) { + return hour < 24; + } + + inline bool ValidateMinute(ui8 minute) { + return minute < 60; + } + + inline bool ValidateSecond(ui8 second) { + return second < 60; + } + + inline bool ValidateMicrosecond(ui32 microsecond) { + return microsecond < 1000000; + } + + inline bool ValidateTimezoneId(ui16 timezoneId) { + const auto& zones = NUdf::GetTimezones(); + return timezoneId < zones.size() && !zones[timezoneId].empty(); + } + + inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) { + static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { + int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); + if (cmp == 0) + return a.size() < b.size(); + return cmp < 0; + }; + static const std::map<std::string_view, ui8, decltype(cmp)> mp = { + {"jan", 1}, + {"feb", 2}, + {"mar", 3}, + {"apr", 4}, + {"may", 5}, + {"jun", 6}, + {"jul", 7}, + {"aug", 8}, + {"sep", 9}, + {"oct", 10}, + {"nov", 11}, + {"dec", 12} + }; + const auto& it = mp.find(monthName); + if (it != mp.end()) { + month = it -> second; + return true; + } + return false; + } + + inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) { + static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { + int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); + if 
(cmp == 0) + return a.size() < b.size(); + return cmp < 0; + }; + static const std::map<std::string_view, ui8, decltype(cmp)> mp = { + {"january", 1}, + {"february", 2}, + {"march", 3}, + {"april", 4}, + {"may", 5}, + {"june", 6}, + {"july", 7}, + {"august", 8}, + {"september", 9}, + {"october", 10}, + {"november", 11}, + {"december", 12} + }; + const auto& it = mp.find(monthName); + if (it != mp.end()) { + month = it -> second; + return true; + } + return false; + } + + inline bool ValidateDatetime(ui32 datetime) { + return datetime < MAX_DATETIME; + } + + inline bool ValidateTimestamp(ui64 timestamp) { + return timestamp < MAX_TIMESTAMP; + } + + inline bool ValidateInterval(i64 interval) { + return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP); + } + + // Split + + template<typename TUserDataType, bool Nullable> + using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result, + TTzDateBlockReader<TUserDataType, Nullable>, + TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>; + + template<typename TUserDataType> + struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> { + static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder); + + template<typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) { + try { + TBlockItem res {0}; + Split(arg, Reference(res), *valueBuilder); + sink(res); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << e.what()).data()); + } + } + }; + + template <typename TUserDataType> + class TSplit : public TBoxedValue { + const TSourcePosition Pos_; + + public: + explicit TSplit(TSourcePosition pos) + : Pos_(pos) + {} + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + + static bool DeclareSignature( + TStringRef name, + TType* 
userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + const auto typeInfoHelper = builder.TypeInfoHelper(); + + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple); + Y_ENSURE(tuple.GetElementsCount() > 0); + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple); + + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Expected one argument"); + return true; + } + auto argType = argsTuple.GetElementType(0); + + builder.UserType(userType); + builder.SupportsBlocks(); + builder.IsStrict(); + + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build(); + builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); + const auto* retType = builder.Resource(TMResourceName); + const auto* blockRetType = builder.Block(false)->Item(retType).Build(); + builder.Returns(blockRetType); + + if (!typesOnly) { + builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(), + TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE)); + } + } else { + builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap); + if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::BigDateType) { + builder.Returns(builder.Resource(TM64ResourceName)); + } else { + builder.Returns(builder.Resource(TMResourceName)); + } + + if (!typesOnly) { + builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition())); + } + } + + return true; + } + }; + + template <> + void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { + storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>()); + } + + template <> + void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { + 
storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>()); + } + + template <> + void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { + storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>()); + } + + template <> + void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { + storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId()); + } + + template <> + void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { + storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId()); + } + + template <> + void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { + storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId()); + } + + template <> + void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; + } + + template <> + void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; + } + + template <> + void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; + } + + template <> + TUnboxedValue TSplit<TDate>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.FromDate(builder, args[0].Get<ui16>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TDate32>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { 
+ try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + TUnboxedValuePod result(0); + auto& storage = Reference64(result); + storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TDatetime>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.FromDatetime(builder, args[0].Get<ui32>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TDatetime64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + TUnboxedValuePod result(0); + auto& storage = Reference64(result); + storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TTimestamp>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.FromTimestamp(builder, args[0].Get<ui64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TTimestamp64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + TUnboxedValuePod result(0); + auto& storage = Reference64(result); + 
storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TTzDate>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TTzDatetime>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + TUnboxedValue TSplit<TTzTimestamp>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + // Make* + + template<typename TUserDataType, bool Nullable> + using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result, + TTzDateArrayBuilder<TUserDataType, Nullable>, + TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, 
Nullable>>; + + template<typename TUserDataType> + struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> { + static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder); + + template<typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + auto& storage = Reference(item); + sink(TBlockItem(Make(storage, *valueBuilder))); + } + }; + + template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false)); + return res; + } + + template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); + return res; + } + + template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); + return res; + } + + template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true)); + res.SetTimezoneId(storage.TimezoneId); + return res; + } + + template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); + res.SetTimezoneId(storage.TimezoneId); + return res; + } + + template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); + res.SetTimezoneId(storage.TimezoneId); + return res; + } + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) { + 
auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference(args[0]); + return TUnboxedValuePod(storage.ToDate(builder, false)); + } + END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference(args[0]); + return TUnboxedValuePod(storage.ToDatetime(builder)); + } + END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference(args[0]); + return TUnboxedValuePod(storage.ToTimestamp(builder)); + } + END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference(args[0]); + try { + TUnboxedValuePod result(storage.ToDate(builder, true)); + result.SetTimezoneId(storage.TimezoneId); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " + << storage.ToString() + << " cannot be casted to TzDate" + ).data()); + } + } + END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference(args[0]); + TUnboxedValuePod result(storage.ToDatetime(builder)); + result.SetTimezoneId(storage.TimezoneId); + return result; + } + END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = 
Reference(args[0]); + TUnboxedValuePod result(storage.ToTimestamp(builder)); + result.SetTimezoneId(storage.TimezoneId); + return result; + } + END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do); + + + SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + TUnboxedValuePod result(0); + auto& arg = Reference(args[0]); + auto& storage = Reference64(result); + storage.From(arg); + return result; + } + + SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference64(args[0]); + return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder())); + } + + SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference64(args[0]); + return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder())); + } + + SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference64(args[0]); + return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); + } + + // Get* + +#define GET_METHOD(field, type) \ + SIMPLE_STRICT_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \ + Y_UNUSED(valueBuilder); \ + return TUnboxedValuePod(Get##field(args[0])); \ + } + +// #define GET_METHOD(field, type) \ +// struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \ +// template<typename TSink> \ +// static void Process(TBlockItem item, const IValueBuilder& valueBuilder, const TSink& sink) { \ +// Y_UNUSED(valueBuilder); \ +// sink(TBlockItem(Get##field(item))); \ +// } \ +// }; \ +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGet##field, type(TAutoMap<TResource<TMResourceName>>)) { \ +// Y_UNUSED(valueBuilder); \ +// return TUnboxedValuePod(Get##field(args[0])); \ +// } \ +// 
END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + + GET_METHOD(Year, ui16) + GET_METHOD(DayOfYear, ui16) + GET_METHOD(Month, ui8) + + // template<typename TValue> + // TValue GetMonthNameValue(size_t idx) { + // static const std::array<TValue, 12U> monthNames = {{ + // TValue::Embedded(TStringRef::Of("January")), + // TValue::Embedded(TStringRef::Of("February")), + // TValue::Embedded(TStringRef::Of("March")), + // TValue::Embedded(TStringRef::Of("April")), + // TValue::Embedded(TStringRef::Of("May")), + // TValue::Embedded(TStringRef::Of("June")), + // TValue::Embedded(TStringRef::Of("July")), + // TValue::Embedded(TStringRef::Of("August")), + // TValue::Embedded(TStringRef::Of("September")), + // TValue::Embedded(TStringRef::Of("October")), + // TValue::Embedded(TStringRef::Of("November")), + // TValue::Embedded(TStringRef::Of("December")) + // }}; + // return monthNames.at(idx); + // } + + // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { + // template<typename TSink> + // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + // Y_UNUSED(valueBuilder); + // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U)); + // } + // }; + + // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) { + // Y_UNUSED(valueBuilder); + // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U); + // } + // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + + SIMPLE_STRICT_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + static const std::array<TUnboxedValue, 12U> monthNames = {{ + TUnboxedValuePod::Embedded(TStringRef::Of("January")), + 
TUnboxedValuePod::Embedded(TStringRef::Of("February")), + TUnboxedValuePod::Embedded(TStringRef::Of("March")), + TUnboxedValuePod::Embedded(TStringRef::Of("April")), + TUnboxedValuePod::Embedded(TStringRef::Of("May")), + TUnboxedValuePod::Embedded(TStringRef::Of("June")), + TUnboxedValuePod::Embedded(TStringRef::Of("July")), + TUnboxedValuePod::Embedded(TStringRef::Of("August")), + TUnboxedValuePod::Embedded(TStringRef::Of("September")), + TUnboxedValuePod::Embedded(TStringRef::Of("October")), + TUnboxedValuePod::Embedded(TStringRef::Of("November")), + TUnboxedValuePod::Embedded(TStringRef::Of("December")) + }}; + return monthNames.at(GetMonth(*args) - 1U); + } + + GET_METHOD(WeekOfYear, ui8) + GET_METHOD(WeekOfYearIso8601, ui8) + + // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> { + // template<typename TSink> + // static void Process(TBlockItem item, const TSink& sink) { + // sink(GetDay(item)); + // } + // }; + + // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) { + // Y_UNUSED(valueBuilder); + // return TUnboxedValuePod(GetDay(args[0])); + // } + // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + + SIMPLE_STRICT_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(GetDay(args[0])); + } + + GET_METHOD(DayOfWeek, ui8) + + template<typename TValue> + TValue GetDayNameValue(size_t idx) { + static const std::array<TValue, 7U> dayNames = {{ + TValue::Embedded(TStringRef::Of("Monday")), + TValue::Embedded(TStringRef::Of("Tuesday")), + TValue::Embedded(TStringRef::Of("Wednesday")), + TValue::Embedded(TStringRef::Of("Thursday")), + TValue::Embedded(TStringRef::Of("Friday")), + TValue::Embedded(TStringRef::Of("Saturday")), + TValue::Embedded(TStringRef::Of("Sunday")) + }}; + return 
dayNames.at(idx); + } + + SIMPLE_STRICT_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U); + } + + // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { + // template<typename TSink> + // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + // Y_UNUSED(valueBuilder); + // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U)); + // } + // }; + + // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) { + // Y_UNUSED(valueBuilder); + // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U); + // } + // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + + GET_METHOD(TimezoneId, ui16) + + struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> { + template<typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + Y_UNUSED(valueBuilder); + auto timezoneId = GetTimezoneId(item); + if (timezoneId >= NUdf::GetTimezones().size()) { + sink(TBlockItem{}); + } else { + sink(TBlockItem{NUdf::GetTimezones()[timezoneId]}); + } + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetTimezoneName, char*(TAutoMap<TResource<TMResourceName>>)) { + auto timezoneId = GetTimezoneId(args[0]); + if (timezoneId >= NUdf::GetTimezones().size()) { + return TUnboxedValuePod(); + } + return valueBuilder->NewString(NUdf::GetTimezones()[timezoneId]); + } + END_SIMPLE_ARROW_UDF(TGetTimezoneName, TTGetTimezoneNameKernelExec::Do); + + // Update + + class TUpdate : public TBoxedValue { + const TSourcePosition Pos_; + public: + explicit 
TUpdate(TSourcePosition pos) + : Pos_(pos) + {} + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + auto result = args[0]; + + if (args[1]) { + auto year = args[1].Get<ui16>(); + if (!ValidateYear(year)) { + return TUnboxedValuePod(); + } + SetYear(result, year); + } + if (args[2]) { + auto month = args[2].Get<ui8>(); + if (!ValidateMonth(month)) { + return TUnboxedValuePod(); + } + SetMonth(result, month); + } + if (args[3]) { + auto day = args[3].Get<ui8>(); + if (!ValidateDay(day)) { + return TUnboxedValuePod(); + } + SetDay(result, day); + } + if (args[4]) { + auto hour = args[4].Get<ui8>(); + if (!ValidateHour(hour)) { + return TUnboxedValuePod(); + } + SetHour(result, hour); + } + if (args[5]) { + auto minute = args[5].Get<ui8>(); + if (!ValidateMinute(minute)) { + return TUnboxedValuePod(); + } + SetMinute(result, minute); + } + if (args[6]) { + auto second = args[6].Get<ui8>(); + if (!ValidateSecond(second)) { + return TUnboxedValuePod(); + } + SetSecond(result, second); + } + if (args[7]) { + auto microsecond = args[7].Get<ui32>(); + if (!ValidateMicrosecond(microsecond)) { + return TUnboxedValuePod(); + } + SetMicrosecond(result, microsecond); + } + if (args[8]) { + auto timezoneId = args[8].Get<ui16>(); + if (!ValidateTimezoneId(timezoneId)) { + return TUnboxedValuePod(); + } + SetTimezoneId(result, timezoneId); + } + + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference(result); + if (!storage.Validate(builder)) { + return TUnboxedValuePod(); + } + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Update"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + 
return false; + } + + auto resourceType = builder.Resource(TMResourceName); + auto optionalResourceType = builder.Optional()->Item(resourceType).Build(); + + builder.OptionalArgs(8).Args()->Add(resourceType).Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Add(builder.Optional()->Item<ui16>().Build()).Name("Year") + .Add(builder.Optional()->Item<ui8>().Build()).Name("Month") + .Add(builder.Optional()->Item<ui8>().Build()).Name("Day") + .Add(builder.Optional()->Item<ui8>().Build()).Name("Hour") + .Add(builder.Optional()->Item<ui8>().Build()).Name("Minute") + .Add(builder.Optional()->Item<ui8>().Build()).Name("Second") + .Add(builder.Optional()->Item<ui32>().Build()).Name("Microsecond") + .Add(builder.Optional()->Item<ui16>().Build()).Name("TimezoneId"); + + builder.Returns(optionalResourceType); + + if (!typesOnly) { + builder.Implementation(new TUpdate(builder.GetSourcePosition())); + } + + builder.IsStrict(); + return true; + } + }; + + // From* + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromSeconds, TOptional<TTimestamp>(TAutoMap<ui32>)) { + Y_UNUSED(valueBuilder); + auto res = args[0].Get<ui32>(); + if (!ValidateDatetime(res)) { + return TUnboxedValuePod(); + } + return TUnboxedValuePod((ui64)(res * 1000000ull)); + } + + using TFromSecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui32, ui64, + [] (ui32 seconds) { return std::make_pair(ui64(seconds * 1000000ull), ValidateDatetime(seconds)); }>; + END_SIMPLE_ARROW_UDF(TFromSeconds, TFromSecondsKernel::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromMilliseconds, TOptional<TTimestamp>(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + auto res = args[0].Get<ui64>(); + if (res >= MAX_TIMESTAMP / 1000u) { + return TUnboxedValuePod(); + } + return TUnboxedValuePod(res * 1000u); + } + + using TFromMillisecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui64, ui64, + [] (ui64 milliseconds) { return std::make_pair(ui64(milliseconds * 1000u), milliseconds < MAX_TIMESTAMP / 1000u); }>; + END_SIMPLE_ARROW_UDF(TFromMilliseconds, 
TFromMillisecondsKernel::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TFromMicroseconds, TOptional<TTimestamp>(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + auto res = args[0].Get<ui64>(); + if (!ValidateTimestamp(res)) { + return TUnboxedValuePod(); + } + return TUnboxedValuePod(res); + } + + using TFromMicrosecondsKernel = TUnaryUnsafeFixedSizeFilterKernel<ui64, ui64, + [] (ui64 timestamp) { return std::make_pair(timestamp, ValidateTimestamp(timestamp)); }>; + END_SIMPLE_ARROW_UDF(TFromMicroseconds, TFromMicrosecondsKernel::Do); + + template <typename TInput, i64 Multiplier> + using TIntervalFromKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, i64, + [] (TInput interval) { return std::make_pair(i64(interval * Multiplier), ValidateInterval(interval)); }>; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromDays, TOptional<TInterval>(TAutoMap<i32>)) { + Y_UNUSED(valueBuilder); + const i64 res = i64(args[0].Get<i32>()) * UsecondsInDay; + return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod(); + } + END_SIMPLE_ARROW_UDF(TIntervalFromDays, (TIntervalFromKernel<i32, UsecondsInDay>::Do)); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromHours, TOptional<TInterval>(TAutoMap<i32>)) { + Y_UNUSED(valueBuilder); + const i64 res = i64(args[0].Get<i32>()) * UsecondsInHour; + return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod(); + } + END_SIMPLE_ARROW_UDF(TIntervalFromHours, (TIntervalFromKernel<i32, UsecondsInHour>::Do)); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMinutes, TOptional<TInterval>(TAutoMap<i32>)) { + Y_UNUSED(valueBuilder); + const i64 res = i64(args[0].Get<i32>()) * UsecondsInMinute; + return ValidateInterval(res) ? 
TUnboxedValuePod(res) : TUnboxedValuePod(); + } + END_SIMPLE_ARROW_UDF(TIntervalFromMinutes, (TIntervalFromKernel<i32, UsecondsInMinute>::Do)); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromSeconds, TOptional<TInterval>(TAutoMap<i32>)) { + Y_UNUSED(valueBuilder); + const i64 res = i64(args[0].Get<i32>()) * UsecondsInSecond; + return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod(); + } + END_SIMPLE_ARROW_UDF(TIntervalFromSeconds, (TIntervalFromKernel<i32, UsecondsInSecond>::Do)); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMilliseconds, TOptional<TInterval>(TAutoMap<i64>)) { + Y_UNUSED(valueBuilder); + const i64 res = i64(args[0].Get<i64>()) * UsecondsInMilliseconds; + return ValidateInterval(res) ? TUnboxedValuePod(res) : TUnboxedValuePod(); + } + END_SIMPLE_ARROW_UDF(TIntervalFromMilliseconds, (TIntervalFromKernel<i64, UsecondsInMilliseconds>::Do)); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TIntervalFromMicroseconds, TOptional<TInterval>(TAutoMap<i64>)) { + Y_UNUSED(valueBuilder); + const i64 res = args[0].Get<i64>(); + return ValidateInterval(res) ? 
TUnboxedValuePod(res) : TUnboxedValuePod(); + } + END_SIMPLE_ARROW_UDF(TIntervalFromMicroseconds, (TIntervalFromKernel<i64, 1>::Do)); + + // To* + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TToDays, i32(TAutoMap<TInterval>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInDay)); + } + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToDays, + (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInDay); }>), + arrow::compute::NullHandling::INTERSECTION); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TToHours, i32(TAutoMap<TInterval>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInHour)); + } + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToHours, + (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInHour); }>), + arrow::compute::NullHandling::INTERSECTION); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TToMinutes, i32(TAutoMap<TInterval>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(i32(args[0].Get<i64>() / UsecondsInMinute)); + } + END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TToMinutes, + (UnaryPreallocatedExecImpl<i64, i32, [] (i64 arg) { return i32(arg / UsecondsInMinute); }>), + arrow::compute::NullHandling::INTERSECTION); + + // StartOf* + + template<auto Core> + struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> { + template<typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + if (auto res = Core(Reference(item), *valueBuilder)) { + Reference(item) = res.GetRef(); + sink(item); + } else { + sink(TBlockItem{}); + } + + } + }; + + TMaybe<TTMStorage> StartOfYear(TTMStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = 1; + storage.Day = 1; + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; + } + return 
storage; + } + BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfYear, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) { + auto result = args[0]; + auto& storage = Reference(result); + if (auto res = StartOfYear(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TStartOfYear, TStartOfKernelExec<StartOfYear>::Do); + + TMaybe<TTMStorage> StartOfQuarter(TTMStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = (storage.Month - 1) / 3 * 3 + 1; + storage.Day = 1; + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; + } + return storage; + } + BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfQuarter, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) { + auto result = args[0]; + auto& storage = Reference(result); + if (auto res = StartOfQuarter(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TStartOfQuarter, TStartOfKernelExec<StartOfQuarter>::Do); + + TMaybe<TTMStorage> StartOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) { + storage.Day = 1; + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; + } + return storage; + } + BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) { + auto result = args[0]; + auto& storage = Reference(result); + if (auto res = StartOfMonth(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TStartOfMonth, TStartOfKernelExec<StartOfMonth>::Do); + + TMaybe<TTMStorage> EndOfMonth(TTMStorage storage, const IValueBuilder& valueBuilder) { + storage.Day = 
NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; + } + return storage; + } + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TEndOfMonth, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) { + auto result = args[0]; + auto& storage = Reference(result); + if (auto res = EndOfMonth(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TEndOfMonth, TStartOfKernelExec<EndOfMonth>::Do); + + TMaybe<TTMStorage> StartOfWeek(TTMStorage storage, const IValueBuilder& valueBuilder) { + const ui32 shift = 86400u * (storage.DayOfWeek - 1u); + if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) { + return {}; + } + storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + return storage; + } + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfWeek, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) { + auto result = args[0]; + auto& storage = Reference(result); + if (auto res = StartOfWeek(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TStartOfWeek, TStartOfKernelExec<StartOfWeek>::Do); + + TMaybe<TTMStorage> StartOfDay(TTMStorage storage, const IValueBuilder& valueBuilder) { + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; + } + return storage; + } + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOfDay, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>)) { + auto result = args[0]; + 
auto& storage = Reference(result); + if (auto res = StartOfDay(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TStartOfDay, TStartOfKernelExec<StartOfDay>::Do); + + TMaybe<TTMStorage> StartOf(TTMStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { + if (interval >= 86400000000ull) { + // treat as StartOfDay + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; + } else { + auto current = storage.ToTimeOfDay(); + auto rounded = current / interval * interval; + storage.FromTimeOfDay(rounded); + } + + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; + } + return storage; + } + + struct TStartOfBinaryKernelExec : TBinaryKernelExec<TStartOfBinaryKernelExec> { + template<typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + auto& storage = Reference(arg1); + ui64 interval = std::abs(arg2.Get<i64>()); + if (interval == 0) { + sink(arg1); + return; + } + + if (auto res = StartOf(storage, interval, *valueBuilder)) { + storage = res.GetRef(); + sink(arg1); + } else { + sink(TBlockItem{}); + } + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TStartOf, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, TAutoMap<TInterval>)) { + auto result = args[0]; + ui64 interval = std::abs(args[1].Get<i64>()); + if (interval == 0) { + return result; + } + if (auto res = StartOf(Reference(result), interval, *valueBuilder)) { + Reference(result) = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; + } + END_SIMPLE_ARROW_UDF(TStartOf, TStartOfBinaryKernelExec::Do); + + struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> { + template<typename TSink> + static void Process(const IValueBuilder* 
valueBuilder, TBlockItem item, const TSink& sink) { + Y_UNUSED(valueBuilder); + auto& storage = Reference(item); + sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()}); + } + }; + + const auto timeOfDayKernelExecDo = TTimeOfDayKernelExec::Do; + BEGIN_SIMPLE_STRICT_ARROW_UDF(TTimeOfDay, TInterval(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + auto& storage = Reference(args[0]); + return TUnboxedValuePod((i64)storage.ToTimeOfDay()); + } + END_SIMPLE_ARROW_UDF(TTimeOfDay, timeOfDayKernelExecDo); + + + // Add ... + + template<auto Core> + struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> { + template<typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) { + sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder())); + } + }; + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftYears, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) { + return DoAddYears(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder()); + } + END_SIMPLE_ARROW_UDF(TShiftYears, TAddKernelExec<DoAddYears<TBlockItem>>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftQuarters, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) { + return DoAddQuarters(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder()); + } + END_SIMPLE_ARROW_UDF(TShiftQuarters, TAddKernelExec<DoAddQuarters<TBlockItem>>::Do); + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TShiftMonths, TOptional<TResource<TMResourceName>>(TAutoMap<TResource<TMResourceName>>, i32)) { + return DoAddMonths(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder()); + } + END_SIMPLE_ARROW_UDF(TShiftMonths, TAddKernelExec<DoAddMonths<TBlockItem>>::Do); + + template<size_t Digits, bool Exacly = true> + struct PrintNDigits; + + template<bool Exacly> + struct PrintNDigits<0U, Exacly> { + static constexpr ui32 Miltiplier = 1U; + + template <typename T> + static constexpr size_t Do(T, 
char*) { return 0U; } + }; + + template<size_t Digits, bool Exacly> + struct PrintNDigits { + using TNextPrint = PrintNDigits<Digits - 1U, Exacly>; + static constexpr ui32 Miltiplier = TNextPrint::Miltiplier * 10U; + + template <typename T> + static constexpr size_t Do(T in, char* out) { + in %= Miltiplier; + if (Exacly || in) { + *out = "0123456789"[in / TNextPrint::Miltiplier]; + return 1U + TNextPrint::Do(in, ++out); + } + return 0U; + } + }; + + // Format + + class TFormat : public TBoxedValue { + public: + explicit TFormat(TSourcePosition pos) + : Pos_(pos) + {} + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Format"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } + + auto resourceType = builder.Resource(TMResourceName); + + auto stringType = builder.SimpleType<char*>(); + + auto boolType = builder.SimpleType<bool>(); + auto optionalBoolType = builder.Optional()->Item(boolType).Build(); + + auto args = builder.Args(); + args->Add(stringType); + args->Add(optionalBoolType).Name("AlwaysWriteFractionalSeconds"); + args->Done(); + builder.OptionalArgs(1); + builder.Returns( + builder.Callable(1) + ->Returns(stringType) + .Arg(resourceType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Build() + ); + + if (!typesOnly) { + builder.Implementation(new TFormat(builder.GetSourcePosition())); + } + + builder.IsStrict(); + + return true; + } + + private: + using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>; + + struct TDataPrinter { + const std::string_view Data; + + size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const { + std::memcpy(out, Data.data(), Data.size()); + return Data.size(); + } + }; + + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + bool 
alwaysWriteFractionalSeconds = false; + if (auto val = args[1]) { + alwaysWriteFractionalSeconds = val.Get<bool>(); + } + + return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + class TImpl : public TBoxedValue { + public: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const auto value = args[0]; + + auto& builder = valueBuilder->GetDateBuilder(); + + auto result = valueBuilder->NewStringNotFilled(ReservedSize_); + auto pos = result.AsStringRef().Data(); + ui32 size = 0U; + + for (const auto& printer : Printers_) { + if (const auto plus = printer(pos, value, builder)) { + size += plus; + pos += plus; + } + } + + if (size < ReservedSize_) { + result = valueBuilder->SubString(result.Release(), 0U, size); + } + + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds) + : Pos_(pos) + , Format_(format) + { + const std::string_view formatView(Format_.AsStringRef()); + auto dataStart = formatView.begin(); + size_t dataSize = 0U; + + for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { + if (*ptr != '%') { + ++dataSize; + continue; + } + + if (dataSize) { + Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)}); + ReservedSize_ += dataSize; + dataSize = 0U; + } + + if (formatView.end() == ++ptr) { + ythrow yexception() << "format string ends with single %%"; + } + + switch (*ptr) { + case '%': { + static constexpr size_t size = 1; + Printers_.emplace_back([](char* out, const TUnboxedValuePod&, const IDateBuilder&) { + *out = '%'; + return size; + }); + ReservedSize_ += size; + break; + } + case 'Y': { + static constexpr size_t 
size = 4; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + return PrintNDigits<size>::Do(GetYear(value), out); + }); + ReservedSize_ += size; + break; + } + case 'm': { + static constexpr size_t size = 2; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + return PrintNDigits<size>::Do(GetMonth(value), out); + }); + ReservedSize_ += size; + break; + } + case 'd': { + static constexpr size_t size = 2; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + return PrintNDigits<size>::Do(GetDay(value), out); + }); + ReservedSize_ += size; + break; + } + case 'H': { + static constexpr size_t size = 2; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + return PrintNDigits<size>::Do(GetHour(value), out); + }); + ReservedSize_ += size; + break; + } + case 'M': { + static constexpr size_t size = 2; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + return PrintNDigits<size>::Do(GetMinute(value), out); + }); + ReservedSize_ += size; + break; + } + case 'S': + Printers_.emplace_back([alwaysWriteFractionalSeconds](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + constexpr size_t size = 2; + if (const auto microsecond = GetMicrosecond(value); microsecond || alwaysWriteFractionalSeconds) { + out += PrintNDigits<size>::Do(GetSecond(value), out); + *out++ = '.'; + constexpr size_t msize = 6; + auto addSz = alwaysWriteFractionalSeconds ? 
+ PrintNDigits<msize, true>::Do(microsecond, out) : + PrintNDigits<msize, false>::Do(microsecond, out); + return size + 1U + addSz; + } + return PrintNDigits<size>::Do(GetSecond(value), out); + }); + ReservedSize_ += 9; + break; + + case 'z': { + static constexpr size_t size = 5; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder& builder) { + auto timezoneId = GetTimezoneId(value); + if (TTMStorage::IsUniversal(timezoneId)) { + std::memcpy(out, "+0000", size); + return size; + } + i32 shift; + if (!builder.GetTimezoneShift(GetYear(value), GetMonth(value), GetDay(value), + GetHour(value), GetMinute(value), GetSecond(value), timezoneId, shift)) + { + std::memcpy(out, "+0000", size); + return size; + } + + /* A zero offset gets '+', consistent with the "+0000" written by the branches above. */ + *out++ = shift >= 0 ? '+' : '-'; + shift = std::abs(shift); + out += PrintNDigits<2U>::Do(shift / 60U, out); + out += PrintNDigits<2U>::Do(shift % 60U, out); + return size; + }); + ReservedSize_ += size; + break; + } + case 'Z': + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + const auto timezoneId = GetTimezoneId(value); + const auto tzName = NUdf::GetTimezones()[timezoneId]; + /* Report exactly the number of bytes copied, so the caller never advances past the MAX_TIMEZONE_NAME_LEN bytes reserved for this specifier. */ + const auto written = std::min(tzName.size(), MAX_TIMEZONE_NAME_LEN); + std::memcpy(out, tzName.data(), written); + return written; + }); + ReservedSize_ += MAX_TIMEZONE_NAME_LEN; + break; + case 'b': { + static constexpr size_t size = 3; + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + static constexpr std::string_view mp[] { + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec" + }; + auto month = GetMonth(value); + Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value"); + std::memcpy(out, mp[month - 1].data(), size); + return size; + }); + ReservedSize_ += size; + break; + } + case 'B': { + Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { + static constexpr
std::string_view mp[] { + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December" + }; + auto month = GetMonth(value); + Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value"); + const std::string_view monthFullName = mp[month - 1]; + std::memcpy(out, monthFullName.data(), monthFullName.size()); + return monthFullName.size(); + }); + ReservedSize_ += 9U; // MAX_MONTH_FULL_NAME_LEN + break; + } + default: + ythrow yexception() << "invalid format character: " << *ptr; + } + + dataStart = ptr + 1U; + } + + if (dataSize) { + Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)}); + ReservedSize_ += dataSize; + } + } + + private: + const TSourcePosition Pos_; + + TUnboxedValue Format_; + TPrintersList Printers_{}; + size_t ReservedSize_ = 0; + }; + + const TSourcePosition Pos_; + }; + + template<size_t Digits> + struct ParseExaclyNDigits; + + template<> + struct ParseExaclyNDigits<0U> { + template <typename T> + static constexpr bool Do(std::string_view::const_iterator&, T&) { + return true; + } + }; + + template<size_t Digits> + struct ParseExaclyNDigits { + /* Consumes exactly Digits decimal digits starting at `it`, accumulating them into `out`; returns false on the first non-digit. */ + template <typename T> + static constexpr bool Do(std::string_view::const_iterator& it, T& out) { + const auto d = *it; + /* std::isdigit requires a value representable as unsigned char; cast so bytes >= 0x80 are not passed as negative ints. */ + if (!std::isdigit(static_cast<unsigned char>(d))) { + return false; + } + out *= 10U; + out += d - '0'; + return ParseExaclyNDigits<Digits - 1U>::Do(++it, out); + } + }; + + // Parse + + class TParse : public TBoxedValue { + public: + class TFactory : public TBoxedValue { + public: + explicit TFactory(TSourcePosition pos) + : Pos_(pos) + {} + + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TParse(args[0], Pos_)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + const TSourcePosition Pos_; + }; + + static const TStringRef& Name() { + static auto
name = TStringRef::Of("Parse"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } + + auto resourceType = builder.Resource(TMResourceName); + auto optionalResourceType = builder.Optional()->Item(resourceType).Build(); + + builder.Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Add(builder.Optional()->Item<ui16>()) + .Done() + .OptionalArgs(1); + builder.RunConfig<char*>().Returns(optionalResourceType); + + if (!typesOnly) { + builder.Implementation(new TParse::TFactory(builder.GetSourcePosition())); + } + + return true; + } + + private: + const TSourcePosition Pos_; + const TUnboxedValue Format_; + + std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_; + + struct TDataScanner { + const std::string_view Data_; + + bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const { + if (limit < Data_.size() || !std::equal(Data_.begin(), Data_.end(), it)) { + return false; + } + std::advance(it, Data_.size()); + return true; + } + }; + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override + { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + + const std::string_view buffer = args[0].AsStringRef(); + + TUnboxedValuePod result(0); + auto& storage = Reference(result); + storage.MakeDefault(); + + auto& builder = valueBuilder->GetDateBuilder(); + + auto it = buffer.begin(); + for (const auto& scanner : Scanners_) { + if (!scanner(it, std::distance(it, buffer.end()), result, builder)) { + return TUnboxedValuePod(); + } + } + + if (buffer.end() != it || !storage.Validate(builder)) { + return TUnboxedValuePod(); + } + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + 
} + + TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + , Format_(runConfig) + { + const std::string_view formatView(Format_.AsStringRef()); + auto dataStart = formatView.begin(); + size_t dataSize = 0U; + + for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { + if (*ptr != '%') { + ++dataSize; + continue; + } + + if (dataSize) { + Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); + dataSize = 0; + } + + if (++ptr == formatView.end()) { + ythrow yexception() << "format string ends with single %%"; + } + + switch (*ptr) { + case '%': + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) { + return limit > 0U && *it++ == '%'; + }); + break; + + case 'Y': { + static constexpr size_t size = 4; + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + ui32 year = 0U; + if (limit < size || !ParseExaclyNDigits<size>::Do(it, year) || !ValidateYear(year)) { + return false; + } + SetYear(result, year); + return true; + }); + break; + } + case 'm': { + static constexpr size_t size = 2; + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + ui32 month = 0U; + if (limit < size || !ParseExaclyNDigits<size>::Do(it, month) || !ValidateMonth(month)) { + return false; + } + SetMonth(result, month); + return true; + }); + break; + } + case 'd': { + static constexpr size_t size = 2; + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + ui32 day = 0U; + if (limit < size || !ParseExaclyNDigits<size>::Do(it, day) || !ValidateDay(day)) { + return false; + } + SetDay(result, day); + return true; + }); + break; + } + case 'H': { + static constexpr size_t size = 2; + Scanners_.emplace_back([](std::string_view::const_iterator& it, 
size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + ui32 hour = 0U; + if (limit < size || !ParseExaclyNDigits<size>::Do(it, hour) || !ValidateHour(hour)) { + return false; + } + SetHour(result, hour); + return true; + }); + break; + } + case 'M': { + static constexpr size_t size = 2; + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + ui32 minute = 0U; + if (limit < size || !ParseExaclyNDigits<size>::Do(it, minute) || !ValidateMinute(minute)) { + return false; + } + SetMinute(result, minute); + return true; + }); + break; + } + case 'S': { + static constexpr size_t size = 2; + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + ui32 second = 0U; + if (limit < size || !ParseExaclyNDigits<size>::Do(it, second) || !ValidateSecond(second)) { + return false; + } + SetSecond(result, second); + limit -= size; + + if (!limit || *it != '.') { + return true; + } + + ++it; + --limit; + ui32 usec = 0U; + + /* Up to six fractional digits are accumulated; <cctype> classifiers get an unsigned char so bytes >= 0x80 are not passed as negative ints. */ + size_t digits = 6U; + for (; limit; --limit) { + const auto c = *it; + if (!digits || !std::isdigit(static_cast<unsigned char>(c))) { + break; + } + usec *= 10U; + usec += c - '0'; + ++it; + --digits; + } + /* Digits beyond the sixth are consumed but ignored. */ + for (; !digits && limit && std::isdigit(static_cast<unsigned char>(*it)); --limit, ++it); + while (digits--) { + usec *= 10U; + } + SetMicrosecond(result, usec); + return true; + }); + break; + } + case 'Z': + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder& builder) { + const auto start = it; + while (limit > 0 && (std::isalnum(static_cast<unsigned char>(*it)) || *it == '/' || *it == '_' || *it == '-' || *it == '+')) { + ++it; + --limit; + } + const auto size = std::distance(start, it); + + ui32 timezoneId; + if (!builder.FindTimezoneId(TStringRef(&*start, size), timezoneId)) { + return false; + } + SetTimezoneId(result, timezoneId); + return true; + }); + break; + case 'b': { + static constexpr size_t size = 3; +
Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + const auto start = it; + size_t cnt = 0U; + /* std::isalpha requires a value representable as unsigned char; cast so bytes >= 0x80 are not passed as negative ints. */ + while (limit > 0 && cnt < size && std::isalpha(static_cast<unsigned char>(*it))) { + ++it; + ++cnt; + --limit; + } + const std::string_view monthName{start, cnt}; + ui8 month = 0U; + if (cnt < size || !ValidateMonthShortName(monthName, month)) { + return false; + } + SetMonth(result, month); + return true; + }); + break; + } + case 'B': { + Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod& result, const IDateBuilder&) { + const auto start = it; + size_t cnt = 0U; + /* Same unsigned-char cast as for the short month name. */ + while (limit > 0 && std::isalpha(static_cast<unsigned char>(*it))) { + ++it; + ++cnt; + --limit; + } + + const std::string_view monthName{start, cnt}; + ui8 month = 0U; + if (!ValidateMonthFullName(monthName, month)) { + return false; + } + SetMonth(result, month); + return true; + }); + break; + } + default: + ythrow yexception() << "invalid format character: " << *ptr; + } + + dataStart = ptr + 1U; + } + + if (dataSize) { + Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); + } + } + }; + +#define PARSE_SPECIFIC_FORMAT(format) \ + SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \ + auto str = args[0].AsStringRef(); \ + TInstant instant; \ + if (!TInstant::TryParse##format(TStringBuf(str.Data(), str.Size()), instant) || instant.Seconds() >= NUdf::MAX_DATETIME) { \ + return TUnboxedValuePod(); \ + } \ + auto& builder = valueBuilder->GetDateBuilder(); \ + TUnboxedValuePod result(0); \ + auto& storage = Reference(result); \ + storage.FromTimestamp(builder, instant.MicroSeconds()); \ + return result; \ + } + + PARSE_SPECIFIC_FORMAT(Rfc822); + PARSE_SPECIFIC_FORMAT(Iso8601); + PARSE_SPECIFIC_FORMAT(Http); + PARSE_SPECIFIC_FORMAT(X509); + + SIMPLE_MODULE(TDateTime2Module, + TUserDataTypeFuncFactory<true, true, SplitName, TSplit, + TDate, + TDatetime, + TTimestamp, + TTzDate, +
TTzDatetime, + TTzTimestamp, + TDate32, + TDatetime64, + TTimestamp64>, + + TMakeDate, + TMakeDatetime, + TMakeTimestamp, + TMakeTzDate, + TMakeTzDatetime, + TMakeTzTimestamp, + + TConvert, + + TMakeDate32, + TMakeDatetime64, + TMakeTimestamp64, + + TGetYear, + TGetDayOfYear, + TGetMonth, + TGetMonthName, + TGetWeekOfYear, + TGetWeekOfYearIso8601, + TGetDayOfMonth, + TGetDayOfWeek, + TGetDayOfWeekName, + TGetTimeComponent<GetHourName, ui8, GetHour, 1u, 3600u, 24u, false>, + TGetTimeComponent<GetMinuteName, ui8, GetMinute, 1u, 60u, 60u, false>, + TGetTimeComponent<GetSecondName, ui8, GetSecond, 1u, 1u, 60u, false>, + TGetTimeComponent<GetMillisecondOfSecondName, ui32, GetMicrosecond, 1000u, 1000u, 1000u, true>, + TGetTimeComponent<GetMicrosecondOfSecondName, ui32, GetMicrosecond, 1u, 1u, 1000000u, true>, + TGetTimezoneId, + TGetTimezoneName, + + TUpdate, + + TFromSeconds, + TFromMilliseconds, + TFromMicroseconds, + + TIntervalFromDays, + TIntervalFromHours, + TIntervalFromMinutes, + TIntervalFromSeconds, + TIntervalFromMilliseconds, + TIntervalFromMicroseconds, + + TToDays, + TToHours, + TToMinutes, + + TStartOfYear, + TStartOfQuarter, + TStartOfMonth, + TStartOfWeek, + TStartOfDay, + TStartOf, + TTimeOfDay, + + TShiftYears, + TShiftQuarters, + TShiftMonths, + + TEndOfMonth, + + TToUnits<ToSecondsName, ui32, 1>, + TToUnits<ToMillisecondsName, ui64, 1000>, + TToUnits<ToMicrosecondsName, ui64, 1000000>, + + TFormat, + TParse, + + TParseRfc822, + TParseIso8601, + TParseHttp, + TParseX509 + ) +} + +REGISTER_MODULES(TDateTime2Module) diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/result.json b/yql/essentials/udfs/common/datetime2/test/canondata/result.json new file mode 100644 index 00000000000..6e475365ea6 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/result.json @@ -0,0 +1,137 @@ +{ + "test.test[BlockFrom]": [ + { + "uri": "file://test.test_BlockFrom_/results.txt" + } + ], + "test.test[BlockGet]": [ + { + "uri": 
"file://test.test_BlockGet_/results.txt" + } + ], + "test.test[BlockSplitMake]": [ + { + "uri": "file://test.test_BlockSplitMake_/results.txt" + } + ], + "test.test[BlockStartOf]": [ + { + "uri": "file://test.test_BlockStartOf_/results.txt" + } + ], + "test.test[BlockTmGet]": [ + { + "uri": "file://test.test_BlockTmGet_/results.txt" + } + ], + "test.test[BlockTo]": [ + { + "uri": "file://test.test_BlockTo_/results.txt" + } + ], + "test.test[EndOf]": [ + { + "uri": "file://test.test_EndOf_/results.txt" + } + ], + "test.test[FormatMicroseconds]": [ + { + "uri": "file://test.test_FormatMicroseconds_/results.txt" + } + ], + "test.test[Format]": [ + { + "uri": "file://test.test_Format_/results.txt" + } + ], + "test.test[From]": [ + { + "uri": "file://test.test_From_/results.txt" + } + ], + "test.test[Get]": [ + { + "uri": "file://test.test_Get_/results.txt" + } + ], + "test.test[ImplicitSplit]": [ + { + "uri": "file://test.test_ImplicitSplit_/results.txt" + } + ], + "test.test[MultirowBlockTo]": [ + { + "uri": "file://test.test_MultirowBlockTo_/results.txt" + } + ], + "test.test[ParseIso8601]": [ + { + "uri": "file://test.test_ParseIso8601_/results.txt" + } + ], + "test.test[ParseLim]": [ + { + "uri": "file://test.test_ParseLim_/results.txt" + } + ], + "test.test[Parse]": [ + { + "uri": "file://test.test_Parse_/results.txt" + } + ], + "test.test[Repr]": [ + { + "uri": "file://test.test_Repr_/results.txt" + } + ], + "test.test[Shift]": [ + { + "uri": "file://test.test_Shift_/results.txt" + } + ], + "test.test[SplitMake1969]": [ + { + "uri": "file://test.test_SplitMake1969_/results.txt" + } + ], + "test.test[SplitMake]": [ + { + "uri": "file://test.test_SplitMake_/results.txt" + } + ], + "test.test[StartOf1969]": [ + { + "uri": "file://test.test_StartOf1969_/results.txt" + } + ], + "test.test[StartOf]": [ + { + "uri": "file://test.test_StartOf_/results.txt" + } + ], + "test.test[To]": [ + { + "uri": "file://test.test_To_/results.txt" + } + ], + "test.test[TzToDate]": [ + 
{ + "uri": "file://test.test_TzToDate_/results.txt" + } + ], + "test.test[UpdateTz]": [ + { + "uri": "file://test.test_UpdateTz_/results.txt" + } + ], + "test.test[Update]": [ + { + "uri": "file://test.test_Update_/results.txt" + } + ], + "test.test[yql-14977]": [ + { + "uri": "file://test.test_yql-14977_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt new file mode 100644 index 00000000000..b053c8139f1 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockFrom_/results.txt @@ -0,0 +1,206 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "ts_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "ts_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "ts_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "ts_empty"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "interval_days"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_hours"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_minutes"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_days_overflow"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_null"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "3875345000000" + ]; + [ + "3875345000000" + ]; + [ + "3875345000000" + ]; + #; + [ + "604800000000" + ]; + [ + "604800000000" + 
]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "-604800000000" + ]; + [ + "8640000000000000" + ]; + # + ]; + [ + [ + "3875345000000" + ]; + [ + "3875345000000" + ]; + [ + "3875345000000" + ]; + #; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "-604800000000" + ]; + [ + "8640000000000000" + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt new file mode 100644 index 00000000000..9937150f592 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockGet_/results.txt @@ -0,0 +1,188 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "date_hour"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "date_minute"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "date_second"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "date_msec"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "date_usec"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "date_tz"; + [ + "DataType"; + "Uint16" + ] + ]; + [ + "date_tzname"; + [ + "DataType"; + "String" + ] + ]; + [ + "datetime_hour"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "datetime_minute"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "datetime_second"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "datetime_msec"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "datetime_usec"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "datetime_tz"; + [ + "DataType"; + "Uint16" + ] + ]; + [ + "datetime_tzname"; + [ + "DataType"; + "String" + ] + ]; + [ + "timestamp_hour"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "timestamp_minute"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "timestamp_second"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "timestamp_msec"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "timestamp_usec"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "timestamp_tz"; + [ + "DataType"; + "Uint16" + ] + ]; + [ + "timestamp_tzname"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "GMT"; + "21"; + "20"; + "19"; + "0"; + "0"; + "0"; + "GMT"; + "21"; + "20"; + "19"; + "345"; + "345678"; + "0"; + "GMT" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt new file mode 100644 index 00000000000..f60b4dd0263 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockSplitMake_/results.txt @@ -0,0 +1,76 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "rdate"; + [ + "DataType"; + "Date" + ] + ]; + [ + "rdatetime"; + [ + "DataType"; + "Datetime" + ] + ]; + [ + "rtimestamp"; + [ + "DataType"; + "Timestamp" + ] + ]; + [ + "rtzdate"; + [ + "DataType"; + "TzDate" + ] + ]; + [ + "rtzdatetime"; + [ + "DataType"; + "TzDatetime" + ] + ]; + [ + "rtztimestamp"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "17880"; + "1544835723"; + "1544835723456789"; + "2018-12-15,Europe/Moscow"; + "2018-12-15T01:02:03,Europe/Moscow"; + "2018-12-15T01:02:03.456789,Europe/Moscow" + ]; + [ + "13148"; + "1136073599"; + "1136073599999999"; + "2005-12-31,Canada/Central"; + "2005-12-31T16:00:00,Canada/Central"; + "2005-12-31T23:00:00,Canada/Central" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt new file mode 100644 index 00000000000..20890003833 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockStartOf_/results.txt @@ -0,0 +1,314 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + #; + [ + "1970-01-01T04:00:00,Europe/Moscow" + ]; + [ + "1970-01-01T05:00:00,Europe/Moscow" + ]; + [ + "1970-01-01T05:00:00,Europe/Moscow" + ]; + [ + "1970-01-01T04:59:57,Europe/Moscow" + ]; + [ + "18000000000" + ]; + [ + "1970-01-31T00:00:00,Europe/Moscow" + ] + ]; + [ + [ + "2018-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2018-10-01T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-01T00:00:00,Europe/Moscow" + ]; + [ + 
"2018-12-10T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T00:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T01:00:00,Europe/Moscow" + ]; + [ + "2018-12-15T01:02:00,Europe/Moscow" + ]; + [ + "2018-12-15T01:01:57,Europe/Moscow" + ]; + [ + "3723456789" + ]; + [ + "2018-12-31T00:00:00,Europe/Moscow" + ] + ]; + [ + [ + "2105-01-01T00:00:00,GMT" + ]; + [ + "2105-10-01T00:00:00,GMT" + ]; + [ + "2105-12-01T00:00:00,GMT" + ]; + [ + "2105-12-28T00:00:00,GMT" + ]; + [ + "2105-12-31T00:00:00,GMT" + ]; + [ + "2105-12-31T13:00:00,GMT" + ]; + [ + "2105-12-31T16:00:00,GMT" + ]; + [ + "2105-12-31T16:15:00,GMT" + ]; + [ + "2105-12-31T16:23:40,GMT" + ]; + [ + "2105-12-31T16:23:44,GMT" + ]; + [ + "59025000000" + ]; + [ + "2105-12-31T00:00:00,GMT" + ] + ]; + [ + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2105-12-28T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T01:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T01:00:00,Europe/Moscow" + ]; + [ + "2106-01-01T00:59:58,Europe/Moscow" + ]; + [ + "3600000000" + ]; + # + ]; + [ + [ + "2019-01-01T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-01T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-01T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-22T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T00:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T12:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T12:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T12:00:00,Europe/Moscow" + ]; + [ + "2019-07-24T11:59:57,Europe/Moscow" + ]; + [ + "43200000000" + ]; + [ + "2019-07-31T00:00:00,Europe/Moscow" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt new file mode 100644 index 00000000000..262c45b5971 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTmGet_/results.txt @@ -0,0 +1,628 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "ryear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rdayofyear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rmonth"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rmonthname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "rweekofyear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rweekofyeariso8601"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofmonth"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofweek"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofweekname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "rhour"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rtz"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rtzname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1970" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "11" + ]; + [ + "14" + ]; + [ + "0" 
+ ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "2" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "5" + ]; + [ + "Friday" + ]; + [ + "14" + ]; + [ + "8" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "32" + ]; + [ + "2" + ]; + [ + "February" + ]; + [ + "5" + ]; + [ + "5" + ]; + [ + "1" + ]; + [ + "7" + ]; + [ + "Sunday" + ]; + [ + "17" + ]; + [ + "3" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "246" + ]; + [ + "9" + ]; + [ + "September" + ]; + [ + "36" + ]; + [ + "36" + ]; + [ + "3" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "7" + ]; + [ + "22" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "365" + ]; + [ + "12" + ]; + [ + "December" + ]; + [ + "53" + ]; + [ + "53" + ]; + [ + "31" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "23" + ]; + [ + "59" + ]; + [ + "59" + ]; + [ + "999" + ]; + [ + "999999" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1971" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "53" + ]; + [ + "1" + ]; + [ + "5" + ]; + [ + "Friday" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1971" + ]; + [ + "14" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "3" + ]; + [ + "2" + ]; + [ + "14" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1978" + ]; + [ + "25" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "5" + ]; + [ + "4" + ]; + [ + "25" + ]; + [ + "3" + ]; + [ + "Wednesday" + ]; + [ + "16" + ]; + [ + "15" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "477" + ]; + [ + "Europe/Uzhgorod" + ] + ]; + [ + [ + "2018" + ]; 
+ [ + "335" + ]; + [ + "12" + ]; + [ + "December" + ]; + [ + "48" + ]; + [ + "48" + ]; + [ + "1" + ]; + [ + "6" + ]; + [ + "Saturday" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "3" + ]; + [ + "456" + ]; + [ + "456789" + ]; + [ + "1" + ]; + [ + "Europe/Moscow" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt new file mode 100644 index 00000000000..1936b498e58 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_BlockTo_/results.txt @@ -0,0 +1,356 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "interval_to_days"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_hours"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_minutes"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "interval_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "date_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "datetime_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "timestamp_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "tzdate_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "tzdatetime_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "tztimestamp_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "date_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "datetime_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "timestamp_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdate_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdatetime_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] 
+ ]; + [ + "tztimestamp_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "date_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "datetime_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "timestamp_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdate_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdatetime_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tztimestamp_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "interval_null"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "negative_1d"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2" + ]; + [ + "58" + ]; + [ + "3480" + ]; + [ + "208800" + ]; + [ + "208800000" + ]; + [ + "208800000000" + ]; + [ + "1542844800" + ]; + [ + "1542921619" + ]; + [ + "1542921619" + ]; + [ + "1542758400" + ]; + [ + "1542910819" + ]; + [ + "1542910819" + ]; + [ + "1542844800000" + ]; + [ + "1542921619000" + ]; + [ + "1542921619345" + ]; + [ + "1542758400000" + ]; + [ + "1542910819000" + ]; + [ + "1542910819345" + ]; + [ + "1542844800000000" + ]; + [ + "1542921619000000" + ]; + [ + "1542921619345678" + ]; + [ + "1542758400000000" + ]; + [ + "1542910819000000" + ]; + [ + "1542910819345678" + ]; + #; + [ + "-1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt new file mode 100644 index 00000000000..508cd4438ed --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_EndOf_/results.txt @@ -0,0 +1,295 @@ +[ + { + "Label" = "Normal cases"; + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2023-07-31 00:00:00 Europe/Moscow" + ]; + [ + "2023-08-31 00:00:00 GMT" + ]; + [ + "2023-09-30 00:00:00 GMT" + ]; + [ + "2023-02-28 00:00:00 GMT" + ]; + [ + "2024-02-29 00:00:00 GMT" + ] + ] + ] + } + ] + }; + { + "Label" = "Minimal timestamp value"; + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1970-01-01 00:00:00 GMT" + ]; + [ + "1970-01-31 00:00:00 GMT" + ]; + # + ] + ] + } + ] + }; + { + "Label" = "Maximum timestamp value"; + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + 
"OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2105-12-31 23:59:59.999999 GMT" + ]; + [ + "2105-12-31 00:00:00 GMT" + ]; + [ + "2105-12-31 00:00:00 GMT" + ]; + # + ] + ] + } + ] + }; + { + "Label" = "Timestamp below minimum"; + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1969-12-31 23:59:59.999999 Atlantic/Azores" + ]; + # + ] + ] + } + ] + }; + { + "Label" = "Timestamp above maximum"; + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt new file mode 100644 index 00000000000..5d1dfe80a94 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_FormatMicroseconds_/results.txt @@ -0,0 +1,98 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2024-01-01 00:00:00" + ]; + [ + "2024-01-01 00:00:00.000000" + ]; + [ + "2024-01-01 00:00:00.000001" + ]; + [ + "2024-01-01 00:00:00.000001" + ]; + [ + "2024-01-01 00:00:00.05" + ]; + [ + "2024-01-01 00:00:00.050000" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt new file mode 100644 index 00000000000..31b8439bc94 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Format_/results.txt @@ -0,0 +1,48 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "% year 1970 monthFullName January monthShortName Jan month 01 day 01 hours 00 minutes 00 seconds 00 tz +0000 tzname GMT text" + ] + ]; + [ + [ + "% year 2018 monthFullName December monthShortName Dec month 12 day 01 hours 01 minutes 02 seconds 03.456789 tz +0300 tzname Europe/Moscow text" + ] + ]; + [ + [ + "% year 2011 monthFullName March monthShortName Mar month 03 day 13 hours 03 minutes 15 seconds 00 tz -0700 tzname America/Los_Angeles text" + ] + ]; + [ + [ + "% year 2011 monthFullName November monthShortName Nov month 11 day 06 hours 01 minutes 15 seconds 00 tz -0700 tzname America/Los_Angeles text" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt new file mode 100644 index 00000000000..2c503563e1c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_From_/results.txt @@ -0,0 +1,148 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "ts_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "ts_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "ts_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "ts_empty"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "interval_days"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_hours"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_minutes"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "interval_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "3875345000000" + ]; + [ + "3875345000000" + ]; + [ + "3875345000000" + ]; + #; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "604800000000" + ]; + [ + "-604800000000" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt new file mode 100644 index 00000000000..262c45b5971 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Get_/results.txt @@ -0,0 +1,628 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "ryear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rdayofyear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rmonth"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rmonthname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "rweekofyear"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rweekofyeariso8601"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofmonth"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofweek"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rdayofweekname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "rhour"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rminute"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rsecond"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "rmsec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rusec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "rtz"; + [ + "OptionalType"; + [ + "DataType"; + "Uint16" + ] + ] + ]; + [ + "rtzname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1970" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "11" + ]; + [ + "14" + ]; + [ + "0" + ]; + [ + "0" + ]; + 
[ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "2" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "5" + ]; + [ + "Friday" + ]; + [ + "14" + ]; + [ + "8" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "32" + ]; + [ + "2" + ]; + [ + "February" + ]; + [ + "5" + ]; + [ + "5" + ]; + [ + "1" + ]; + [ + "7" + ]; + [ + "Sunday" + ]; + [ + "17" + ]; + [ + "3" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "246" + ]; + [ + "9" + ]; + [ + "September" + ]; + [ + "36" + ]; + [ + "36" + ]; + [ + "3" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "7" + ]; + [ + "22" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1970" + ]; + [ + "365" + ]; + [ + "12" + ]; + [ + "December" + ]; + [ + "53" + ]; + [ + "53" + ]; + [ + "31" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "23" + ]; + [ + "59" + ]; + [ + "59" + ]; + [ + "999" + ]; + [ + "999999" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1971" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "1" + ]; + [ + "53" + ]; + [ + "1" + ]; + [ + "5" + ]; + [ + "Friday" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1971" + ]; + [ + "14" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "3" + ]; + [ + "2" + ]; + [ + "14" + ]; + [ + "4" + ]; + [ + "Thursday" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "GMT" + ] + ]; + [ + [ + "1978" + ]; + [ + "25" + ]; + [ + "1" + ]; + [ + "January" + ]; + [ + "5" + ]; + [ + "4" + ]; + [ + "25" + ]; + [ + "3" + ]; + [ + "Wednesday" + ]; + [ + "16" + ]; + [ + "15" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "0" + ]; + [ + "477" + ]; + [ + "Europe/Uzhgorod" + ] + ]; + [ + [ + "2018" + ]; + [ + "335" + ]; + [ + 
"12" + ]; + [ + "December" + ]; + [ + "48" + ]; + [ + "48" + ]; + [ + "1" + ]; + [ + "6" + ]; + [ + "Saturday" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "3" + ]; + [ + "456" + ]; + [ + "456789" + ]; + [ + "1" + ]; + [ + "Europe/Moscow" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt new file mode 100644 index 00000000000..ff03b0fa5df --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ImplicitSplit_/results.txt @@ -0,0 +1,98 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "20181215 000000 GMT" + ]; + [ + "20181215 010203 GMT" + ]; + [ + "20181215 010203.456789 GMT" + ]; + [ + "20181215 000000 Europe/Moscow" + ]; + [ + "20181215 010203 Europe/Moscow" + ]; + [ + "20181215 010203.456789 Europe/Moscow" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt new file mode 100644 index 00000000000..4675d67fd0a --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_MultirowBlockTo_/results.txt @@ -0,0 +1,90 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "interval1"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval2"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval3"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval4"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + [ + "4" + ]; + # + ]; + [ + #; + [ + "0" + ]; + #; + # + ]; + [ + #; + [ + "0" + ]; + #; + # + ]; + [ + #; + #; + [ + "13" + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt new file mode 100644 index 00000000000..5f312e8bd8a --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseIso8601_/results.txt @@ -0,0 +1,53 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt new file mode 100644 index 00000000000..a4687cfa884 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_ParseLim_/results.txt @@ -0,0 +1,238 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column12"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column13"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column14"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column15"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column16"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column17"; + [ + "OptionalType"; + [ + "DataType"; + 
"TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2105-12-31T00:00:00,GMT" + ]; + #; + [ + "2105-12-31T23:59:59,GMT" + ]; + #; + [ + "2105-12-31T23:59:59.999999,GMT" + ]; + #; + [ + "2105-12-31T00:00:00,Etc/GMT+11" + ]; + [ + "2106-01-01T00:00:00,Etc/GMT-1" + ]; + #; + [ + "2105-12-31T22:59:59.999999,Etc/GMT+1" + ]; + [ + "1970-01-01T00:00:00,GMT" + ]; + #; + [ + "1970-01-01T00:00:00,GMT" + ]; + #; + #; + [ + "1970-01-01T00:00:00,GMT" + ]; + [ + "1969-12-31T23:00:00,Etc/GMT+1" + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt new file mode 100644 index 00000000000..14c088137b1 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Parse_/results.txt @@ -0,0 +1,242 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2011-03-08T01:02:03,Europe/Moscow" + ]; + [ + "20110308 010203 +0300" + ]; + [ + "2022-02-23T12:00:00,GMT" + ]; + [ + "20220223 120000 +0000" + ]; + [ + "20110308" + ]; + [ + "20110308" + ]; + [ + "2005-03-05T00:34:45,GMT" + ]; + [ + "2009-02-13T23:31:30,GMT" + ]; + [ + "1994-11-06T08:49:37,GMT" + ]; + [ + "2009-10-14T16:55:33,GMT" + ] + ]; + [ + [ + "2011-03-08T01:02:03.22,Europe/Moscow" + ]; + [ + "20110308 010203.22 +0300" + ]; + [ + "2022-02-23T12:00:00.666666,GMT" + ]; + [ + "20220223 120000.666666 +0000" + ]; + [ + "20111108" + ]; + [ + "20111108" + ]; + [ + "2005-03-04T23:04:00,GMT" + ]; + [ + "2009-09-18T23:37:03.012331,GMT" + ]; + [ + "1994-11-06T08:49:37,GMT" + ]; + [ + 
"1999-01-04T07:42:12,GMT" + ] + ]; + [ + #; + #; + [ + "2022-02-23T12:00:00.999999,GMT" + ]; + [ + "20220223 120000.999999 +0000" + ]; + [ + "20110108" + ]; + [ + "20110108" + ]; + #; + #; + #; + # + ]; + [ + #; + #; + [ + "2022-02-23T12:00:00.42,GMT" + ]; + [ + "20220223 120000.42 +0000" + ]; + [ + "20110108" + ]; + [ + "20110108" + ]; + #; + #; + #; + # + ]; + [ + #; + #; + [ + "2022-02-23T12:00:00.823874,GMT" + ]; + [ + "20220223 120000.823874 +0000" + ]; + [ + "20110108" + ]; + [ + "20110208" + ]; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt new file mode 100644 index 00000000000..f4ff733c507 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Repr_/results.txt @@ -0,0 +1,46 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2016-08-15T00:00:00,GMT" + ]; + [ + "2017-01-01T13:00:00,Europe/Moscow" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt new file mode 100644 index 00000000000..a7d9edbd5a5 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Shift_/results.txt @@ -0,0 +1,459 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column12"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column13"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column14"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2011-11-17T21:20:19.345678,GMT" + ]; + [ + "2011-11-17T21:20:19.345678,GMT" + ]; + [ + "2005-11-17T21:20:19.345678,GMT" + ]; + [ + "1997-11-17T21:20:19.345678,GMT" + ]; + [ + "2001-11-17T21:20:19.345678,GMT" + ]; + [ + "2001-12-17T21:20:19.345678,GMT" + ]; + [ + 
"2002-02-17T21:20:19.345678,GMT" + ]; + [ + "2002-10-17T21:20:19.345678,GMT" + ]; + [ + "2002-11-17T21:20:19.345678,GMT" + ]; + [ + "2012-02-17T21:20:19.345678,GMT" + ]; + [ + "2001-10-17T21:20:19.345678,GMT" + ]; + [ + "2001-08-17T21:20:19.345678,GMT" + ]; + [ + "2000-12-17T21:20:19.345678,GMT" + ]; + [ + "2000-11-17T21:20:19.345678,GMT" + ]; + [ + "1991-08-17T21:20:19.345678,GMT" + ] + ]; + [ + [ + "1980-01-01T11:14:00,GMT" + ]; + [ + "1980-01-01T11:14:00,GMT" + ]; + [ + "1974-01-01T11:14:00,GMT" + ]; + #; + [ + "1970-01-01T11:14:00,GMT" + ]; + [ + "1970-02-01T11:14:00,GMT" + ]; + [ + "1970-04-01T11:14:00,GMT" + ]; + [ + "1970-12-01T11:14:00,GMT" + ]; + [ + "1971-01-01T11:14:00,GMT" + ]; + [ + "1980-04-01T11:14:00,GMT" + ]; + #; + #; + #; + #; + # + ]; + [ + #; + #; + #; + [ + "2101-12-01T01:08:00,Europe/Moscow" + ]; + [ + "2105-12-01T01:08:00,Europe/Moscow" + ]; + [ + "2106-01-01T01:08:00,Europe/Moscow" + ]; + #; + #; + #; + #; + [ + "2105-11-01T01:08:00,Europe/Moscow" + ]; + [ + "2105-09-01T01:08:00,Europe/Moscow" + ]; + [ + "2105-01-01T01:08:00,Europe/Moscow" + ]; + [ + "2104-12-01T01:08:00,Europe/Moscow" + ]; + [ + "2095-09-01T01:08:00,Europe/Moscow" + ] + ]; + [ + [ + "2059-06-13T00:00:00,GMT" + ]; + [ + "2059-06-13T00:00:00,GMT" + ]; + [ + "2053-06-13T00:00:00,GMT" + ]; + [ + "2045-06-13T00:00:00,GMT" + ]; + [ + "2049-06-13T00:00:00,GMT" + ]; + [ + "2049-07-13T00:00:00,GMT" + ]; + [ + "2049-09-13T00:00:00,GMT" + ]; + [ + "2050-05-13T00:00:00,GMT" + ]; + [ + "2050-06-13T00:00:00,GMT" + ]; + [ + "2059-09-13T00:00:00,GMT" + ]; + [ + "2049-05-13T00:00:00,GMT" + ]; + [ + "2049-03-13T00:00:00,GMT" + ]; + [ + "2048-07-13T00:00:00,GMT" + ]; + [ + "2048-06-13T00:00:00,GMT" + ]; + [ + "2039-03-13T00:00:00,GMT" + ] + ]; + [ + [ + "2010-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "2010-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "2004-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "1996-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "2000-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ 
+ "2000-02-29T16:15:00,Europe/Uzhgorod" + ]; + [ + "2000-04-30T16:15:00,Europe/Uzhgorod" + ]; + [ + "2000-12-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "2001-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "2010-04-30T16:15:00,Europe/Uzhgorod" + ]; + [ + "1999-12-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "1999-10-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "1999-02-28T16:15:00,Europe/Uzhgorod" + ]; + [ + "1999-01-31T16:15:00,Europe/Uzhgorod" + ]; + [ + "1989-10-31T16:15:00,Europe/Uzhgorod" + ] + ]; + [ + [ + "2034-02-28T01:02:03.456789,Europe/Moscow" + ]; + [ + "2034-02-28T01:02:03.456789,Europe/Moscow" + ]; + [ + "2028-02-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2020-02-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2024-02-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2024-03-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2024-05-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2025-01-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2025-02-28T01:02:03.456789,Europe/Moscow" + ]; + [ + "2034-05-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2024-01-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2023-11-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2023-03-29T01:02:03.456789,Europe/Moscow" + ]; + [ + "2023-02-28T01:02:03.456789,Europe/Moscow" + ]; + [ + "2013-11-29T01:02:03.456789,Europe/Moscow" + ] + ]; + [ + [ + "1980-02-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1980-02-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1974-02-01T02:00:00.444123,Europe/Moscow" + ]; + #; + [ + "1970-02-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1970-03-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1970-05-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1971-01-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1971-02-01T02:00:00.444123,Europe/Moscow" + ]; + [ + "1980-05-01T02:00:00.444123,Europe/Moscow" + ]; + #; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt new file mode 100644 index 00000000000..eb32dc98f66 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake1969_/results.txt @@ -0,0 +1,88 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "rdate"; + [ + "OptionalType"; + [ + "DataType"; + "Date" + ] + ] + ]; + [ + "rdatetime"; + [ + "OptionalType"; + [ + "DataType"; + "Datetime" + ] + ] + ]; + [ + "rtimestamp"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "rtzdate"; + [ + "OptionalType"; + [ + "DataType"; + "TzDate" + ] + ] + ]; + [ + "rtzdatetime"; + [ + "OptionalType"; + [ + "DataType"; + "TzDatetime" + ] + ] + ]; + [ + "rtztimestamp"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + [ + "1969-12-31T23:00:00,Canada/Central" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt new file mode 100644 index 00000000000..01b159b33d6 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_SplitMake_/results.txt @@ -0,0 +1,160 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "rdate"; + [ + "OptionalType"; + [ + "DataType"; + "Date" + ] + ] + ]; + [ + "rdatetime"; + [ + "OptionalType"; + [ + "DataType"; + "Datetime" + ] + ] + ]; + [ + "rtimestamp"; + [ + "OptionalType"; + [ + "DataType"; + "Timestamp" + ] + ] + ]; + [ + "rtzdate"; + [ + "OptionalType"; + [ + "DataType"; + "TzDate" + ] + ] + ]; + [ + "rtzdatetime"; + [ + "OptionalType"; + [ + "DataType"; + "TzDatetime" + ] + ] + ]; + [ + "rtztimestamp"; + [ + "OptionalType"; + [ + "DataType"; + "TzTimestamp" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + # + ]; + [ + [ + "0" + ]; + [ + "1800" + ]; + [ + "1800000000" + ]; + #; + #; + [ + "1970-01-01T05:00:00,Europe/Moscow" + ] + ]; + [ + [ + "17880" + ]; + [ + "1544835723" + ]; + [ + "1544835723456789" + ]; + [ + "2018-12-15,Europe/Moscow" + ]; + [ + "2018-12-15T01:02:03,Europe/Moscow" + ]; + [ + "2018-12-15T01:02:03.456789,Europe/Moscow" + ] + ]; + [ + [ + "49672" + ]; + [ + "4291747199" + ]; + [ + "4291747199999999" + ]; + [ + "2105-12-31,Canada/Central" + ]; + [ + "2105-12-31T16:00:00,Canada/Central" + ]; + # + ]; + [ + #; + #; + #; + [ + "2106-01-01,Europe/Moscow" + ]; + [ + "2106-01-01T01:00:00,Europe/Moscow" + ]; + # + ]; + [ + #; + #; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt new file mode 100644 index 00000000000..54614afb16b --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf1969_/results.txt @@ -0,0 +1,151 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + #; + [ + "1969-12-31 20:00:00 Canada/Central" + ]; + [ + "1969-12-31 23:00:00 Canada/Central" + ]; + [ + "1969-12-31 23:00:00 Canada/Central" + ]; + [ + "1969-12-31 22:59:56 Canada/Central" + ]; + [ + "82800000000" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt new file mode 100644 index 00000000000..b548b00a7c7 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_StartOf_/results.txt @@ -0,0 +1,314 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "Interval" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + #; + [ + "1970-01-01 04:00:00 Europe/Moscow" + ]; + [ + "1970-01-01 05:00:00 Europe/Moscow" + ]; + [ + "1970-01-01 05:00:00 Europe/Moscow" + ]; + [ + "1970-01-01 04:59:57 Europe/Moscow" + ]; + [ + "18000000000" + ]; + [ + "1970-01-31 00:00:00 Europe/Moscow" + ] + ]; + [ + [ + "2018-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2018-10-01 00:00:00 Europe/Moscow" + ]; + [ + "2018-12-01 00:00:00 Europe/Moscow" + ]; + [ + "2018-12-10 00:00:00 Europe/Moscow" + ]; + [ + "2018-12-15 00:00:00 Europe/Moscow" + ]; + [ 
+ "2018-12-15 00:00:00 Europe/Moscow" + ]; + [ + "2018-12-15 00:00:00 Europe/Moscow" + ]; + [ + "2018-12-15 01:00:00 Europe/Moscow" + ]; + [ + "2018-12-15 01:02:00 Europe/Moscow" + ]; + [ + "2018-12-15 01:01:57 Europe/Moscow" + ]; + [ + "3723456789" + ]; + [ + "2018-12-31 00:00:00 Europe/Moscow" + ] + ]; + [ + [ + "2105-01-01 00:00:00 GMT" + ]; + [ + "2105-10-01 00:00:00 GMT" + ]; + [ + "2105-12-01 00:00:00 GMT" + ]; + [ + "2105-12-28 00:00:00 GMT" + ]; + [ + "2105-12-31 00:00:00 GMT" + ]; + [ + "2105-12-31 13:00:00 GMT" + ]; + [ + "2105-12-31 16:00:00 GMT" + ]; + [ + "2105-12-31 16:15:00 GMT" + ]; + [ + "2105-12-31 16:23:40 GMT" + ]; + [ + "2105-12-31 16:23:44 GMT" + ]; + [ + "59025000000" + ]; + [ + "2105-12-31 00:00:00 GMT" + ] + ]; + [ + [ + "2106-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2105-12-28 00:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 01:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 01:00:00 Europe/Moscow" + ]; + [ + "2106-01-01 00:59:58 Europe/Moscow" + ]; + [ + "3600000000" + ]; + # + ]; + [ + [ + "2019-01-01 00:00:00 Europe/Moscow" + ]; + [ + "2019-07-01 00:00:00 Europe/Moscow" + ]; + [ + "2019-07-01 00:00:00 Europe/Moscow" + ]; + [ + "2019-07-22 00:00:00 Europe/Moscow" + ]; + [ + "2019-07-24 00:00:00 Europe/Moscow" + ]; + [ + "2019-07-24 00:00:00 Europe/Moscow" + ]; + [ + "2019-07-24 12:00:00 Europe/Moscow" + ]; + [ + "2019-07-24 12:00:00 Europe/Moscow" + ]; + [ + "2019-07-24 12:00:00 Europe/Moscow" + ]; + [ + "2019-07-24 11:59:57 Europe/Moscow" + ]; + [ + "43200000000" + ]; + [ + "2019-07-31 00:00:00 Europe/Moscow" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt new file mode 100644 index 00000000000..c7d4e10a864 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_To_/results.txt @@ -0,0 +1,345 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "interval_to_days"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_hours"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_minutes"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "interval_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "interval_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "date_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "datetime_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "timestamp_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "tzdate_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "tzdatetime_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "tztimestamp_to_seconds"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "date_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "datetime_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "timestamp_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdate_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdatetime_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + 
"tztimestamp_to_msec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "date_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "datetime_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "timestamp_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdate_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tzdatetime_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "tztimestamp_to_usec"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "negative_1d"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2" + ]; + [ + "58" + ]; + [ + "3480" + ]; + [ + "208800" + ]; + [ + "208800000" + ]; + [ + "208800000000" + ]; + [ + "1542844800" + ]; + [ + "1542921619" + ]; + [ + "1542921619" + ]; + [ + "1542758400" + ]; + [ + "1542910819" + ]; + [ + "1542910819" + ]; + [ + "1542844800000" + ]; + [ + "1542921619000" + ]; + [ + "1542921619345" + ]; + [ + "1542758400000" + ]; + [ + "1542910819000" + ]; + [ + "1542910819345" + ]; + [ + "1542844800000000" + ]; + [ + "1542921619000000" + ]; + [ + "1542921619345678" + ]; + [ + "1542758400000000" + ]; + [ + "1542910819000000" + ]; + [ + "1542910819345678" + ]; + [ + "-1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt new file mode 100644 index 00000000000..c6fd6ea8b99 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_TzToDate_/results.txt @@ -0,0 +1,52 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "String" + ] + ]; + [ + "column2"; + [ + "DataType"; + "String" + ] + ]; + [ + "column3"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "2000-01-01"; + "2000-01-01,Europe/Moscow"; + "1999-12-31"; + "2000-01-01,Europe/Moscow" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt new file mode 100644 index 00000000000..9ebcadb565c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_UpdateTz_/results.txt @@ -0,0 +1,64 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1999-12-31T21:00:00Z" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1999-12-31T21:00:00Z" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt new file mode 100644 index 00000000000..75dfcd0b39a --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_Update_/results.txt @@ -0,0 +1,236 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column12"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column13"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column14"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column15"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column16"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column17"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2005-12-01T21:20:19.345678,GMT" + ]; + #; 
+ [ + "2001-07-01T21:20:19.345678,GMT" + ]; + #; + [ + "2001-12-20T21:20:19.345678,GMT" + ]; + #; + #; + [ + "2001-12-01T11:10:09.345678,GMT" + ]; + [ + "2001-12-01T11:20:19.345678,GMT" + ]; + #; + [ + "2001-12-01T21:10:19.345678,GMT" + ]; + #; + [ + "2001-12-01T21:20:09.345678,GMT" + ]; + #; + [ + "2001-12-01T21:20:19.123456,GMT" + ]; + #; + [ + "2001-12-01T21:20:19.345678,America/Creston" + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt new file mode 100644 index 00000000000..c2ee1b5e2e5 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/canondata/test.test_yql-14977_/results.txt @@ -0,0 +1,33 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "May/15/2022" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in new file mode 100644 index 00000000000..96ebafbe3cc --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in @@ -0,0 +1,30 @@ +{ + "fts_seconds" = 3875345u; + "fts_msec" = 3875345000u; + "fts_usec" = 3875345000000u; + + "fdays" = 7; + "fhours" = 168; + "fminutes" = 10080; + "fseconds" = 604800; + "fmsec" = 604800000; + "fusec" = -604800000000; + + "fdays_overflow" = 100000; + "fdays_null" = #; +}; +{ + "fts_seconds" = 3875345u; + "fts_msec" = 3875345000u; + "fts_usec" = 3875345000000u; + + "fdays" = 7; + "fhours" = 168; + "fminutes" = 10080; + "fseconds" = 604800; + "fmsec" = 604800000; + "fusec" = -604800000000; + + "fdays_overflow" = 100000; + "fdays_null" = #; +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr new file mode 100644 index 00000000000..6f1c2afd899 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.in.attr @@ -0,0 +1,89 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fts_seconds"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "fts_msec"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "fts_usec"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "fdays"; + [ + "DataType"; + "Int16" + ] + ]; + [ + "fdays_overflow"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fdays_null"; + [ + "OptionalType"; + [ + "DataType"; + "Int16" + ] + ] + ]; + [ + "fhours"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fminutes"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fseconds"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fmsec"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "fusec"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql 
b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql new file mode 100644 index 00000000000..fd23e1cfccb --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockFrom.sql @@ -0,0 +1,19 @@ +/* syntax version 1 */ +pragma UseBlocks; + +select + DateTime::FromSeconds(fts_seconds) as ts_seconds, + DateTime::FromMilliseconds(fts_msec) as ts_msec, + DateTime::FromMicroseconds(fts_usec) as ts_usec, + DateTime::FromMicroseconds(fts_msec * fts_msec) as ts_empty, + + DateTime::IntervalFromDays(fdays) as interval_days, + DateTime::IntervalFromHours(fhours) as interval_hours, + DateTime::IntervalFromMinutes(fminutes) as interval_minutes, + DateTime::IntervalFromSeconds(fseconds) as interval_seconds, + DateTime::IntervalFromMilliseconds(fmsec) as interval_msec, + DateTime::IntervalFromMicroseconds(fusec) as interval_usec, + + DateTime::IntervalFromDays(fdays_overflow) as interval_days_overflow, + DateTime::IntervalFromDays(fdays_null) as interval_null, +from Input diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in new file mode 100644 index 00000000000..bd5a96985b8 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in @@ -0,0 +1,6 @@ +{ + "fdate"="2018-11-22"; + "fdatetime"="2018-11-22T21:20:19Z"; + "ftimestamp"="2018-11-22T21:20:19.345678Z"; +}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr new file mode 100644 index 00000000000..d9a16bcd3d8 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.in.attr @@ -0,0 +1,31 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git 
a/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql new file mode 100644 index 00000000000..3a21ac8c81e --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockGet.sql @@ -0,0 +1,36 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + Unwrap(cast(fdate as Date)) as `date`, + Unwrap(cast(fdatetime as Datetime)) as `datetime`, + Unwrap(cast(ftimestamp as Timestamp)) as `timestamp`, + from Input; +commit; + +SELECT + DateTime::GetHour(`date`) as date_hour, + DateTime::GetMinute(`date`) as date_minute, + DateTime::GetSecond(`date`) as date_second, + DateTime::GetMillisecondOfSecond(`date`) as date_msec, + DateTime::GetMicrosecondOfSecond(`date`) as date_usec, + DateTime::GetTimezoneId(`date`) as date_tz, + DateTime::GetTimezoneName(`date`) as date_tzname, + + DateTime::GetHour(`datetime`) as datetime_hour, + DateTime::GetMinute(`datetime`) as datetime_minute, + DateTime::GetSecond(`datetime`) as datetime_second, + DateTime::GetMillisecondOfSecond(`datetime`) as datetime_msec, + DateTime::GetMicrosecondOfSecond(`datetime`) as datetime_usec, + DateTime::GetTimezoneId(`datetime`) as datetime_tz, + DateTime::GetTimezoneName(`datetime`) as datetime_tzname, + + DateTime::GetHour(`timestamp`) as timestamp_hour, + DateTime::GetMinute(`timestamp`) as timestamp_minute, + DateTime::GetSecond(`timestamp`) as timestamp_second, + DateTime::GetMillisecondOfSecond(`timestamp`) as timestamp_msec, + DateTime::GetMicrosecondOfSecond(`timestamp`) as timestamp_usec, + DateTime::GetTimezoneId(`timestamp`) as timestamp_tz, + DateTime::GetTimezoneName(`timestamp`) as timestamp_tzname, +FROM @t; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in new file mode 100644 index 00000000000..61aafc6f44d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in @@ -0,0 
+1,16 @@ +{ + "fdate"="2018-12-15"; + "fdatetime"="2018-12-15T01:02:03Z"; + "ftimestamp"="2018-12-15T01:02:03.456789Z"; + "ftzdate"="2018-12-15,Europe/Moscow"; + "ftzdatetime"="2018-12-15T01:02:03,Europe/Moscow"; + "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" +}; +{ + "fdate"="2005-12-31"; + "fdatetime"="2005-12-31T23:59:59Z"; + "ftimestamp"="2005-12-31T23:59:59.999999Z"; + "ftzdate"="2005-12-31,Canada/Central"; + "ftzdatetime"="2005-12-31T16:00:00,Canada/Central"; + "ftztimestamp"="2005-12-31T23:00:00.000000,Canada/Central" +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr new file mode 100644 index 00000000000..876e4f8a19d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.in.attr @@ -0,0 +1,52 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql new file mode 100644 index 00000000000..a6c0d65a83c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockSplitMake.sql @@ -0,0 +1,21 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + Unwrap(cast(fdate as Date)) as `date`, + Unwrap(cast(fdatetime as Datetime)) as `datetime`, + Unwrap(cast(ftimestamp as Timestamp)) as `timestamp`, + Unwrap(cast(ftzdate as TzDate)) as `tzdate`, + Unwrap(cast(ftzdatetime as TzDatetime)) as `tzdatetime`, + Unwrap(cast(ftztimestamp as TzTimestamp)) as `tztimestamp` + from Input; +commit; + 
+select + DateTime::MakeDate(`date`) as rdate, + DateTime::MakeDatetime(`datetime`) as rdatetime, + DateTime::MakeTimestamp(`timestamp`) as rtimestamp, + DateTime::MakeTzDate(`tzdate`) as rtzdate, + DateTime::MakeTzDatetime(`tzdatetime`) as rtzdatetime, + DateTime::MakeTzTimestamp(`tztimestamp`) as rtztimestamp +from @t;
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.attr diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in new file mode 100644 index 00000000000..f482585e720 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in @@ -0,0 +1,15 @@ +{ + "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow" +}; +{ + "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" +}; +{ + "ftztimestamp"="2105-12-31T16:23:45.000000,GMT" +}; +{ + "ftztimestamp"="2106-01-01T01:00:00.000000,Europe/Moscow" +}; +{ + "ftztimestamp"="2019-07-24T12:00:00,Europe/Moscow" +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr new file mode 100644 index 00000000000..3915337be3c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql new file mode 100644 index 00000000000..e531d6f1c8d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockStartOf.sql @@ -0,0 +1,30 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + cast(ftztimestamp as TzTimestamp) as `tztimestamp`, + from Input; + +commit; + +select + DateTime::StartOfYear(`tztimestamp`), + + DateTime::StartOfQuarter(`tztimestamp`), + + DateTime::StartOfMonth(`tztimestamp`), + + DateTime::StartOfWeek(`tztimestamp`), 
+ + DateTime::StartOfDay(`tztimestamp`), + + DateTime::StartOf(`tztimestamp`, Interval("PT13H")), + + DateTime::StartOf(`tztimestamp`, Interval("PT4H")), + DateTime::StartOf(`tztimestamp`, Interval("PT15M")), + DateTime::StartOf(`tztimestamp`, Interval("PT20S")), + DateTime::StartOf(`tztimestamp`, Interval("PT7S")), + DateTime::TimeOfDay(`tztimestamp`), + + DateTime::EndOfMonth(`tztimestamp`), +from @t; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in new file mode 100644 index 00000000000..06d60295808 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in @@ -0,0 +1,10 @@ +{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"}; +{"ftztimestamp"="1970-01-02T14:08:00.000000,GMT"}; +{"ftztimestamp"="1970-02-01T17:03:00.000000,GMT"}; +{"ftztimestamp"="1970-09-03T07:22:00.000000,GMT"}; +{"ftztimestamp"="1970-12-31T23:59:59.999999,GMT"}; +{"ftztimestamp"="1971-01-01T00:00:00.000000,GMT"}; +{"ftztimestamp"="1971-01-14T00:00:00.000000,GMT"}; +{"ftztimestamp"="1978-01-25T16:15:00.000000,Europe/Uzhgorod"}; +{"ftztimestamp"="2018-12-01T01:02:03.456789,Europe/Moscow"}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr new file mode 100644 index 00000000000..3915337be3c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql new file mode 100644 index 00000000000..3087d4e78d4 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTmGet.sql @@ -0,0 +1,27 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + 
cast(ftztimestamp as TzTimestamp) as `tm`, + from Input; + +commit; + +SELECT + DateTime::GetYear(tm) as ryear, + DateTime::GetDayOfYear(tm) as rdayofyear, + DateTime::GetMonth(tm) as rmonth, + DateTime::GetMonthName(tm) as rmonthname, + DateTime::GetWeekOfYear(tm) as rweekofyear, + DateTime::GetWeekOfYearIso8601(tm) as rweekofyeariso8601, + DateTime::GetDayOfMonth(tm) as rdayofmonth, + DateTime::GetDayOfWeek(tm) as rdayofweek, + DateTime::GetDayOfWeekName(tm) as rdayofweekname, + DateTime::GetHour(tm) as rhour, + DateTime::GetMinute(tm) as rminute, + DateTime::GetSecond(tm) as rsecond, + DateTime::GetMillisecondOfSecond(tm) as rmsec, + DateTime::GetMicrosecondOfSecond(tm) as rusec, + DateTime::GetTimezoneId(tm) as rtz, + DateTime::GetTimezoneName(tm) as rtzname +FROM @t; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in new file mode 100644 index 00000000000..feb9044f382 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in @@ -0,0 +1,11 @@ +{ + "fdate"="2018-11-22"; + "fdatetime"="2018-11-22T21:20:19Z"; + "ftimestamp"="2018-11-22T21:20:19.345678Z"; + "finterval"="P2DT10H"; + "ftzdate"="2018-11-22,Europe/Moscow"; + "ftzdatetime"="2018-11-22T21:20:19,Europe/Moscow"; + "ftztimestamp"="2018-11-22T21:20:19.345678,Europe/Moscow"; + "finterval_1day"="P1D" +}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr new file mode 100644 index 00000000000..cb97c1895c5 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.in.attr @@ -0,0 +1,66 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "finterval"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdate"; + [ + "DataType"; 
+ "String" + ] + ]; + [ + "ftzdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "finterval_1day"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql new file mode 100644 index 00000000000..5f5e68aa80f --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/BlockTo.sql @@ -0,0 +1,51 @@ +/* syntax version 1 */ +pragma UseBlocks; +insert into @t + select + cast(fdate as Date) as `date`, + cast(fdatetime as Datetime) as `datetime`, + cast(ftimestamp as Timestamp) as `timestamp`, + cast(finterval as Interval) as `interval`, + cast(ftzdate as TzDate) as `tzdate`, + cast(ftzdatetime as TzDatetime) as `tzdatetime`, + cast(ftztimestamp as TzTimestamp) as `tztimestamp`, + cast(null as Interval) as `interval_null`, + -cast(finterval_1day as Interval) as `negative_1d`, + from Input; + +commit; +select + DateTime::ToDays(`interval`) as interval_to_days, + DateTime::ToHours(`interval`) as interval_to_hours, + DateTime::ToMinutes(`interval`) as interval_to_minutes, + DateTime::ToSeconds(`interval`) as interval_to_seconds, + DateTime::ToMilliseconds(`interval`) as interval_to_msec, + DateTime::ToMicroseconds(`interval`) as interval_to_usec, + + DateTime::ToSeconds(`date`) as date_to_seconds, + DateTime::ToSeconds(`datetime`) as datetime_to_seconds, + DateTime::ToSeconds(`timestamp`) as timestamp_to_seconds, + DateTime::ToSeconds(`tzdate`) as tzdate_to_seconds, + DateTime::ToSeconds(`tzdatetime`) as tzdatetime_to_seconds, + DateTime::ToSeconds(`tztimestamp`) as tztimestamp_to_seconds, + + DateTime::ToMilliseconds(`date`) as date_to_msec, + DateTime::ToMilliseconds(`datetime`) as datetime_to_msec, + DateTime::ToMilliseconds(`timestamp`) as timestamp_to_msec, + DateTime::ToMilliseconds(`tzdate`) as tzdate_to_msec, + DateTime::ToMilliseconds(`tzdatetime`) as 
tzdatetime_to_msec, + DateTime::ToMilliseconds(`tztimestamp`) as tztimestamp_to_msec, + + DateTime::ToMicroseconds(`date`) as date_to_usec, + DateTime::ToMicroseconds(`datetime`) as datetime_to_usec, + DateTime::ToMicroseconds(`timestamp`) as timestamp_to_usec, + DateTime::ToMicroseconds(`tzdate`) as tzdate_to_usec, + DateTime::ToMicroseconds(`tzdatetime`) as tzdatetime_to_usec, + DateTime::ToMicroseconds(`tztimestamp`) as tztimestamp_to_usec, + + DateTime::ToDays(`interval_null`) as interval_null, + + /* Overflow test */ + DateTime::ToDays(`negative_1d`) as negative_1d, +from @t; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql b/yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql new file mode 100644 index 00000000000..61b4a29e536 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/EndOf.sql @@ -0,0 +1,34 @@ +/* syntax version 1 */ +$format = DateTime::Format("%Y-%m-%d %H:%M:%S %Z"); + +select + $format(DateTime::EndOfMonth(TzDateTime('2023-07-07T01:02:03,Europe/Moscow'))), + $format(DateTime::EndOfMonth(Date('2023-08-08'))), + $format(DateTime::EndOfMonth(Date('2023-09-09'))), + $format(DateTime::EndOfMonth(Date('2023-02-02'))), + $format(DateTime::EndOfMonth(Date('2024-02-02'))) +into result `Normal cases`; + +$tsMin = '1970-01-01T00:00:00.000000'; +$tsMax = '2105-12-31T23:59:59.999999'; +$tsBelow = '1969-12-31T23:59:59.999999'; +$tsAbove = '2106-01-01T00:00:00.000000'; + +select $format(cast($tsMin || 'Z' as Timestamp)) + , $format(DateTime::EndOfMonth(cast($tsMin || 'Z' as Timestamp))) + , $format(DateTime::EndOfMonth(cast($tsMin || ',Atlantic/Madeira' as Timestamp))) +into result `Minimal timestamp value`; + +select $format(cast($tsMax || 'Z' as Timestamp)) + , $format(DateTime::EndOfMonth(cast($tsMax || 'Z' as Timestamp))) + , $format(DateTime::EndOfMonth(cast('2105-12-12T00:00:00Z' as Timestamp))) + , $format(DateTime::EndOfMonth(cast($tsMax || ',Atlantic/Azores' as Timestamp))) +into result `Maximum timestamp 
value`; + +select $format(cast($tsBelow || ',Atlantic/Azores' as TzTimestamp)) + , $format(DateTime::EndOfMonth(cast($tsBelow || ',Atlantic/Azores' as TzTimestamp))) +into result `Timestamp below minimum`; + +select $format(cast($tsAbove || ',Atlantic/Madeira' as TzTimestamp)) + , $format(DateTime::EndOfMonth(cast($tsAbove || ',Atlantic/Madeira' as TzTimestamp))) +into result `Timestamp above maximum`; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Format.in b/yql/essentials/udfs/common/datetime2/test/cases/Format.in new file mode 100644 index 00000000000..f9390e7c2be --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Format.in @@ -0,0 +1,4 @@ +{"ftztimestamp" = "1970-01-01T00:00:00,GMT"}; +{"ftztimestamp" = "2018-12-01T01:02:03.456789,Europe/Moscow"}; +{"ftztimestamp" = "2011-03-13T02:15:00,America/Los_Angeles"}; +{"ftztimestamp" = "2011-11-06T01:15:00,America/Los_Angeles"}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr new file mode 100644 index 00000000000..2cc4f8c0d68 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Format.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Format.sql b/yql/essentials/udfs/common/datetime2/test/cases/Format.sql new file mode 100644 index 00000000000..25daf1105dc --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Format.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +$format = DateTime::Format("%% year %Y monthFullName %B monthShortName %b month %m day %d hours %H minutes %M seconds %S tz %z tzname %Z text"); + +select + $format(DateTime::Split(cast(ftztimestamp as TzTimestamp))) +from Input diff --git a/yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql 
b/yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql new file mode 100644 index 00000000000..3517da3bf35 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/FormatMicroseconds.sql @@ -0,0 +1,15 @@ +/* syntax version 1 */ +$parse = DateTime::Parse("%Y-%m-%d %H:%M:%S"); + +$dt0 = $parse("2024-01-01 00:00:00"); +$dt1 = $parse("2024-01-01 00:00:00.000001"); +$dt2 = $parse("2024-01-01 00:00:00.05"); + +$format = DateTime::Format("%Y-%m-%d %H:%M:%S"); +$format_ms = DateTime::Format("%Y-%m-%d %H:%M:%S", True as AlwaysWriteFractionalSeconds); + +SELECT + $format($dt0), $format_ms($dt0), + $format($dt1), $format_ms($dt1), + $format($dt2), $format_ms($dt2) +; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/From.in b/yql/essentials/udfs/common/datetime2/test/cases/From.in new file mode 100644 index 00000000000..dd293eaaa7d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/From.in @@ -0,0 +1,12 @@ +{ + "fts_seconds" = 3875345u; + "fts_msec" = 3875345000u; + "fts_usec" = 3875345000000u; + + "fdays" = 7; + "fhours" = 168; + "fminutes" = 10080; + "fseconds" = 604800; + "fmsec" = 604800000; + "fusec" = -604800000000; +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/From.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/From.in.attr new file mode 100644 index 00000000000..538f83ab720 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/From.in.attr @@ -0,0 +1,73 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fts_seconds"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "fts_msec"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "fts_usec"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "fdays"; + [ + "DataType"; + "Int16" + ] + ]; + [ + "fhours"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fminutes"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fseconds"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "fmsec"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "fusec"; + [ + 
"DataType"; + "Int64" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/From.sql b/yql/essentials/udfs/common/datetime2/test/cases/From.sql new file mode 100644 index 00000000000..c596e33f586 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/From.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +select + DateTime::FromSeconds(fts_seconds) as ts_seconds, + DateTime::FromMilliseconds(fts_msec) as ts_msec, + DateTime::FromMicroseconds(fts_usec) as ts_usec, + DateTime::FromMicroseconds(fts_msec * fts_msec) as ts_empty, + + DateTime::IntervalFromDays(fdays) as interval_days, + DateTime::IntervalFromHours(fhours) as interval_hours, + DateTime::IntervalFromMinutes(fminutes) as interval_minutes, + DateTime::IntervalFromSeconds(fseconds) as interval_seconds, + DateTime::IntervalFromMilliseconds(fmsec) as interval_msec, + DateTime::IntervalFromMicroseconds(fusec) as interval_usec +from Input diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Get.in b/yql/essentials/udfs/common/datetime2/test/cases/Get.in new file mode 100644 index 00000000000..06d60295808 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Get.in @@ -0,0 +1,10 @@ +{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"}; +{"ftztimestamp"="1970-01-02T14:08:00.000000,GMT"}; +{"ftztimestamp"="1970-02-01T17:03:00.000000,GMT"}; +{"ftztimestamp"="1970-09-03T07:22:00.000000,GMT"}; +{"ftztimestamp"="1970-12-31T23:59:59.999999,GMT"}; +{"ftztimestamp"="1971-01-01T00:00:00.000000,GMT"}; +{"ftztimestamp"="1971-01-14T00:00:00.000000,GMT"}; +{"ftztimestamp"="1978-01-25T16:15:00.000000,Europe/Uzhgorod"}; +{"ftztimestamp"="2018-12-01T01:02:03.456789,Europe/Moscow"}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr new file mode 100644 index 00000000000..2cc4f8c0d68 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Get.in.attr @@ -0,0 +1,17 @@ +{ + 
"_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Get.sql b/yql/essentials/udfs/common/datetime2/test/cases/Get.sql new file mode 100644 index 00000000000..99ec9528fcb --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Get.sql @@ -0,0 +1,23 @@ +/* syntax version 1 */ +SELECT + DateTime::GetYear(tm) as ryear, + DateTime::GetDayOfYear(tm) as rdayofyear, + DateTime::GetMonth(tm) as rmonth, + DateTime::GetMonthName(tm) as rmonthname, + DateTime::GetWeekOfYear(tm) as rweekofyear, + DateTime::GetWeekOfYearIso8601(tm) as rweekofyeariso8601, + DateTime::GetDayOfMonth(tm) as rdayofmonth, + DateTime::GetDayOfWeek(tm) as rdayofweek, + DateTime::GetDayOfWeekName(tm) as rdayofweekname, + DateTime::GetHour(tm) as rhour, + DateTime::GetMinute(tm) as rminute, + DateTime::GetSecond(tm) as rsecond, + DateTime::GetMillisecondOfSecond(tm) as rmsec, + DateTime::GetMicrosecondOfSecond(tm) as rusec, + DateTime::GetTimezoneId(tm) as rtz, + DateTime::GetTimezoneName(tm) as rtzname +FROM ( + SELECT + DateTime::Split(CAST(ftztimestamp as TzTimestamp)) as tm + FROM Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in new file mode 100644 index 00000000000..26f8d006790 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in @@ -0,0 +1,9 @@ +{ + "fdate"="2018-12-15"; + "fdatetime"="2018-12-15T01:02:03Z"; + "ftimestamp"="2018-12-15T01:02:03.456789Z"; + "ftzdate"="2018-12-15,Europe/Moscow"; + "ftzdatetime"="2018-12-15T01:02:03,Europe/Moscow"; + "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" +}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr new file mode 100644 index 00000000000..876e4f8a19d --- 
/dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.in.attr @@ -0,0 +1,52 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql new file mode 100644 index 00000000000..df19228e828 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/ImplicitSplit.sql @@ -0,0 +1,20 @@ +/* syntax version 1 */ +$format = DateTime::Format("%Y%m%d %H%M%S %Z"); + +select + $format(`date`), + $format(`datetime`), + $format(`timestamp`), + $format(`tzdate`), + $format(`tzdatetime`), + $format(`tztimestamp`) +from ( + select + cast(fdate as Date) as `date`, + cast(fdatetime as Datetime) as `datetime`, + cast(ftimestamp as Timestamp) as `timestamp`, + cast(ftzdate as TzDate) as `tzdate`, + cast(ftzdatetime as TzDatetime) as `tzdatetime`, + cast(ftztimestamp as TzTimestamp) as `tztimestamp` + from Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in new file mode 100644 index 00000000000..c6d52b77b32 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in @@ -0,0 +1,24 @@ +{ + "finterval1"="P0Y2M"; + "finterval2"=null; + "finterval3"="P4DT5H"; + "finterval4"="P6Y7M" +}; +{ + "finterval1"="P20000Y4M"; + "finterval2"="PT6H12M"; + "finterval3"=null; + "finterval4"="P9Y10M" +}; +{ + "finterval1"=null; + "finterval2"="PT9H18M"; + "finterval3"="P100000D"; + "finterval4"="P12Y14M" +}; +{ + "finterval1"="P4Y8M"; + 
"finterval2"=null; + "finterval3"="P13DT14H"; + "finterval4"=null +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr new file mode 100644 index 00000000000..c8fb13b9d6e --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.in.attr @@ -0,0 +1,49 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "finterval1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "finterval2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "finterval3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "finterval4"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + ] + ] + } +} diff --git a/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql new file mode 100644 index 00000000000..1670be24513 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/MultirowBlockTo.sql @@ -0,0 +1,20 @@ +/* syntax version 1 */ + +pragma UseBlocks; +insert into @t + select + cast(finterval1 as Interval) as `interval1`, + cast(finterval2 as Interval) as `interval2`, + cast(finterval3 as Interval) as `interval3`, + cast(finterval4 as Interval) as `interval4` +from Input; + +commit; + +select + DateTime::ToDays(`interval1`) as `interval1`, + DateTime::ToDays(`interval2`) as `interval2`, + DateTime::ToDays(`interval3`) as `interval3`, + DateTime::ToDays(`interval4`) as `interval4` +from @t; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Parse.in b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in new file mode 100644 index 00000000000..fa560d488cd --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in @@ -0,0 +1,55 @@ +{ + "fdatetime1"="2011 03 08 010203 Europe/Moscow text"; + "fdatetime2"="%text% 02/23/2022 12:00:00."; + 
"fdatetime3"="march/08/2011"; + "fdatetime4"="mar/08/2011"; + + "frfc822"="Fri, 4 Mar 2005 19:34:45 EST"; + "fiso8601"="2009-02-14T02:31:30+0300"; + "fhttp"="Sunday, 06-Nov-94 08:49:37 GMT"; + "fx509"="20091014165533Z"; +}; +{ + "fdatetime1"="2011 03 08 010203.22 Europe/Moscow text"; + "fdatetime2"="%text% 02/23/2022 12:00:00.666666"; + "fdatetime3"="November/08/2011"; + "fdatetime4"="Nov/08/2011"; + + "frfc822"="4 Mar 05 19:34 -0330"; + "fiso8601"="2009-09-19 03:37:03.012331+04:00"; + "fhttp"="Sun Nov 6 08:49:37 1994"; + "fx509"="990104074212Z"; +}; +{ + "fdatetime1"="2011 03 08 010203 Europe/Moscow bar"; + "fdatetime2"="%text% 02/23/2022 12:00:00.999999999"; + "fdatetime3"="JanUAry/08/2011"; + "fdatetime4"="jAN/08/2011"; + + "frfc822"="17 Nov 2008 19:34:45"; + "fiso8601"="1990-03-151Y15:16:17.18"; + "fhttp"="1990-03-151Y15:16:17.18"; + "fx509"="500101000000Z"; +}; +{ + "fdatetime1"="2011 03 08 010203 Europe/Moscow bar"; + "fdatetime2"="%text% 02/23/2022 12:00:00.42"; + "fdatetime3"="JanUArY/08/2011"; + "fdatetime4"="JAN/08/2011"; + + "frfc822"="17 Nov 2008 19:34:45"; + "fiso8601"="1990-03-151Y15:16:17.182"; + "fhttp"="1990-03-151Y15:16:17.182"; + "fx509"="500101000000Z"; +}; +{ + "fdatetime1"="2011 03 08 010203 Europe/Moscow bar"; + "fdatetime2"="%text% 02/23/2022 12:00:00.82387468293473839939483932923"; + "fdatetime3"="JanUArY/08/2011"; + "fdatetime4"="feb/08/2011"; + + "frfc822"="17 Nov 2008 19:34:45"; + "fiso8601"="1990-03-151Y15:16:17.182"; + "fhttp"="1990-03-151Y15:16:17.182"; + "fx509"="500101000000Z"; +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr new file mode 100644 index 00000000000..935646e0df7 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Parse.in.attr @@ -0,0 +1,66 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdatetime1"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime2"; + [ + "DataType"; + "String" + ] 
+ ]; + [ + "fdatetime3"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime4"; + [ + "DataType"; + "String" + ] + ]; + [ + "frfc822"; + [ + "DataType"; + "String" + ] + ]; + [ + "fiso8601"; + [ + "DataType"; + "String" + ] + ]; + [ + "fhttp"; + [ + "DataType"; + "String" + ] + ]; + [ + "fx509"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Parse.sql b/yql/essentials/udfs/common/datetime2/test/cases/Parse.sql new file mode 100644 index 00000000000..b39fc6c2448 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Parse.sql @@ -0,0 +1,25 @@ +/* syntax version 1 */ +$parse1 = DateTime::Parse("%Y %m %d %H%M%S %Z text"); +$parse2 = DateTime::Parse("%%text%% %m/%d/%Y %H:%M:%S"); +$parse3 = DateTime::Parse("%B/%d/%Y"); +$parse4 = DateTime::Parse("%b/%d/%Y"); + + +$format1 = DateTime::Format("%Y-%m-%dT%H:%M:%S,%Z"); +$format2 = DateTime::Format("%Y%m%d %H%M%S %z"); +$format3 = DateTime::Format("%Y%m%d"); + +select + $format1($parse1(fdatetime1)), + $format2($parse1(fdatetime1)), + $format1($parse2(fdatetime2)), + $format2($parse2(fdatetime2)), + + $format3($parse3(fdatetime3)), + $format3($parse4(fdatetime4)), + + $format1(DateTime::ParseRfc822(frfc822)), + $format1(DateTime::ParseIso8601(fiso8601)), + $format1(DateTime::ParseHttp(fhttp)), + $format1(DateTime::ParseX509(fx509)) +from Input diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql b/yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql new file mode 100644 index 00000000000..c79bdbf5f11 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/ParseIso8601.sql @@ -0,0 +1 @@ +select DateTime::ParseIso8601("2106-01-01T00:00:00"), DateTime::ParseIso8601("2200-01-01T00:00:00"), DateTime::ParseIso8601("2106-02-01T00:00:00");
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql b/yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql new file mode 100644 index 00000000000..2ecd70fcbba --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/ParseLim.sql @@ -0,0 +1,19 @@ +$dt_parser1 = DateTime::Parse('%Y-%m-%d'); +$dt_parser2 = DateTime::Parse('%Y-%m-%d %H:%M:%S'); +$dt_parser1z = DateTime::Parse('%Y-%m-%d %Z'); +$dt_parser2z = DateTime::Parse('%Y-%m-%d %H:%M:%S %Z'); + +SELECT + $dt_parser1("2105-12-31"), $dt_parser1("2106-01-01"), + $dt_parser2("2105-12-31 23:59:59"), $dt_parser2("2106-01-01 00:00:00"), + $dt_parser2("2105-12-31 23:59:59.999999"), $dt_parser2("2106-01-01 00:00:00.000000"), + $dt_parser1z("2105-12-31 Etc/GMT+11"), + $dt_parser1z("2106-01-01 Etc/GMT-1"), + $dt_parser2z("2105-12-31 23:00:00 Etc/GMT+1"), + $dt_parser2z("2105-12-31 22:59:59.999999 Etc/GMT+1"), + $dt_parser1("1970-01-01"), $dt_parser1("1969-12-31"), + $dt_parser2("1970-01-01 00:00:00"), $dt_parser2("1969-12-31 23:59:59"), + $dt_parser2("1969-12-31 23:59:59.999999"), $dt_parser2("1970-01-01 00:00:00.000000"), + $dt_parser2z("1969-12-31 23:00:00 Etc/GMT+1"), + $dt_parser2z("1969-12-31 22:59:59.999999 Etc/GMT+1"); + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Repr.in b/yql/essentials/udfs/common/datetime2/test/cases/Repr.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Repr.in diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Repr.sql b/yql/essentials/udfs/common/datetime2/test/cases/Repr.sql new file mode 100644 index 00000000000..d92b0ade4e1 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Repr.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +select + DateTime::Parse("%Y.%m.%d")("2016.08.15"), + DateTime::Split(AddTimezone(DateTime("2017-01-01T10:00:00Z"),"Europe/Moscow")) diff --git 
a/yql/essentials/udfs/common/datetime2/test/cases/Shift.in b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in new file mode 100644 index 00000000000..7f81c5d0746 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in @@ -0,0 +1,8 @@ +{"ftztimestamp"="2001-11-17T21:20:19.345678,GMT"; }; +{"ftztimestamp"="1970-01-01T11:14:00.000000,GMT"}; +{"ftztimestamp"="2105-12-01T01:08:00.000000,Europe/Moscow"}; +{"ftztimestamp"="2049-06-13T00:00:00.000000,GMT"}; +{"ftztimestamp"="2000-01-31T16:15:00.000000,Europe/Uzhgorod"}; +{"ftztimestamp"="2024-02-29T01:02:03.456789,Europe/Moscow"}; +{"ftztimestamp"="1970-02-01T02:00:00.444123,Europe/Moscow"}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr new file mode 100644 index 00000000000..3915337be3c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Shift.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Shift.sql b/yql/essentials/udfs/common/datetime2/test/cases/Shift.sql new file mode 100644 index 00000000000..b421c558683 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Shift.sql @@ -0,0 +1,22 @@ +SELECT + cast(DateTime::MakeTzTimestamp(DateTime::ShiftYears(tm, 10)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftYears(tm, 10)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftQuarters(tm, 16)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftQuarters(tm, -16)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 0)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 1)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 3)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 11)) as String), + 
cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 12)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, 123)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -1)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -3)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -11)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -12)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::ShiftMonths(tm, -123)) as String) +from ( + select + cast(ftztimestamp as TzTimestamp) as tm + from Input +); + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in new file mode 100644 index 00000000000..580acf3f863 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in @@ -0,0 +1,49 @@ +{ + "fdate"="1945-05-09"; + "fdatetime"="1945-05-09T00:00:00Z"; + "ftimestamp"="1945-05-09T00:00:00.000000Z"; + "ftzdate"="1945-05-09,Europe/Moscow"; + "ftzdatetime"="1945-05-09T00:00:00,Europe/Moscow"; + "ftztimestamp"="1945-05-09T00:00:00.000000,Europe/Moscow" +}; +{ + "fdate"="1970-01-01"; + "fdatetime"="1970-01-01T00:30:00Z"; + "ftimestamp"="1970-01-01T00:30:00.000000Z"; + "ftzdate"="1970-01-01,Europe/Moscow"; + "ftzdatetime"="1970-01-01T01:00:00,Europe/Moscow"; + "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow" +}; +{ + "fdate"="2018-12-15"; + "fdatetime"="2018-12-15T01:02:03Z"; + "ftimestamp"="2018-12-15T01:02:03.456789Z"; + "ftzdate"="2018-12-15,Europe/Moscow"; + "ftzdatetime"="2018-12-15T01:02:03,Europe/Moscow"; + "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" +}; +{ + "fdate"="2105-12-31"; + "fdatetime"="2105-12-31T23:59:59Z"; + "ftimestamp"="2105-12-31T23:59:59.999999Z"; + "ftzdate"="2105-12-31,Canada/Central"; + "ftzdatetime"="2105-12-31T16:00:00,Canada/Central"; + "ftztimestamp"="2105-12-31T23:00:00.000000,Canada/Central" +}; +{ + 
"fdate"="2106-01-01"; + "fdatetime"="2106-01-01T00:00:00Z"; + "ftimestamp"="2106-01-01T00:00:00.000000Z"; + "ftzdate"="2106-01-01,Europe/Moscow"; + "ftzdatetime"="2106-01-01T01:00:00,Europe/Moscow"; + "ftztimestamp"="2106-01-01T05:00:00.000000,Europe/Moscow" +}; +{ + "fdate"="2117-11-07"; + "fdatetime"="2117-11-07T00:00:00Z"; + "ftimestamp"="2117-11-07T00:00:00.000000Z"; + "ftzdate"="2117-11-07,Europe/Moscow"; + "ftzdatetime"="2117-11-07T00:00:00,Europe/Moscow"; + "ftztimestamp"="2117-11-07T00:00:00.000000,Europe/Moscow" +}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr new file mode 100644 index 00000000000..876e4f8a19d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.in.attr @@ -0,0 +1,52 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql new file mode 100644 index 00000000000..9a8c08a8db1 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake.sql @@ -0,0 +1,18 @@ +/* syntax version 1 */ +select + DateTime::MakeDate(DateTime::Split(`date`)) as rdate, + DateTime::MakeDatetime(DateTime::Split(`datetime`)) as rdatetime, + DateTime::MakeTimestamp(DateTime::Split(`timestamp`)) as rtimestamp, + DateTime::MakeTzDate(DateTime::Split(`tzdate`)) as rtzdate, + DateTime::MakeTzDatetime(DateTime::Split(`tzdatetime`)) as rtzdatetime, + DateTime::MakeTzTimestamp(DateTime::Split(`tztimestamp`)) as rtztimestamp +from ( + select + 
cast(fdate as Date) as `date`, + cast(fdatetime as Datetime) as `datetime`, + cast(ftimestamp as Timestamp) as `timestamp`, + cast(ftzdate as TzDate) as `tzdate`, + cast(ftzdatetime as TzDatetime) as `tzdatetime`, + cast(ftztimestamp as TzTimestamp) as `tztimestamp` + from Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg new file mode 100644 index 00000000000..b5a6eac7ad4 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.cfg @@ -0,0 +1 @@ +in plato.Input SplitMake1969.in diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in new file mode 100644 index 00000000000..70a53282f38 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in @@ -0,0 +1,8 @@ +{ + "fdate"="1969-12-31"; + "fdatetime"="1969-12-31T23:00:00Z"; + "ftimestamp"="1969-12-31T23:00:00.000000Z"; + "ftzdate"="1969-12-31,Canada/Central"; + "ftzdatetime"="1969-12-31T16:00:00,Canada/Central"; + "ftztimestamp"="1969-12-31T23:00:00.000000,Canada/Central" +};
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr new file mode 100644 index 00000000000..876e4f8a19d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.in.attr @@ -0,0 +1,52 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql new file mode 100644 index 00000000000..9a8c08a8db1 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/SplitMake1969.sql @@ -0,0 +1,18 @@ +/* syntax version 1 */ +select + DateTime::MakeDate(DateTime::Split(`date`)) as rdate, + DateTime::MakeDatetime(DateTime::Split(`datetime`)) as rdatetime, + DateTime::MakeTimestamp(DateTime::Split(`timestamp`)) as rtimestamp, + DateTime::MakeTzDate(DateTime::Split(`tzdate`)) as rtzdate, + DateTime::MakeTzDatetime(DateTime::Split(`tzdatetime`)) as rtzdatetime, + DateTime::MakeTzTimestamp(DateTime::Split(`tztimestamp`)) as rtztimestamp +from ( + select + cast(fdate as Date) as `date`, + cast(fdatetime as Datetime) as `datetime`, + cast(ftimestamp as Timestamp) as `timestamp`, + cast(ftzdate as TzDate) as `tzdate`, + cast(ftzdatetime as TzDatetime) as `tzdatetime`, + cast(ftztimestamp as TzTimestamp) as `tztimestamp` + from Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in new file mode 100644 index 00000000000..f482585e720 --- 
/dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in @@ -0,0 +1,15 @@ +{ + "ftztimestamp"="1970-01-01T05:00:00.000000,Europe/Moscow" +}; +{ + "ftztimestamp"="2018-12-15T01:02:03.456789,Europe/Moscow" +}; +{ + "ftztimestamp"="2105-12-31T16:23:45.000000,GMT" +}; +{ + "ftztimestamp"="2106-01-01T01:00:00.000000,Europe/Moscow" +}; +{ + "ftztimestamp"="2019-07-24T12:00:00,Europe/Moscow" +}; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr new file mode 100644 index 00000000000..2cc4f8c0d68 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql new file mode 100644 index 00000000000..201db382300 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf.sql @@ -0,0 +1,21 @@ +/* syntax version 1 */ +$format = DateTime::Format("%Y-%m-%d %H:%M:%S %Z"); + +select + $format(DateTime::StartOfYear(`tztimestamp`)), + $format(DateTime::StartOfQuarter(`tztimestamp`)), + $format(DateTime::StartOfMonth(`tztimestamp`)), + $format(DateTime::StartOfWeek(`tztimestamp`)), + $format(DateTime::StartOfDay(`tztimestamp`)), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT13H"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT4H"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT15M"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT20S"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT7S"))), + DateTime::TimeOfDay(`tztimestamp`), + $format(DateTime::EndOfMonth(`tztimestamp`)), +from ( + select + cast(ftztimestamp as TzTimestamp) as `tztimestamp` + from Input +); diff --git 
a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg new file mode 100644 index 00000000000..d012f94fa22 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.cfg @@ -0,0 +1 @@ +in plato.Input StartOf1969.in diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in new file mode 100644 index 00000000000..1711aa38134 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in @@ -0,0 +1,3 @@ +{ + "ftztimestamp"="1969-12-31T23:00:00.000000,Canada/Central" +};
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr new file mode 100644 index 00000000000..2cc4f8c0d68 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql new file mode 100644 index 00000000000..81fad126328 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/StartOf1969.sql @@ -0,0 +1,20 @@ +/* syntax version 1 */ +$format = DateTime::Format("%Y-%m-%d %H:%M:%S %Z"); + +select + $format(DateTime::StartOfYear(`tztimestamp`)), + $format(DateTime::StartOfQuarter(`tztimestamp`)), + $format(DateTime::StartOfMonth(`tztimestamp`)), + $format(DateTime::StartOfWeek(`tztimestamp`)), + $format(DateTime::StartOfDay(`tztimestamp`)), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT13H"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT4H"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT15M"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT20S"))), + $format(DateTime::StartOf(`tztimestamp`, Interval("PT7S"))), + DateTime::TimeOfDay(`tztimestamp`) +from ( + select + cast(ftztimestamp as TzTimestamp) as `tztimestamp` + from Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/To.in b/yql/essentials/udfs/common/datetime2/test/cases/To.in new file mode 100644 index 00000000000..03be7f7e67e --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/To.in @@ -0,0 +1,11 @@ +{ + "fdate"="2018-11-22"; + "fdatetime"="2018-11-22T21:20:19Z"; + "ftimestamp"="2018-11-22T21:20:19.345678Z"; + "finterval"="P2DT10H"; + "ftzdate"="2018-11-22,Europe/Moscow"; + 
"ftzdatetime"="2018-11-22T21:20:19,Europe/Moscow"; + "ftztimestamp"="2018-11-22T21:20:19.345678,Europe/Moscow"; + "finterval_1day"="P1D"; +}; + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/To.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/To.in.attr new file mode 100644 index 00000000000..cb97c1895c5 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/To.in.attr @@ -0,0 +1,66 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "fdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "fdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "finterval"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdate"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftzdatetime"; + [ + "DataType"; + "String" + ] + ]; + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + [ + "finterval_1day"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/To.sql b/yql/essentials/udfs/common/datetime2/test/cases/To.sql new file mode 100644 index 00000000000..53a1289b60c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/To.sql @@ -0,0 +1,44 @@ +/* syntax version 1 */ +select + DateTime::ToDays(`interval`) as interval_to_days, + DateTime::ToHours(`interval`) as interval_to_hours, + DateTime::ToMinutes(`interval`) as interval_to_minutes, + DateTime::ToSeconds(`interval`) as interval_to_seconds, + DateTime::ToMilliseconds(`interval`) as interval_to_msec, + DateTime::ToMicroseconds(`interval`) as interval_to_usec, + + DateTime::ToSeconds(`date`) as date_to_seconds, + DateTime::ToSeconds(`datetime`) as datetime_to_seconds, + DateTime::ToSeconds(`timestamp`) as timestamp_to_seconds, + DateTime::ToSeconds(`tzdate`) as tzdate_to_seconds, + DateTime::ToSeconds(`tzdatetime`) as tzdatetime_to_seconds, + DateTime::ToSeconds(`tztimestamp`) as tztimestamp_to_seconds, + + DateTime::ToMilliseconds(`date`) as 
date_to_msec, + DateTime::ToMilliseconds(`datetime`) as datetime_to_msec, + DateTime::ToMilliseconds(`timestamp`) as timestamp_to_msec, + DateTime::ToMilliseconds(`tzdate`) as tzdate_to_msec, + DateTime::ToMilliseconds(`tzdatetime`) as tzdatetime_to_msec, + DateTime::ToMilliseconds(`tztimestamp`) as tztimestamp_to_msec, + + DateTime::ToMicroseconds(`date`) as date_to_usec, + DateTime::ToMicroseconds(`datetime`) as datetime_to_usec, + DateTime::ToMicroseconds(`timestamp`) as timestamp_to_usec, + DateTime::ToMicroseconds(`tzdate`) as tzdate_to_usec, + DateTime::ToMicroseconds(`tzdatetime`) as tzdatetime_to_usec, + DateTime::ToMicroseconds(`tztimestamp`) as tztimestamp_to_usec, + + /* Overflow test */ + DateTime::ToDays(`negative_1d`) as negative_1d, +from ( + select + cast(fdate as Date) as `date`, + cast(fdatetime as Datetime) as `datetime`, + cast(ftimestamp as Timestamp) as `timestamp`, + cast(finterval as Interval) as `interval`, + cast(ftzdate as TzDate) as `tzdate`, + cast(ftzdatetime as TzDatetime) as `tzdatetime`, + cast(ftztimestamp as TzTimestamp) as `tztimestamp`, + -cast(finterval_1day as Interval) as `negative_1d`, + from Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql b/yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql new file mode 100644 index 00000000000..383e2d831e9 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/TzToDate.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +select +cast(DateTime::MakeDate(TzDatetime("2000-01-01T12:00:00,Europe/Moscow") ) as String), +cast(DateTime::MakeTzDate(TzDatetime("2000-01-01T12:00:00,Europe/Moscow") ) as String), + +cast(DateTime::MakeDate(TzDatetime("2000-01-01T00:00:00,Europe/Moscow") ) as String), +cast(DateTime::MakeTzDate(TzDatetime("2000-01-01T00:00:00,Europe/Moscow") ) as String); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Update.in b/yql/essentials/udfs/common/datetime2/test/cases/Update.in new file mode 100644 index 
00000000000..07ac5350517 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Update.in @@ -0,0 +1 @@ +{ "ftztimestamp"="2001-12-01T21:20:19.345678,GMT"; }; diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr b/yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr new file mode 100644 index 00000000000..3915337be3c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Update.in.attr @@ -0,0 +1,17 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "ftztimestamp"; + [ + "DataType"; + "String" + ] + ]; + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test/cases/Update.sql b/yql/essentials/udfs/common/datetime2/test/cases/Update.sql new file mode 100644 index 00000000000..59221221b37 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/Update.sql @@ -0,0 +1,25 @@ +/* syntax version 1 */ +SELECT + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2005)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2200 as Year)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, NULL, 7)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 13 as Month)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, NULL, NULL, 20)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 32 as Day)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2018, 2, 30)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, NULL, NULL, NULL, 11, 10, 9)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 11 as Hour)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 24 as Hour)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 10 as Minute)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 60 as Minute)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 9 as Second)) as String), + 
cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 60 as Second)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 123456 as Microsecond)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 2000000 as Microsecond)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 100 as TimezoneId)) as String), + cast(DateTime::MakeTzTimestamp(DateTime::Update(tm, 1000 as TimezoneId)) as String) +from ( + select + cast(ftztimestamp as TzTimestamp) as tm + from Input +); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql b/yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql new file mode 100644 index 00000000000..b756270ef7d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/UpdateTz.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +pragma warning("disable","4510"); +select cast(DateTime::MakeDatetime( + DateTime::Update(Datetime("2000-01-01T00:00:00Z"), Yql::TimezoneId("Europe/Moscow") as TimezoneId) +) as string); + +select cast(DateTime::MakeDatetime( + DateTime::Update(Datetime("2000-01-01T00:00:00Z"), "Europe/Moscow" as Timezone) +) as string); diff --git a/yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql b/yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql new file mode 100644 index 00000000000..92d2660425b --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/cases/yql-14977.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +$parse = DateTime::Parse("%B/%d/%Y"); +$format = DateTime::Format("%b/%d/%Y"); + +select $format($parse("mAy/15/2022")); + diff --git a/yql/essentials/udfs/common/datetime2/test/ya.make b/yql/essentials/udfs/common/datetime2/test/ya.make new file mode 100644 index 00000000000..78f345b118c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/datetime2) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # 
YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json new file mode 100644 index 00000000000..f83bc10798d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/result.json @@ -0,0 +1,7 @@ +{ + "test.test[SplitMake]": [ + { + "uri": "file://test.test_SplitMake_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt new file mode 100644 index 00000000000..1adffe191a5 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/canondata/test.test_SplitMake_/results.txt @@ -0,0 +1,486 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "dd"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "sdd"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "ddt"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "sddt"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "dts"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "sdts"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "dtd"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "sdtd"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "dtdt"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "sdtdt"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "dtts"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "sdtts"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "tsd"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "stsd"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "tsdt"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "stsdt"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "tsts"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "ststs"; + [ + "DataType"; + "Timestamp64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "-53375809"; + "-53375809"; + "-53375809"; + "-53375809"; + 
"-53375809"; + "-53375809"; + "-4611669897600"; + "-4611669897600"; + "-4611669897600"; + "-4611669897600"; + "-4611669897600"; + "-4611669897600"; + "-4611669897600000000"; + "-4611669897600000000"; + "-4611669897600000000"; + "-4611669897600000000"; + "-4611669897600000000"; + "-4611669897600000000" + ]; + [ + "-719163"; + "-719163"; + "-719163"; + "-719163"; + "-719163"; + "-719163"; + "-62135683200"; + "-62135683200"; + "-62135596801"; + "-62135596801"; + "-62135596801"; + "-62135596801"; + "-62135683200000000"; + "-62135683200000000"; + "-62135596801000000"; + "-62135596801000000"; + "-62135596800000001"; + "-62135596800000001" + ]; + [ + "-719162"; + "-719162"; + "-719162"; + "-719162"; + "-719162"; + "-719162"; + "-62135596800"; + "-62135596800"; + "-62135596800"; + "-62135596800"; + "-62135596800"; + "-62135596800"; + "-62135596800000000"; + "-62135596800000000"; + "-62135596800000000"; + "-62135596800000000"; + "-62135596800000000"; + "-62135596800000000" + ]; + [ + "-1"; + "-1"; + "-1"; + "-1"; + "-1"; + "-1"; + "-86400"; + "-86400"; + "-1"; + "-1"; + "-1"; + "-1"; + "-86400000000"; + "-86400000000"; + "-1000000"; + "-1000000"; + "-1"; + "-1" + ]; + [ + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0" + ]; + [ + "1"; + "1"; + "0"; + "0"; + "0"; + "0"; + "86400"; + "86400"; + "1"; + "1"; + "0"; + "0"; + "86400000000"; + "86400000000"; + "1000000"; + "1000000"; + "1"; + "1" + ]; + [ + "53375807"; + "53375807"; + "53375807"; + "53375807"; + "53375807"; + "53375807"; + "4611669724800"; + "4611669724800"; + "4611669811199"; + "4611669811199"; + "4611669811199"; + "4611669811199"; + "4611669724800000000"; + "4611669724800000000"; + "4611669811199000000"; + "4611669811199000000"; + "4611669811199999999"; + "4611669811199999999" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "dd"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "sdd"; + [ + 
"DataType"; + "Date32" + ] + ]; + [ + "ddt"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "sddt"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "dts"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "sdts"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "dtd"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "sdtd"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "dtdt"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "sdtdt"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "dtts"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "sdtts"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "tsd"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "stsd"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "tsdt"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "stsdt"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "tsts"; + [ + "DataType"; + "Timestamp64" + ] + ]; + [ + "ststs"; + [ + "DataType"; + "Timestamp64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0"; + "0" + ]; + [ + "1"; + "1"; + "0"; + "0"; + "0"; + "0"; + "86400"; + "86400"; + "1"; + "1"; + "0"; + "0"; + "86400000000"; + "86400000000"; + "1000000"; + "1000000"; + "1"; + "1" + ]; + [ + "49672"; + "49672"; + "49672"; + "49672"; + "49672"; + "49672"; + "4291660800"; + "4291660800"; + "4291747199"; + "4291747199"; + "4291747199"; + "4291747199"; + "4291660800000000"; + "4291660800000000"; + "4291747199000000"; + "4291747199000000"; + "4291747199999999"; + "4291747199999999" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql new file mode 100644 index 00000000000..4ae3a8962cf --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/SplitMake.sql @@ -0,0 +1,27 @@ +/* syntax version 1 */ + +select + DateTime::MakeDate32(d32) as dd, DateTime::MakeDate32(DateTime::Split(d32)) as sdd, + DateTime::MakeDate32(dt64) as ddt, DateTime::MakeDate32(DateTime::Split(dt64)) as sddt, + DateTime::MakeDate32(ts64) as dts, DateTime::MakeDate32(DateTime::Split(ts64)) as sdts, + DateTime::MakeDatetime64(d32) as dtd, DateTime::MakeDatetime64(DateTime::Split(d32)) as sdtd, + DateTime::MakeDatetime64(dt64) as dtdt, DateTime::MakeDatetime64(DateTime::Split(dt64)) as sdtdt, + DateTime::MakeDatetime64(ts64) as dtts, DateTime::MakeDatetime64(DateTime::Split(ts64)) as sdtts, + DateTime::MakeTimestamp64(d32) as tsd, DateTime::MakeTimestamp64(DateTime::Split(d32)) as stsd, + DateTime::MakeTimestamp64(dt64) as tsdt, DateTime::MakeTimestamp64(DateTime::Split(dt64)) as stsdt, + DateTime::MakeTimestamp64(ts64) as tsts, DateTime::MakeTimestamp64(DateTime::Split(ts64)) as ststs +from Input +order by d32; + +select + DateTime::MakeDate32(d) as dd, DateTime::MakeDate32(DateTime::Split(d)) as sdd, + DateTime::MakeDate32(dt) as ddt, DateTime::MakeDate32(DateTime::Split(dt)) as sddt, + DateTime::MakeDate32(ts) as dts, DateTime::MakeDate32(DateTime::Split(ts)) as sdts, + DateTime::MakeDatetime64(d) as dtd, DateTime::MakeDatetime64(DateTime::Split(d)) as sdtd, + DateTime::MakeDatetime64(dt) as dtdt, DateTime::MakeDatetime64(DateTime::Split(dt)) as sdtdt, + DateTime::MakeDatetime64(ts) as dtts, DateTime::MakeDatetime64(DateTime::Split(ts)) as sdtts, + DateTime::MakeTimestamp64(d) as tsd, DateTime::MakeTimestamp64(DateTime::Split(d)) as stsd, + DateTime::MakeTimestamp64(dt) as tsdt, DateTime::MakeTimestamp64(DateTime::Split(dt)) 
as stsdt, + DateTime::MakeTimestamp64(ts) as tsts, DateTime::MakeTimestamp64(DateTime::Split(ts)) as ststs +from InputNarrow +order by d; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg new file mode 100644 index 00000000000..864fb2ddcb4 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/default.cfg @@ -0,0 +1,4 @@ +in plato.Input input.txt +in plato.InputTz input_tz.txt +in plato.InputNarrow input_narrow.txt +in plato.Tz tz.txt diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt new file mode 100644 index 00000000000..f24562bbc6d --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt @@ -0,0 +1,35 @@ +{ + "d32"=-53375809; + "dt64"=-4611669897600; + "ts64"=-4611669897600000000; +}; +{ + "d32"=-719163; + "dt64"=-62135596801; + "ts64"=-62135596800000001; +}; +{ + "d32"=-719162; + "dt64"=-62135596800; + "ts64"=-62135596800000000; +}; +{ + "d32"=-1; + "dt64"=-1; + "ts64"=-1; +}; +{ + "d32"=0; + "dt64"=0; + "ts64"=0; +}; +{ + "d32"=1; + "dt64"=1; + "ts64"=1; +}; +{ + "d32"=53375807; + "dt64"=4611669811199; + "ts64"=4611669811199999999; +}; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr new file mode 100644 index 00000000000..773be61fbea --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input.txt.attr @@ -0,0 +1,31 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "d32"; + [ + "DataType"; + "Date32" + ] + ]; + [ + "dt64"; + [ + "DataType"; + "Datetime64" + ] + ]; + [ + "ts64"; + [ + "DataType"; + "Timestamp64" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt 
b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt new file mode 100644 index 00000000000..d14b15511ad --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt @@ -0,0 +1,15 @@ +{ + "d"=0u; + "dt"=0u; + "ts"=0u; +}; +{ + "d"=1u; + "dt"=1u; + "ts"=1u; +}; +{ + "d"=49672u; + "dt"=4291747199u; + "ts"=4291747199999999u; +}; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr new file mode 100644 index 00000000000..ba95961d80b --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_narrow.txt.attr @@ -0,0 +1,31 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "d"; + [ + "DataType"; + "Date" + ] + ]; + [ + "dt"; + [ + "DataType"; + "Datetime" + ] + ]; + [ + "ts"; + [ + "DataType"; + "Timestamp" + ] + ] + ] + ] + } +} + diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt new file mode 100644 index 00000000000..e113d3564aa --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt @@ -0,0 +1,48 @@ +{ + "rn"=-144169; + "d32"="-144169-1-1"; + "dt64"="-144169-1-1T0:0:0"; + "ts64"="-144169-1-1T0:0:0"; +}; +{ + "rn"=-1; + "d32"="-1-1-1"; + "dt64"="-1-1-1T23:59:59"; + "ts64"="-1-1-1T23:59:59.999999"; +}; +{ + "rn"=1; + "d32"="1-1-1"; + "dt64"="1-1-1T0:0:0"; + "ts64"="1-1-1T0:0:0"; +}; +{ + "rn"=1969; + "d32"="1969-12-31"; + "dt64"="1969-12-31T23:59:59"; + "ts64"="1969-12-31T23:59:59.999999"; +}; +{ + "rn"=1970; + "d32"="1970-1-1"; + "dt64"="1970-1-1T0:0:0"; + "ts64"="1970-1-1T0:0:0"; +}; +{ + "rn"=2024; + "d32"="2024-7-1"; + "dt64"="2024-7-1T0:0:0"; + "ts64"="2024-7-1T0:0:0"; +}; +{ + "rn"=2106; + "d32"="2106-1-1"; + "dt64"="2106-1-1T0:0:0"; + "ts64"="2106-1-1T0:0:0"; +}; +{ + "rn"=148107; + "d32"="148107-12-31"; + 
"dt64"="148107-12-31T23:59:59"; + "ts64"="148107-12-31T23:59:59.999999"; +}; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr new file mode 100644 index 00000000000..59fc7869772 --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/input_tz.txt.attr @@ -0,0 +1,37 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "rn"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "d32"; + [ + "DataType"; + "String" + ] + ]; + [ + "dt64"; + [ + "DataType"; + "String" + ] + ]; + [ + "ts64"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt new file mode 100644 index 00000000000..77d1cf3174f --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt @@ -0,0 +1,15 @@ +{ + "tz"="GMT"; +}; +{ + "tz"="UTC"; +}; +{ + "tz"="Europe/London"; +}; +{ + "tz"="Europe/Moscow"; +}; +{ + "tz"="Atlantic/Azores"; +}; diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr new file mode 100644 index 00000000000..847643ec33f --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/cases/tz.txt.attr @@ -0,0 +1,16 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "tz"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/datetime2/test_bigdates/ya.make b/yql/essentials/udfs/common/datetime2/test_bigdates/ya.make new file mode 100644 index 00000000000..78f345b118c --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/test_bigdates/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/datetime2) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # 
YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/datetime2/ya.make b/yql/essentials/udfs/common/datetime2/ya.make new file mode 100644 index 00000000000..cc8b450369f --- /dev/null +++ b/yql/essentials/udfs/common/datetime2/ya.make @@ -0,0 +1,30 @@ +IF (YQL_PACKAGED) + PACKAGE() + + FROM_SANDBOX(FILE 7319895543 OUT_NOAUTO libdatetime2_udf.so) + + END() +ELSE() +YQL_UDF_CONTRIB(datetime2_udf) + YQL_ABI_VERSION( + 2 + 40 + 0 + ) + SRCS( + datetime_udf.cpp + ) + PEERDIR( + util/draft + yql/essentials/public/udf/arrow + yql/essentials/minikql + yql/essentials/minikql/datetime + yql/essentials/public/udf/tz + ) + END() +ENDIF() + +RECURSE_FOR_TESTS( + test + test_bigdates +) diff --git a/yql/essentials/udfs/common/digest/digest_udf.cpp b/yql/essentials/udfs/common/digest/digest_udf.cpp new file mode 100644 index 00000000000..491fe7a66ca --- /dev/null +++ b/yql/essentials/udfs/common/digest/digest_udf.cpp @@ -0,0 +1,410 @@ +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +#include <util/digest/murmur.h> +#include <util/digest/city.h> +#include <util/digest/numeric.h> +#include <util/digest/fnv.h> + +#include <library/cpp/digest/argonish/argon2.h> +#include <library/cpp/digest/argonish/blake2b.h> +#include <library/cpp/digest/crc32c/crc32c.h> +#include <library/cpp/digest/md5/md5.h> +#include <library/cpp/digest/murmur/murmur.h> +#include <library/cpp/digest/old_crc/crc.h> +#include <library/cpp/digest/sfh/sfh.h> + +#include <contrib/libs/highwayhash/highwayhash/c_bindings.h> +#include <contrib/libs/highwayhash/highwayhash/sip_hash.h> + +#include <contrib/libs/farmhash/farmhash.h> +#include <contrib/libs/xxhash/xxhash.h> + +#include <openssl/sha.h> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = Crc32c(inputRef.Data(), 
inputRef.Size()); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TCrc64, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui64 hash = crc64(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TFnv32, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = FnvHash<ui32>(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TFnv64, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui64 hash = FnvHash<ui64>(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TMurMurHash, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui64 hash = MurmurHash<ui64>(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TMurMurHash32, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = MurmurHash<ui32>(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TMurMurHash2A, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui64 hash = TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value(); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TMurMurHash2A32, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value(); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TCityHash, ui64(TAutoMap<char*>, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui64 hash; + if (args[1]) { + hash = CityHash64WithSeed(inputRef.Data(), inputRef.Size(), 
args[1].Get<ui64>()); + } else { + hash = CityHash64(inputRef.Data(), inputRef.Size()); + } + return TUnboxedValuePod(hash); + } + + using TUi64Pair = NUdf::TTuple<ui64, ui64>; + + class TCityHash128: public TBoxedValue { + public: + static TStringRef Name() { + static auto name = TStringRef::Of("CityHash128"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TCityHash128); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + uint128 hash = CityHash128(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(hash.first); + items[1] = TUnboxedValuePod(hash.second); + return val; + } + }; + + SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 input = args[0].Get<ui64>(); + ui64 hash = (ui64)NumericHash(input); + return TUnboxedValuePod(hash); + } + + SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + MD5 md5; + const TString& hash = md5.Calc(inputRef); + return valueBuilder->NewString(hash); + } + + SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + MD5 md5; + const TString& hash = md5.CalcRaw(inputRef); + return valueBuilder->NewString(hash); + } + + SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef())); + } + + 
/* Digest::Argon2(input, salt): returns a 32-byte hash as a String. The hasher is a function-local static created once via Create(Argon2d, 1, 32, 1) and reused by every call. NOTE(review): the meaning of the three numeric Create() arguments is not visible here - confirm against the argonish API. */ SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) { + const static ui32 outSize = 32; + const static NArgonish::TArgon2Factory afactory; + const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create( + NArgonish::EArgon2Type::Argon2d, 1, 32, 1); + + const TStringRef inputRef = args[0].AsStringRef(); + const TStringRef saltRef = args[1].AsStringRef(); + ui8 out[outSize]; + argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(), + reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(), + out, outSize); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); + } + + /* Digest::Blake2B(input, optional key): returns a 32-byte digest as a String. A missing key and an empty key both select the hasher created without a key; a non-empty key is passed to the factory. Unlike Argon2 above, a new hasher is created on every call. */ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) { + const static ui32 outSize = 32; + const static NArgonish::TBlake2BFactory bfactory; + const TStringRef inputRef = args[0].AsStringRef(); + + THolder<NArgonish::IBlake2Base> blake2b; + if (args[1]) { + const TStringRef keyRef = args[1].AsStringRef(); + if (keyRef.Size() == 0) { + blake2b = bfactory.Create(outSize); + } else { + blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size()); + } + } else { + blake2b = bfactory.Create(outSize); + } + + ui8 out[outSize]; + blake2b->Update(inputRef.Data(), inputRef.Size()); + blake2b->Final(out, outSize); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); + } + + /* Digest::SipHash(k0, k1, input): the two ui64 arguments form the two-word state array handed to highwayhash SipHash; the string to hash is the third argument. */ SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) { + using namespace highwayhash; + Y_UNUSED(valueBuilder); + const TStringRef inputRef = args[2].AsStringRef(); + const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()}; + ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + /* Digest::HighwayHash(k0, k1, k2, k3, input): the four ui64 arguments form the key array; the string to hash is the fifth argument. */ SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) { + using namespace highwayhash; + Y_UNUSED(valueBuilder); + const TStringRef inputRef = args[4].AsStringRef(); + const uint64_t 
key[4] = { + args[0].Get<ui64>(), + args[1].Get<ui64>(), + args[2].Get<ui64>(), + args[3].Get<ui64>()}; + ui64 hash = HighwayHash64(key, inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + /* Digest::FarmHashFingerprint: util::Fingerprint of a single ui64 argument. */ SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 input = args[0].Get<ui64>(); + ui64 hash = util::Fingerprint(input); + return TUnboxedValuePod(hash); + } + + /* Digest::FarmHashFingerprint2(low, high): util::Fingerprint of the 128-bit value built by util::Uint128(low, high). Both arguments are declared auto-mapped; the canonical test in this commit shows an optional result when an optional argument is passed. */ SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 low = args[0].Get<ui64>(); + ui64 high = args[1].Get<ui64>(); + ui64 hash = util::Fingerprint(util::Uint128(low, high)); + return TUnboxedValuePod(hash); + } + + /* Digest::FarmHashFingerprint32: util::Fingerprint32 over the bytes of the string argument, returned as ui32. */ SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(ui32(hash)); + } + + /* Digest::FarmHashFingerprint64: util::Fingerprint64 over the bytes of the string argument, returned as ui64. */ SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(ui64(hash)); + } + + /* UDF registered under the name FarmHashFingerprint128: util::Fingerprint128 over the string argument, returned as a tuple of two ui64 (first, second). Same hand-written TBoxedValue layout as TCityHash128. */ class TFarmHashFingerprint128: public TBoxedValue { + public: + static TStringRef Name() { + static auto name = TStringRef::Of("FarmHashFingerprint128"); + return name; + } + + /* Declares the signature only when the requested name equals Name(); the implementation object is allocated only when typesOnly is false. Returns false for any other name. */ static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TFarmHashFingerprint128); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = 
nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first)); + items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second)); + return val; + } + }; + + /* Digest::SuperFastHash: SuperFastHash over the bytes of the string argument, returned as ui32. */ SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + /* Digest::Sha1: one-shot SHA-1 via the SHA1_Init / SHA1_Update / SHA1_Final sequence. The result is the SHA_DIGEST_LENGTH raw digest bytes copied into a new String; no hex encoding is applied here. The return values of the SHA1_* calls are not checked. */ SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + SHA_CTX sha; + SHA1_Init(&sha); + SHA1_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA_DIGEST_LENGTH]; + SHA1_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); + } + + /* Digest::Sha256: same shape as Sha1 above, using the SHA256_* calls and returning SHA256_DIGEST_LENGTH raw digest bytes. */ SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + SHA256_CTX sha; + SHA256_Init(&sha); + SHA256_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA256_DIGEST_LENGTH]; + SHA256_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); + } + + /* Digest::IntHash64: integer mixer. The input is first XORed with a fixed constant, then put through three xor-shift-by-33 steps with two multiplications in between. NOTE(review): the two multipliers look like the MurmurHash3 64-bit finalizer constants - confirm the intended origin before documenting it as such. */ SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 x = args[0].Get<ui64>(); + x ^= 0x4CF2D2BAAE6DA887ULL; + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return TUnboxedValuePod(x); + } + + /* Digest::XXH3: XXH3_64bits over the bytes of the string argument, returned as ui64. */ SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); + } + + /* UDF registered under the name XXH3_128: XXH3_128bits over the string argument, returned as a tuple of two ui64 in the order (low64, high64). */ class TXXH3_128: public TBoxedValue { + public: + static TStringRef Name() { + static auto name = TStringRef::Of("XXH3_128"); + return name; + } + + static bool DeclareSignature(const 
TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TXXH3_128); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(ui64(hash.low64)); + items[1] = TUnboxedValuePod(ui64(hash.high64)); + return val; + } + }; + + SIMPLE_MODULE(TDigestModule, + TCrc32c, + TCrc64, + TFnv32, + TFnv64, + TMurMurHash, + TMurMurHash32, + TMurMurHash2A, + TMurMurHash2A32, + TCityHash, + TCityHash128, + TNumericHash, + TMd5Hex, + TMd5Raw, + TMd5HalfMix, + TArgon2, + TBlake2B, + TSipHash, + THighwayHash, + TFarmHashFingerprint, + TFarmHashFingerprint2, + TFarmHashFingerprint32, + TFarmHashFingerprint64, + TFarmHashFingerprint128, + TSuperFastHash, + TSha1, + TSha256, + TIntHash64, + TXXH3, + TXXH3_128 + ) + +} + +REGISTER_MODULES(TDigestModule) diff --git a/yql/essentials/udfs/common/digest/test/canondata/result.json b/yql/essentials/udfs/common/digest/test/canondata/result.json new file mode 100644 index 00000000000..fb6112fc5bc --- /dev/null +++ b/yql/essentials/udfs/common/digest/test/canondata/result.json @@ -0,0 +1,7 @@ +{ + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..f5b7b0fe785 --- /dev/null +++ 
b/yql/essentials/udfs/common/digest/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,506 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "crc32c"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "crc64"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "fnv32"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "fnv64"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "murmur"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "murmur32"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "murmur2a"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "murmur2a32"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "city"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "cityWithSeed"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "city128"; + [ + "TupleType"; + [ + [ + "DataType"; + "Uint64" + ]; + [ + "DataType"; + "Uint64" + ] + ] + ] + ]; + [ + "numeric"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "md5hex"; + [ + "DataType"; + "String" + ] + ]; + [ + "md5raw"; + [ + "DataType"; + "String" + ] + ]; + [ + "md5halfmix"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "argon2"; + [ + "DataType"; + "String" + ] + ]; + [ + "blake2b"; + [ + "DataType"; + "String" + ] + ]; + [ + "blake2bunkeyed"; + [ + "DataType"; + "String" + ] + ]; + [ + "blake2bkeyed"; + [ + "DataType"; + "String" + ] + ]; + [ + "sip"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "highway"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "farmfing"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "farmfing2"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "farmfing32"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "farmfing64"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "farmfing128"; + [ + "TupleType"; + [ + [ + "DataType"; + "Uint64" + ]; + [ + "DataType"; + "Uint64" + ] + ] + ] + ]; + [ + "sfh"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "sha1"; + [ + "DataType"; + "String" + ] + ]; + [ + "sha256"; + [ + "DataType"; + "String" + ] + ]; + [ + "inthash64"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + 
"xxhash"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "xxhash128"; + [ + "TupleType"; + [ + [ + "DataType"; + "Uint64" + ]; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "2432014819"; + "17728638330159804320"; + "84696366"; + "12638153115695167470"; + "746762829127501960"; + "1228156847"; + "5654386555365545660"; + "1466639702"; + "11413460447292444913"; + "684814019408231284"; + [ + "125830901799957853"; + "7569582475522398857" + ]; + "2320827452992767577"; + "c4ca4238a0b923820dcc509a6f75849b"; + [ + "xMpCOKC5I4INzFCab3WEmw==" + ]; + "14973526590288695970"; + [ + "vjqkEHgWMA9RlnF/Dlkiqoxsc4vOI5/ULLMuB4GuMME=" + ]; + [ + "ks31eMRwhaWZIlbw3Pl9Cxnx8cneTV/jDDrOYZG25ds=" + ]; + [ + "ks31eMRwhaWZIlbw3Pl9Cxnx8cneTV/jDDrOYZG25ds=" + ]; + [ + "j04DEIdeE9LFquAFu4i7Q2YAyca0FDA3J8r1atHQF58=" + ]; + "1602385837086584249"; + "5348322356177288628"; + "10105606910506535461"; + [ + "2871837063151915165" + ]; + "2552028077"; + "9304157803607034849"; + [ + "5308794677172709703"; + "4629402678501957187" + ]; + "3927678806"; + [ + "NWoZK3kTsExUV00Ywo1G5jlUKKs=" + ]; + [ + "a4ayc/80/OGda4BO/1o/V0etpOqiLx1JwB5S3beHW0s=" + ]; + "10577349846663553072"; + "7335560060985733464"; + [ + "7335560060985733464"; + "16085986526811796301" + ] + ]; + [ + "2208655895"; + "4363576337578352290"; + "84696365"; + "12638153115695167469"; + "602994839685422785"; + "772897149"; + "16472888669357673283"; + "2351653828"; + "17472595041006102391"; + "8016373356242392939"; + [ + "13426016195983081906"; + "17051066397148972982" + ]; + "6174653592142994962"; + "c81e728d9d4c2f636f067f89cc14862c"; + [ + "yB5yjZ1ML2NvBn+JzBSGLA==" + ]; + "5861621074593582340"; + [ + "ixHhYBlEBiZ446+zgg0hd5Eocp+xgMpVyaLfhjJqxV8=" + ]; + [ + "MSN823muHfp/+4fN5+qKgDUtMA7lrHWKbN3RnWcZJew=" + ]; + [ + "MSN823muHfp/+4fN5+qKgDUtMA7lrHWKbN3RnWcZJew=" + ]; + [ + "NBaDJdCQRJye6B+WJdI/OX/mIxQk3AgEHm4hM9qmLu8=" + ]; + "8789615690042391357"; + "17360383380415224727"; + "970024650806116628"; + [ + 
"5310514165246837948" + ]; + "3946386795"; + "6920640749119438759"; + [ + "2374933113219823160"; + "4520448414947048260" + ]; + "2190005025"; + [ + "2kuSN7rMzfGcB2DKt67EqDWQELA=" + ]; + [ + "1HNeOiZeFu7gP1lxi5tdAwGcB9i2xR+Q2jpmbuwTqzU=" + ]; + "18198135717204167749"; + "18128579709034668820"; + [ + "18128579709034668820"; + "14642767882163838550" + ] + ]; + [ + "1909385492"; + "15694391695266948643"; + "84696364"; + "12638153115695167468"; + "15180167692696242062"; + "2292183779"; + "6734453432295282525"; + "2128480519"; + "11275350073939794026"; + "1669883546352889947"; + [ + "15168680716710346397"; + "13490672353767795293" + ]; + "13529992206878991808"; + "eccbc87e4b5ce2fe28308fd9f2a7baf3"; + [ + "7MvIfktc4v4oMI/Z8qe68w==" + ]; + "13401401932333664167"; + [ + "kRpsyYine3lH5Es1XuUlgXRBY6HLMD001QCPKqdjG7w=" + ]; + [ + "WBNIM3sPPhSGIBc9qqX5TQDYgXBdy/Cqg+/aumHS7eE=" + ]; + [ + "WBNIM3sPPhSGIBc9qqX5TQDYgXBdy/Cqg+/aumHS7eE=" + ]; + [ + "BQK9GdKOJxlDH7wMrQ1gHOPyviB18JDuIg2i0JFwkL4=" + ]; + "2874396847657928730"; + "6919389025651885183"; + "14522245769643814311"; + [ + "9221007817131939736" + ]; + "1678875853"; + "11991475895402502921"; + [ + "9295019677823677360"; + "3668607519738437716" + ]; + "2634537178"; + [ + "d95o2uzYI7q7tY7bHI4U1xBug7s=" + ]; + [ + "TgdAhWK+24tgzgXB3s/jrRa3IjCWfeAfZAt+Rym0n84=" + ]; + "9624464864560415994"; + "8296998437054084336"; + [ + "8296998437054084336"; + "6903416366538802245" + ] + ]; + [ + "0"; + "18446744073709551615"; + "2166136261"; + "14695981039346656037"; + "0"; + "0"; + "0"; + "0"; + "11160318154034397263"; + "12607432989128692740"; + [ + "18085479540095642321"; + "11079402499652051579" + ]; + "7654268697807496793"; + "d41d8cd98f00b204e9800998ecf8427e"; + [ + "1B2M2Y8AsgTpgAmY7PhCfg==" + ]; + "7203772011789518145"; + [ + "sW8qMzZE+95eqaAsJqn4Ne3l7QwOAklHIexxpYMRsPo=" + ]; + [ + "DldRwCblQ7Loqy6wYJnaodHl30d3j3eH+qtFzfEv46g=" + ]; + [ + "DldRwCblQ7Loqy6wYJnaodHl30d3j3eH+qtFzfEv46g=" + ]; + [ + 
"gtMZwEiUXpGivpA1k/ith+ZulxJ3iI6tC6aVNsDsnk0=" + ]; + "16558958598623574096"; + "9185752494698444901"; + "0"; + #; + "3696677242"; + "11160318154034397263"; + [ + "4463240938071824939"; + "4374473821787594281" + ]; + "0"; + [ + "2jmj7l5rSw0yVb/vlWAYkK/YBwk=" + ]; + [ + "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=" + ]; + "4761183170873013810"; + "3244421341483603138"; + [ + "6918025063187695999"; + "11072670137173121240" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/digest/test/cases/Basic.in b/yql/essentials/udfs/common/digest/test/cases/Basic.in new file mode 100644 index 00000000000..c90696db42e --- /dev/null +++ b/yql/essentials/udfs/common/digest/test/cases/Basic.in @@ -0,0 +1,4 @@ +{"key"="1";"subkey"="1";"value"=""}; +{"key"="2";"subkey"="2";"value"=""}; +{"key"="3";"subkey"="3";"value"=""}; +{"key"="";"subkey"="";"value"=""}; diff --git a/yql/essentials/udfs/common/digest/test/cases/Basic.sql b/yql/essentials/udfs/common/digest/test/cases/Basic.sql new file mode 100644 index 00000000000..fbf6f218fba --- /dev/null +++ b/yql/essentials/udfs/common/digest/test/cases/Basic.sql @@ -0,0 +1,35 @@ +/* syntax version 1 */ +SELECT + Digest::Crc32c(key) AS crc32c, + Digest::Crc64(key) AS crc64, + Digest::Fnv32(key) AS fnv32, + Digest::Fnv64(key) AS fnv64, + Digest::MurMurHash(key) AS murmur, + Digest::MurMurHash32(key) AS murmur32, + Digest::MurMurHash2A(key) AS murmur2a, + Digest::MurMurHash2A32(key) AS murmur2a32, + Digest::CityHash(key) AS city, + Digest::CityHash(key, 111) AS cityWithSeed, + Digest::CityHash128(key) AS city128, + Digest::NumericHash(COALESCE(CAST(key AS Uint64), 0)) AS numeric, + Digest::Md5Hex(key) AS md5hex, + Digest::Md5Raw(key) AS md5raw, + Digest::Md5HalfMix(key) AS md5halfmix, + Digest::Argon2(key, "12345678") AS argon2, + Digest::Blake2B(key) AS blake2b, + Digest::Blake2B(key, "") AS blake2bunkeyed, + Digest::Blake2B(key, "12345678") AS blake2bkeyed, + Digest::SipHash(111, 222, key) AS sip, + Digest::HighwayHash(111, 222, 333, 444, key) AS highway, + Digest::FarmHashFingerprint(COALESCE(CAST(key AS Uint64), 0u)) AS farmfing, + Digest::FarmHashFingerprint2(123ul, CAST(key AS Uint64)) AS farmfing2, + Digest::FarmHashFingerprint32(key) AS farmfing32, + Digest::FarmHashFingerprint64(key) AS farmfing64, + Digest::FarmHashFingerprint128(key) AS farmfing128, + Digest::SuperFastHash(key) AS sfh, + Digest::Sha1(key) as sha1, + 
Digest::Sha256(key) as sha256, + Digest::IntHash64(COALESCE(CAST(key AS Uint64), 0)) AS inthash64, + Digest::XXH3(key) AS xxhash, + Digest::XXH3_128(key) AS xxhash128 +FROM Input; diff --git a/yql/essentials/udfs/common/digest/test/ya.make b/yql/essentials/udfs/common/digest/test/ya.make new file mode 100644 index 00000000000..6c3cce54db0 --- /dev/null +++ b/yql/essentials/udfs/common/digest/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/digest) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/digest/ya.make b/yql/essentials/udfs/common/digest/ya.make new file mode 100644 index 00000000000..90ee1b02f36 --- /dev/null +++ b/yql/essentials/udfs/common/digest/ya.make @@ -0,0 +1,42 @@ +IF (YQL_PACKAGED) + PACKAGE() + + FROM_SANDBOX( + FILE 7319896345 OUT_NOAUTO libdigest_udf.so + ) + + END() +ELSE() +YQL_UDF_CONTRIB(digest_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + digest_udf.cpp + ) + + PEERDIR( + contrib/libs/farmhash + contrib/libs/highwayhash + contrib/libs/openssl + contrib/libs/xxhash + library/cpp/digest/argonish + library/cpp/digest/crc32c + library/cpp/digest/md5 + library/cpp/digest/old_crc + library/cpp/digest/sfh + ) + + ADDINCL(contrib/libs/highwayhash) + + END() + +ENDIF() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/file/file_udf.cpp b/yql/essentials/udfs/common/file/file_udf.cpp new file mode 100644 index 00000000000..57db826591c --- /dev/null +++ b/yql/essentials/udfs/common/file/file_udf.cpp @@ -0,0 +1,623 @@ +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <util/generic/yexception.h> +#include <util/stream/buffered.h> +#include <util/stream/file.h> +#include <util/string/cast.h> +#include <util/ysaveload.h> + +#include <functional> + +using namespace NKikimr; +using namespace NUdf; + +extern const char ByLineFuncName[]; +const char 
ByLineFuncName[] = "ByLines"; + +namespace { + namespace Helper { + /* Generic conversion of one text line into an unboxed value of TUserType: parses with TryFromString and returns false when parsing fails, leaving result untouched. valueBuilder is unused because no string allocation is needed for these types. */ template <class TUserType> + inline bool ConvertToUnboxed(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + Y_UNUSED(valueBuilder); + TUserType userType; + if (!TryFromString<TUserType>(curLine, userType)) { + return false; + } + result = TUnboxedValuePod(userType); + return true; + } + + /* The four specializations below (String, Utf8, Yson, Json) copy the line into a new string value as-is and always succeed; no parsing or validation of the content is done here. */ template <> + inline bool ConvertToUnboxed<const char*>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; + } + + template <> + inline bool ConvertToUnboxed<TUtf8>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; + } + + template <> + inline bool ConvertToUnboxed<TYson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; + } + + template <> + inline bool ConvertToUnboxed<TJson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; + } + + /* Maps a C++ element type to the type name printed in the ByLines cast-failure message. The primary template is the fallback for types without a specialization. */ template <typename T> + struct TypeToTypeName { + static const char* Name() { + return "Unknown"; + } + }; + template <> + struct TypeToTypeName<bool> { + static constexpr const char* Name() { + return "Bool"; + } + }; + template <> + struct TypeToTypeName<i8> { + static constexpr const char* Name() { + return "Int8"; + } + }; + template <> + struct TypeToTypeName<ui8> { + static constexpr const char* Name() { + return "Uint8"; + } + }; + template <> + struct TypeToTypeName<i16> { + static constexpr const char* Name() { + return "Int16"; + } + }; + template <> + struct TypeToTypeName<ui16> { + static constexpr const char* Name() { + return "Uint16"; + } + }; + template <> + struct TypeToTypeName<ui32> { + static constexpr const char* Name() { + return "Uint32"; + } + }; + template <> 
+ struct TypeToTypeName<ui64> { + static constexpr const char* Name() { + return "Uint64"; + } + }; + template <> + struct TypeToTypeName<i32> { + static constexpr const char* Name() { + return "Int32"; + } + }; + template <> + struct TypeToTypeName<i64> { + static constexpr const char* Name() { + return "Int64"; + } + }; + template <> + struct TypeToTypeName<float> { + static constexpr const char* Name() { + return "Float"; + } + }; + template <> + struct TypeToTypeName<double> { + static constexpr const char* Name() { + return "Double"; + } + }; + template <> + struct TypeToTypeName<const char*> { + static constexpr const char* Name() { + return "String"; + } + }; + template <> + struct TypeToTypeName<TUtf8> { + static constexpr const char* Name() { + return "Utf8"; + } + }; + template <> + struct TypeToTypeName<TYson> { + static constexpr const char* Name() { + return "Yson"; + } + }; + template <> + struct TypeToTypeName<TJson> { + static constexpr const char* Name() { + return "Json"; + } + }; + } + + static const ui64 TAKE_UNLIM = -1; + + bool SkipElements(IBoxedValue& iter, ui64 skip) { + for (; skip > 0; --skip) { + if (!TBoxedValueAccessor::Skip(iter)) { + return false; + } + } + return true; + } + + typedef std::function<void(const TString& message)> TTerminateFunc; + + class TStreamMeta: public TThrRefBase { + public: + typedef TBuffered<TUnbufferedFileInput> TStream; + typedef TIntrusivePtr<TStreamMeta> TPtr; + + TStreamMeta(TString filePath) + : FilePath(filePath) + { + // work in greedy mode to catch error on creation + Cached = DoCreateStream(); + } + + std::unique_ptr<TStream> CreateStream(TTerminateFunc terminateFunc) { + if (Cached) { + return std::move(Cached); + } + + terminateFunc("The file iterator was already created. To scan file data multiple times please use ListCollect either over ParseFile or over some lazy function over it, e.g. 
ListMap"); + Y_ABORT("Terminate unstoppable!"); + } + + bool GetLinesCount(ui64& count) const { + if (LinesCount == Unknown) + return false; + count = LinesCount; + return true; + } + void SetLinesCount(ui64 count) { + Y_DEBUG_ABORT_UNLESS(LinesCount == Unknown || count == LinesCount, "Set another value of count lines"); + if (LinesCount == Unknown) { + LinesCount = count; + } + } + + const TString& GetFilePath() const { + return FilePath; + } + + private: + std::unique_ptr<TStream> DoCreateStream() { + static const auto bufferSize = 1 << 12; + TFile file(FilePath, OpenExisting | RdOnly | Seq); + if (FileSize == Unknown) { + FileSize = file.GetLength(); + } + return std::make_unique<TBuffered<TUnbufferedFileInput>>(bufferSize, file); + } + + TString FilePath; + static const ui64 Unknown = -1; + ui64 FileSize = Unknown; + ui64 LinesCount = Unknown; + std::unique_ptr<TStream> Cached; + }; + + class TEmptyIter: public TBoxedValue { + private: + bool Skip() override { + return false; + } + bool Next(TUnboxedValue&) override { + return false; + } + + public: + TEmptyIter(TTerminateFunc terminateFunc) + : TerminateFunc(terminateFunc) + { + } + + private: + const TTerminateFunc TerminateFunc; + }; + + class TLineSplitter { + public: + TLineSplitter(IInputStream& stream) + : Stream_(stream) + { + } + + size_t Next(TString& st) { + st.clear(); + char c; + size_t ret = 0; + if (HasPendingLineChar_) { + st.push_back(PendingLineChar_); + HasPendingLineChar_ = false; + ++ret; + } + + while (Stream_.ReadChar(c)) { + ++ret; + if (c == '\n') { + break; + } else if (c == '\r') { + if (Stream_.ReadChar(c)) { + ++ret; + if (c != '\n') { + --ret; + PendingLineChar_ = c; + HasPendingLineChar_ = true; + } + } + + break; + } else { + st.push_back(c); + } + } + + return ret; + } + + private: + IInputStream& Stream_; + bool HasPendingLineChar_ = false; + char PendingLineChar_ = 0; + }; + + template <class TUserType> + class TLineByLineBoxedValueIterator: public TBoxedValue { + public: + 
/* Takes ownership of the already opened stream; Splitter is bound to that stream and reads lines from it. */ TLineByLineBoxedValueIterator(TStreamMeta::TPtr metaPtr, std::unique_ptr<TStreamMeta::TStream>&& stream, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc) + : MetaPtr(metaPtr) + , ValueBuilder(valueBuilder) + , Stream(std::move(stream)) + , Splitter(*Stream) + , TerminateFunc(terminateFunc) + { + } + + /* Caps how many more lines Skip() and Next() may yield; TAKE_UNLIM disables the cap. */ void SetLimit(ui64 limit = TAKE_UNLIM) { + Limit = limit; + } + + private: + /* Consumes one unit of the remaining limit. Returns false once the limit is exhausted; with TAKE_UNLIM it always returns true and nothing is decremented. */ bool SkipLimit() { + if (Limit != TAKE_UNLIM) { + if (Limit == 0) { + return false; + } + --Limit; + } + return true; + } + + /* Reads the next line into CurLine. Returns false when the splitter read nothing (end of input) or when the limit is exhausted. The line is read before the limit is checked, and CurLineNum is incremented on every call. */ bool Skip() final { + ++CurLineNum; + return Splitter.Next(CurLine) && SkipLimit(); + } + + /* Advances via Skip() and converts the line to TUserType. A conversion failure is reported through TerminateFunc with the offending text, the file path and the line number; Y_ABORT follows in case TerminateFunc returns. */ bool Next(TUnboxedValue& value) override { + if (!Skip()) { + return false; + } + if (!Helper::ConvertToUnboxed<TUserType>(ValueBuilder, CurLine, value)) { + TStringBuilder sb; + sb << "File::ByLines failed to cast string '" << CurLine << "' to " << Helper::TypeToTypeName<TUserType>::Name() << Endl; + sb << "- path: " << MetaPtr->GetFilePath() << Endl; + sb << "- line: " << CurLineNum << Endl; + TerminateFunc(sb); + Y_ABORT("Terminate unstoppable!"); + } + return true; + } + + TStreamMeta::TPtr MetaPtr; + const IValueBuilder& ValueBuilder; + + /* Stream has to stay declared before Splitter: the constructor initializes Splitter from the dereferenced Stream. */ std::unique_ptr<TStreamMeta::TStream> Stream; + TLineSplitter Splitter; + TTerminateFunc TerminateFunc; + TString CurLine; + ui64 CurLineNum = 0; + ui64 Limit = TAKE_UNLIM; + /* NOTE(review): Result is not referenced anywhere in the visible code. */ TUnboxedValue Result; + }; + + /* Lazy list over the lines of a file, restricted to a window of Take lines after the first Skip lines. The stream metadata object is shared between the list and the windows derived from it. */ template <class TUserType> + class TListByLineBoxedValue: public TBoxedValue { + public: + TListByLineBoxedValue(TStreamMeta::TPtr metaPtr, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc, ui64 skip = 0ULL, ui64 take = TAKE_UNLIM) + : MetaPtr(metaPtr) + , ValueBuilder(valueBuilder) + , TerminateFunc(terminateFunc) + , Skip(skip) + , Take(take) + {} + private: + /* True only once a line count has been stored in the shared metadata. */ bool HasFastListLength() const override { + ui64 tmp; + return MetaPtr->GetLinesCount(tmp); + } + ui64 GetListLength() const override { + ui64 length; + if (!MetaPtr->GetLinesCount(length)) { + length = Skip; + for (const auto iter = 
GetListIterator(); iter.Skip(); ++length) + continue; + if (Take == TAKE_UNLIM) { + MetaPtr->SetLinesCount(length); + } + } + if (length <= Skip) { + return 0; + } + return Min(length - Skip, Take); + } + ui64 GetEstimatedListLength() const override { + /// \todo some optimisation? + return GetListLength(); + } + + TUnboxedValue GetListIterator() const override { + try { + auto stream = MetaPtr->CreateStream(TerminateFunc); + IBoxedValuePtr iter(new TLineByLineBoxedValueIterator<TUserType>(MetaPtr, std::move(stream), ValueBuilder, TerminateFunc)); + if (!Take || !SkipElements(*iter, Skip)) { + return TUnboxedValuePod(new TEmptyIter(TerminateFunc)); + } + static_cast<TLineByLineBoxedValueIterator<TUserType>*>(iter.Get())->SetLimit(Take); + return TUnboxedValuePod(std::move(iter)); + } catch (const std::exception& e) { + TerminateFunc(CurrentExceptionMessage()); + Y_ABORT("Terminate unstoppable!"); + } + } + + IBoxedValuePtr SkipListImpl(const IValueBuilder& builder, ui64 count) const override { + return new TListByLineBoxedValue(MetaPtr, builder, TerminateFunc, Skip + count, Take == TAKE_UNLIM ? 
TAKE_UNLIM : Take - std::min(Take, count)); + } + /* Returns a new window over the same file with the same Skip and at most count lines: Take becomes min(Take, count). */ IBoxedValuePtr TakeListImpl(const IValueBuilder& builder, ui64 count) const override { + return new TListByLineBoxedValue(MetaPtr, builder, TerminateFunc, Skip, std::min(Take, count)); + } + + /* NOTE(review): unconditionally reports true, including for an empty file or an empty window - confirm this is intended. */ bool HasListItems() const override { + return true; + } + + TStreamMeta::TPtr MetaPtr; + const IValueBuilder& ValueBuilder; + TTerminateFunc TerminateFunc; + ui64 Skip = 0ULL; + ui64 Take = TAKE_UNLIM; + }; + + /* Implementation of the ByLines function for one element type TUserType: takes a file path and returns a lazy list of the file lines converted to TUserType. */ template <class TUserType> + class TByLinesFunc: public TBoxedValue { + private: + TSourcePosition Pos_; + + TByLinesFunc(TSourcePosition pos) + : Pos_(pos) + {} + + /* Constructing TStreamMeta opens the file immediately, so an open failure is caught here and reported through UdfTerminate together with the source position. Pos_ is copied into a local so that the terminate lambda captures it by value rather than referring to this object. */ TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + try { + TString filePath(args[0].AsStringRef()); + TStreamMeta::TPtr metaPtr(new TStreamMeta(filePath)); + auto pos = Pos_; + auto terminateFunc = [pos](const TString& message) { + UdfTerminate((TStringBuilder() << pos << " " << message).data()); + }; + return TUnboxedValuePod(new TListByLineBoxedValue<TUserType>(metaPtr, *valueBuilder, terminateFunc)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + /* Declares a signature taking one string and returning a list of TUserType. The name argument is ignored here; unlike the other DeclareSignature functions in this file this one returns void. */ static void DeclareSignature( + TStringRef name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + Y_UNUSED(name); + builder.UserType(userType); + builder.SimpleSignature<TListType<TUserType>(char*)>(); + if (!typesOnly) { + builder.Implementation(new TByLinesFunc<TUserType>(builder.GetSourcePosition())); + } + } + }; + + /* UDF FolderListFromFile: exposes the records stored in a file as a list of three-field structs. The struct member positions are passed around as IndexP, IndexT and IndexA. */ class TFolderListFromFile: public TBoxedValue { + private: + /* Iterator over the records of one file; the constructor opens filePath as an input stream. */ class TIterator : public TBoxedValue { + public: + TIterator(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + , Input_(filePath) + { + } + + private: + bool Next(NUdf::TUnboxedValue& value) 
override { + try { + /* Each record is three strings read with Load in the order type, path, attrs. An empty type string ends the iteration. Any exception is turned into UdfTerminate with the source position. */ TString type; + TString path; + TString attrs; + ::Load(&Input_, type); + if (!type) { + return false; + } + ::Load(&Input_, path); + ::Load(&Input_, attrs); + + NUdf::TUnboxedValue* items = nullptr; + value = ValueBuilder_.NewArray(3, items); + items[IndexT_] = ValueBuilder_.NewString(type); + items[IndexP_] = ValueBuilder_.NewString(path); + items[IndexA_] = ValueBuilder_.NewString(attrs); + } + catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + return true; + } + + private: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const IValueBuilder& ValueBuilder_; + const TSourcePosition Pos_; + TIFStream Input_; + }; + + /* List value backed by the file at FilePath_. Length and HasItems are computed on first request and cached in mutable members. */ class TList: public TBoxedValue { + public: + TList(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + , FilePath_(std::move(filePath)) + { + } + + protected: + /* Every call builds a new TIterator, which opens the file again from the start. */ NUdf::TUnboxedValue GetListIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(IndexP_, IndexT_, IndexA_, ValueBuilder_, Pos_, FilePath_)); + } + + /* True only after GetListLength has cached a value. */ bool HasFastListLength() const override { + return bool(Length); + } + + /* Counts the records with one full pass over a fresh iterator and caches the result. */ ui64 GetListLength() const override { + if (!Length) { + ui64 length = 0ULL; + for (const auto it = GetListIterator(); it.Skip();) { + ++length; + } + + Length = length; + } + + return *Length; + } + + ui64 GetEstimatedListLength() const override { + return GetListLength(); + } + + /* Answers from the HasItems cache, else from a cached Length, else by probing a single element on a fresh iterator; the answer is cached in HasItems. */ bool HasListItems() const override { + if (HasItems) { + return *HasItems; + } + + if (Length) { + HasItems = (*Length != 0); + return *HasItems; + } + + auto iter = GetListIterator(); + HasItems = iter.Skip(); + return *HasItems; + } + + protected: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const IValueBuilder& ValueBuilder_; + const TSourcePosition Pos_; + const TString FilePath_; 
+ mutable TMaybe<ui64> Length; + mutable TMaybe<bool> HasItems; + }; + + public: + TFolderListFromFile(ui32 indexP, ui32 indexT, ui32 indexA, const TSourcePosition& pos) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , Pos_(pos) + { + } + + static const ::NYql::NUdf::TStringRef& Name() { + static auto name = ::NYql::NUdf::TStringRef::Of("FolderListFromFile"); + return name; + } + + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + try { + TString filePath(args[0].AsStringRef()); + return TUnboxedValuePod(new TList(IndexP_, IndexT_, IndexA_, *valueBuilder, Pos_, filePath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() != name) { + // the only case when we return false + return false; + } + + builder.UserType(userType); + + ui32 indexP, indexT, indexA; + auto itemType = builder.Struct() + ->AddField<const char*>("Path", &indexP) + .AddField<const char*>("Type", &indexT) + .AddField<TYson>("Attributes", &indexA) + .Build(); + auto resultType = builder.List()->Item(itemType).Build(); + + builder.Args()->Add<const char*>().Done().Returns(resultType); + if (!typesOnly) { + builder.Implementation(new TFolderListFromFile(indexP, indexT, indexA, builder.GetSourcePosition())); + } + return true; + } + + private: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const TSourcePosition Pos_; + }; + + SIMPLE_MODULE(TFileModule, + TUserDataTypeFuncFactory<false, false, ByLineFuncName, TByLinesFunc, const char*, TUtf8, TYson, TJson, i8, ui8, i16, ui16, ui32, ui64, i32, i64, float, double, bool>, + TFolderListFromFile + ) + +} + +REGISTER_MODULES(TFileModule) diff --git a/yql/essentials/udfs/common/file/ya.make b/yql/essentials/udfs/common/file/ya.make new file mode 100644 index 
00000000000..250f0722d8e --- /dev/null +++ b/yql/essentials/udfs/common/file/ya.make @@ -0,0 +1,17 @@ +YQL_UDF_CONTRIB(file_udf) + +YQL_ABI_VERSION( + 2 + 27 + 0 +) + +SRCS( + file_udf.cpp +) + +PEERDIR( + yql/essentials/core +) + +END() diff --git a/yql/essentials/udfs/common/histogram/histogram_udf.cpp b/yql/essentials/udfs/common/histogram/histogram_udf.cpp new file mode 100644 index 00000000000..3dcb2ca98ec --- /dev/null +++ b/yql/essentials/udfs/common/histogram/histogram_udf.cpp @@ -0,0 +1,1018 @@ +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <library/cpp/histogram/adaptive/adaptive_histogram.h> +#include <library/cpp/histogram/adaptive/block_histogram.h> + +#include <util/string/printf.h> +#include <util/stream/format.h> + +#include <cmath> + +using namespace NKikimr; +using namespace NUdf; +using namespace NKiwiAggr; + +namespace { +#define REGISTER_METHOD_UDF(name) \ + T##name, + +#define HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(XX) \ + XX(GetSumAboveBound) \ + XX(GetSumBelowBound) \ + XX(CalcUpperBound) \ + XX(CalcLowerBound) \ + XX(CalcUpperBoundSafe) \ + XX(CalcLowerBoundSafe) + +#define HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(XX) \ + XX(GetSumInRange) + +#define HISTOGRAM_ALGORITHMS_MAP(XX) \ + XX(AdaptiveDistance) \ + XX(AdaptiveWeight) \ + XX(AdaptiveWard) \ + XX(BlockWeight) \ + XX(BlockWard) + +#define HISTOGRAM_FUNCTION_MAP(XX, arg) \ + XX(Create, arg) \ + XX(AddValue, arg) \ + XX(GetResult, arg) \ + XX(Serialize, arg) \ + XX(Deserialize, arg) \ + XX(Merge, arg) + +#define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." 
#name; + HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) + DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) + DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) + + class TLinearHistogram: public TAdaptiveWardHistogram { + public: + TLinearHistogram(double step, double begin, double end) + : TAdaptiveWardHistogram(1ULL << 24) + , Step(step) + , Begin(begin) + , End(end) + { + } + + void Add(double value, double weight) override { + if (value < Begin) { + value = Begin; + } else if (value > End) { + value = End; + } else { + value = std::floor(value / Step + 0.5) * Step; + } + TAdaptiveWardHistogram::Add(value, weight); + } + + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } + + protected: + double Step; + double Begin; + double End; + }; + + class TLogarithmicHistogram: public TLinearHistogram { + public: + TLogarithmicHistogram(double step, double begin, double end) + : TLinearHistogram(step, begin, end) + { + } + + void Add(double value, double weight) override { + double base = std::log(value) / std::log(Step); + double prev = std::pow(Step, std::floor(base)); + double next = std::pow(Step, std::ceil(base)); + if (std::abs(value - next) > std::abs(value - prev)) { + value = prev; + } else { + value = next; + } + + if (value < Begin) { + value = Begin; + } else if (value > End) { + value = End; + } + + if (!std::isnan(value)) { + TAdaptiveWardHistogram::Add(value, weight); + } + } + + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } + }; + + template <typename THistogramType, const char* ResourceName> + class THistogram_Create: public TBoxedValue { + public: + THistogram_Create(TSourcePosition pos) + : Pos_(pos) + {} + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Create"; + static auto nameRef = TStringRef(name); + return nameRef; + } + + private: + TUnboxedValue Run( + const 
IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); + histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + template <typename THistogramType, const char* ResourceName> + class THistogram_AddValue: public TBoxedValue { + public: + THistogram_AddValue(TSourcePosition pos) + : Pos_(pos) + {} + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; + static auto nameRef = TStringRef(name); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); + resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + 
Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + template <typename THistogramType, const char* ResourceName> + class THistogram_Serialize: public TBoxedValue { + public: + THistogram_Serialize(TSourcePosition pos) + : Pos_(pos) + {} + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + THistogram proto; + TString result; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); + return valueBuilder->NewString(result); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<char*(TResource<ResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + template <typename THistogramType, const char* ResourceName> + class THistogram_Deserialize: public TBoxedValue { + public: + THistogram_Deserialize(TSourcePosition pos) + : Pos_(pos) + {} + + typedef 
TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + template <typename THistogramType, const char* ResourceName> + class THistogram_Merge: public TBoxedValue { + public: + THistogram_Merge(TSourcePosition pos) + : Pos_(pos) + {} + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; + static auto nameRef = TStringRef(name); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THistogram proto; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + 
static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); + return TUnboxedValuePod(args[1]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + struct THistogramIndexes { + static constexpr ui32 BinFieldsCount = 2U; + static constexpr ui32 ResultFieldsCount = 5U; + + THistogramIndexes(IFunctionTypeInfoBuilder& builder) { + const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); + const auto binsList = builder.List()->Item(binStructType).Build(); + ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build(); + } + + ui32 Kind; + ui32 Min; + ui32 Max; + ui32 WeightsSum; + ui32 Bins; + + ui32 Position; + ui32 Frequency; + + TType* ResultStructType; + }; + + template <typename THistogramType, const char* ResourceName> + class THistogram_GetResult: public TBoxedValue { + public: + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes(histogramIndexes) + , Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + 
"Histogram_GetResult"; + static auto nameRef = TStringRef(name); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + THistogram proto; + auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); + histogram->ToProto(proto); + + auto size = proto.FreqSize(); + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields); + fields[HistogramIndexes.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); + if (size) { + TUnboxedValue* items = nullptr; + fields[HistogramIndexes.Bins] = valueBuilder->NewArray(size, items); + fields[HistogramIndexes.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); + fields[HistogramIndexes.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); + fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); + for (ui64 i = 0; i < size; ++i) { + TUnboxedValue* binFields = nullptr; + *items++ = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields); + binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); + binFields[HistogramIndexes.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); + } + } else { + fields[HistogramIndexes.Bins] = valueBuilder->NewEmptyList(); + fields[HistogramIndexes.Min] = TUnboxedValuePod(0.0); + fields[HistogramIndexes.Max] = TUnboxedValuePod(0.0); + fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(0.0); + } + + return result; + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); + + THistogramIndexes histogramIndexes(builder); + + 
builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); + + if (!typesOnly) { + builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + const THistogramIndexes HistogramIndexes; + TSourcePosition Pos_; + }; + + template <> + TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + template <> + TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + 
Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + template <> + TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + 
builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + template <> + TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + template <> + bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + class THistogramPrint: public TBoxedValue { + public: + THistogramPrint(const THistogramIndexes& histogramIndexes) + : HistogramIndexes(histogramIndexes) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Print"); + return name; + } + + TUnboxedValue Run( 
+ const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto kind = args[0].GetElement(HistogramIndexes.Kind); + auto bins = args[0].GetElement(HistogramIndexes.Bins); + double min = args[0].GetElement(HistogramIndexes.Min).Get<double>(); + double max = args[0].GetElement(HistogramIndexes.Max).Get<double>(); + double weightsSum = args[0].GetElement(HistogramIndexes.WeightsSum).Get<double>(); + auto binsIterator = bins.GetListIterator(); + + TStringBuilder result; + result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; + result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", + bins.GetListLength(), weightsSum, min, max); + double maxFrequency = 0.0; + size_t maxPositionLength = 0; + size_t maxFrequencyLength = 0; + const ui8 bars = args[1].GetOrDefault<ui8>(25); + + for (TUnboxedValue current; binsIterator.Next(current);) { + if (bars) { + double frequency = current.GetElement(HistogramIndexes.Frequency).Get<double>(); + if (frequency > maxFrequency) { + maxFrequency = frequency; + } + } + size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes.Position).Get<double>()).length(); + size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes.Frequency).Get<double>()).length(); + + if (positionLength > maxPositionLength) { + maxPositionLength = positionLength; + } + if (frequencyLength > maxFrequencyLength) { + maxFrequencyLength = frequencyLength; + } + } + + binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + double position = current.GetElement(HistogramIndexes.Position).Get<double>(); + double frequency = current.GetElement(HistogramIndexes.Frequency).Get<double>(); + result << "\n"; + if (bars && maxFrequency > 0) { + ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); + for (ui8 i = 0; i < bars; ++i) { + if (i < filledBars) { + result << "█"; + } else { + result << "░"; + } + } + } + result << " P: " << 
LeftPad(Sprintf("%.3f", position), maxPositionLength); + result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); + } + + return valueBuilder->NewString(result); + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); + + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); + + if (!typesOnly) { + builder.Implementation(new THistogramPrint(histogramIndexes)); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + const THistogramIndexes HistogramIndexes; + }; + + class THistogramToCumulativeDistributionFunction: public TBoxedValue { + public: + THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes) + : HistogramIndexes(histogramIndexes) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes.Bins); + double minValue = args[0].GetElement(HistogramIndexes.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes.Max).Get<double>(); + double sum = 0.0; + double weightsSum = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) + resultBins.reserve(bins.GetListLength()); + const auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = 
valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields); + const auto frequency = current.GetElement(HistogramIndexes.Frequency).Get<double>(); + sum += frequency; + weightsSum += sum; + binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(sum); + binFields[HistogramIndexes.Position] = current.GetElement(HistogramIndexes.Position); + resultBins.emplace_back(std::move(resultCurrent)); + } + + auto kind = args[0].GetElement(HistogramIndexes.Kind); + fields[HistogramIndexes.Kind] = valueBuilder->AppendString(kind, "Cdf"); + fields[HistogramIndexes.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes.Min] = TUnboxedValuePod(minValue); + fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); + + if (!typesOnly) { + builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + const THistogramIndexes HistogramIndexes; + }; + + class THistogramNormalize: public TBoxedValue { + public: + THistogramNormalize(const THistogramIndexes& histogramIndexes) + : HistogramIndexes(histogramIndexes) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Normalize"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields); + 
auto bins = args[0].GetElement(HistogramIndexes.Bins); + double minValue = args[0].GetElement(HistogramIndexes.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes.Max).Get<double>(); + double area = args[1].GetOrDefault<double>(100.0); + bool cdfNormalization = args[2].GetOrDefault<bool>(false); + double sum = 0.0; + double weightsSum = 0.0; + double lastBinFrequency = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) + resultBins.reserve(bins.GetListLength()); + auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + sum += current.GetElement(HistogramIndexes.Frequency).Get<double>(); + lastBinFrequency = current.GetElement(HistogramIndexes.Frequency).Get<double>(); + } + binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields); + double frequency = current.GetElement(HistogramIndexes.Frequency).Get<double>(); + if (cdfNormalization) { + frequency = area * frequency / lastBinFrequency; + } else { + frequency = area * frequency / sum; + } + weightsSum += frequency; + binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(frequency); + binFields[HistogramIndexes.Position] = current.GetElement(HistogramIndexes.Position); + resultBins.emplace_back(std::move(resultCurrent)); + } + + TUnboxedValue kind = args[0].GetElement(HistogramIndexes.Kind); + if (cdfNormalization) { + kind = valueBuilder->AppendString(kind, "Cdf"); + } + + fields[HistogramIndexes.Kind] = kind; + fields[HistogramIndexes.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes.Min] = TUnboxedValuePod(minValue); + fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } + + static bool 
DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalDouble = builder.Optional()->Item<double>().Build(); + auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); + builder.OptionalArgs(1); + builder.OptionalArgs(2); + if (!typesOnly) { + builder.Implementation(new THistogramNormalize(histogramIndexes)); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + const THistogramIndexes HistogramIndexes; + }; + + template <bool twoArgs> + class THistogramMethodBase: public TBoxedValue { + public: + THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes(histogramIndexes) + , Pos_(pos) + { + } + + virtual TUnboxedValue GetResult( + const THistogram& input, + const TUnboxedValuePod* args) const = 0; + + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + auto bins = args[0].GetElement(HistogramIndexes.Bins); + double min = args[0].GetElement(HistogramIndexes.Min).template Get<double>(); + double max = args[0].GetElement(HistogramIndexes.Max).template Get<double>(); + auto binsIterator = bins.GetListIterator(); + + THistogram histogram; + histogram.SetType(HT_ADAPTIVE_HISTOGRAM); + histogram.SetMinValue(min); + histogram.SetMaxValue(max); + for (TUnboxedValue current; binsIterator.Next(current);) { + double frequency = current.GetElement(HistogramIndexes.Frequency).template Get<double>(); + double position = current.GetElement(HistogramIndexes.Position).template Get<double>(); + histogram.AddFreq(frequency); + histogram.AddPosition(position); + } 
+ + return GetResult(histogram, args); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { + THistogramIndexes histogramIndexes(builder); + + if (twoArgs) { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); + } else { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); + } + return histogramIndexes; + } + + protected: + const THistogramIndexes HistogramIndexes; + TSourcePosition Pos_; + }; + +#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ + class T##name: public THistogramMethodBase<false> { \ + public: \ + T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ + : THistogramMethodBase<false>(histogramIndexes, pos) { \ + } \ + static const TStringRef& Name() { \ + static auto name = TStringRef::Of(#name); \ + return name; \ + } \ + static bool DeclareSignature( \ + const TStringRef& name, \ + TType* userType, \ + IFunctionTypeInfoBuilder& builder, \ + bool typesOnly) { \ + Y_UNUSED(userType); \ + if (Name() == name) { \ + const auto& histogramIndexes = DeclareSignatureBase(builder); \ + if (!typesOnly) { \ + builder.Implementation(new T##name(histogramIndexes, \ + builder.GetSourcePosition())); \ + } \ + return true; \ + } else { \ + return false; \ + } \ + } \ + TUnboxedValue GetResult( \ + const THistogram& input, \ + const TUnboxedValuePod* args) const override { \ + TAdaptiveWardHistogram histo(input, input.FreqSize()); \ + double result = histo.name(args[1].Get<double>()); \ + return TUnboxedValuePod(result); \ + } \ + }; + +#define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \ + class T##name: public THistogramMethodBase<true> { \ + public: \ + T##name(const THistogramIndexes& 
histogramIndexes, TSourcePosition pos) \ + : THistogramMethodBase<true>(histogramIndexes, pos) { \ + } \ + static const TStringRef& Name() { \ + static auto name = TStringRef::Of(#name); \ + return name; \ + } \ + static bool DeclareSignature( \ + const TStringRef& name, \ + TType* userType, \ + IFunctionTypeInfoBuilder& builder, \ + bool typesOnly) { \ + Y_UNUSED(userType); \ + if (Name() == name) { \ + const auto& histogramIndexes = DeclareSignatureBase(builder); \ + if (!typesOnly) { \ + builder.Implementation(new T##name(histogramIndexes, \ + builder.GetSourcePosition())); \ + } \ + return true; \ + } else { \ + return false; \ + } \ + } \ + TUnboxedValue GetResult( \ + const THistogram& input, \ + const TUnboxedValuePod* args) const override { \ + TAdaptiveWardHistogram histo(input, input.FreqSize()); \ + double result = histo.name(args[1].Get<double>(), args[2].Get<double>()); \ + return TUnboxedValuePod(result); \ + } \ + }; + +#define DECLARE_HISTOGRAM_UDF(functionName, histogramName) \ + THistogram_##functionName<T##histogramName##Histogram, histogramName##HistogramResourceName>, + +#define DECLARE_HISTOGRAM_UDFS(name) \ + HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name) + + HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) + HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) + + SIMPLE_MODULE(THistogramModule, + HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) + HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + DECLARE_HISTOGRAM_UDFS(Linear) + DECLARE_HISTOGRAM_UDFS(Logarithmic) + THistogramPrint, + THistogramNormalize, + THistogramToCumulativeDistributionFunction) +} + +REGISTER_MODULES(THistogramModule) diff --git a/yql/essentials/udfs/common/histogram/test/canondata/result.json b/yql/essentials/udfs/common/histogram/test/canondata/result.json new file mode 100644 index 00000000000..06f9e726a92 --- /dev/null +++ 
b/yql/essentials/udfs/common/histogram/test/canondata/result.json @@ -0,0 +1,57 @@ +{ + "test.test[Algorithms]": [ + { + "uri": "file://test.test_Algorithms_/results.txt" + } + ], + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ], + "test.test[Distinct]": [ + { + "uri": "file://test.test_Distinct_/results.txt" + } + ], + "test.test[Intervals]": [ + { + "uri": "file://test.test_Intervals_/results.txt" + } + ], + "test.test[Linear]": [ + { + "uri": "file://test.test_Linear_/results.txt" + } + ], + "test.test[Logarithmic]": [ + { + "uri": "file://test.test_Logarithmic_/results.txt" + } + ], + "test.test[Methods]": [ + { + "uri": "file://test.test_Methods_/results.txt" + } + ], + "test.test[Normalize]": [ + { + "uri": "file://test.test_Normalize_/results.txt" + } + ], + "test.test[Print]": [ + { + "uri": "file://test.test_Print_/results.txt" + } + ], + "test.test[ToCumulativeDistributionFunction]": [ + { + "uri": "file://test.test_ToCumulativeDistributionFunction_/results.txt" + } + ], + "test.test[Weights]": [ + { + "uri": "file://test.test_Weights_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt new file mode 100644 index 00000000000..37e9d6a36a2 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Algorithms_/results.txt @@ -0,0 +1,476 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "adaptive_distance"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + 
"DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "adaptive_weight"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "adaptive_ward"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "block_weight"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "block_ward"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + 
"algo_equality_check"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "quality_equality_check"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "8"; + "2" + ]; + [ + "3"; + "5" + ]; + [ + "5"; + "7" + ] + ]; + "AdaptiveDistance"; + "7"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "6"; + "2" + ]; + [ + "5"; + "3.799999952316284" + ]; + [ + "5"; + "7" + ] + ]; + "AdaptiveWeight"; + "7"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "2"; + "0.5" + ]; + [ + "6"; + "2.5" + ]; + [ + "8"; + "6.25" + ] + ]; + "AdaptiveWard"; + "7"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "5"; + "1.399999976158142" + ]; + [ + "6"; + "4" + ]; + [ + "5"; + "7" + ] + ]; + "BlockWeight"; + "7"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "8"; + "2" + ]; + [ + "3"; + "5" + ]; + [ + "5"; + "7" + ] + ]; + "BlockWard"; + "7"; + "0"; + "16" + ] + ]; + [ + %true + ]; + [ + %true + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..75ce4f96b2a --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,338 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "subkey_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "value_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "0" + ]; + [ + "1"; + "1" + ]; + [ + "3"; 
+ "2" + ]; + [ + "3"; + "3" + ]; + [ + "1"; + "4" + ]; + [ + "1"; + "5" + ]; + [ + "1"; + "6" + ]; + [ + "5"; + "7" + ] + ]; + "AdaptiveWard"; + "7"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "1"; + "0" + ]; + [ + "1"; + "1" + ]; + [ + "3"; + "2" + ]; + [ + "3"; + "4" + ]; + [ + "1"; + "8" + ]; + [ + "1"; + "16" + ]; + [ + "1"; + "32" + ]; + [ + "5"; + "64" + ] + ]; + "AdaptiveWard"; + "64"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "1"; + "-1" + ]; + [ + "1"; + "1" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "8" + ]; + [ + "1"; + "32" + ]; + [ + "1"; + "128" + ]; + [ + "1"; + "512" + ]; + [ + "9"; + "2048" + ] + ]; + "AdaptiveWard"; + "2048"; + "-1"; + "16" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt new file mode 100644 index 00000000000..721c3a7f669 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Distinct_/results.txt @@ -0,0 +1,139 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "is_different"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "0" + ]; + [ + "1"; + "1" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "3" + ]; + [ + "1"; + "4" + ]; + [ + "1"; + "5" + ]; + [ + "1"; + "6" + ]; + [ + "1"; + "7" + ] + ]; + "AdaptiveWard"; + "7"; + "0"; + "8" + ] + ]; + [ + %true + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt new file mode 100644 index 00000000000..b19e1b600c5 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Intervals_/results.txt @@ -0,0 +1,290 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "subkey_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "value_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "16"; + "4.125" + ] + ]; + 
"AdaptiveWard"; + "4.124999999999999"; + "4.124999999999999"; + "16" + ] + ]; + [ + [ + [ + [ + "9"; + "3" + ]; + [ + "2"; + "24" + ]; + [ + "5"; + "64" + ] + ]; + "AdaptiveWard"; + "64"; + "0"; + "16" + ] + ]; + [ + [ + [ + [ + "1"; + "-1" + ]; + [ + "1"; + "1" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "8" + ]; + [ + "1"; + "32" + ]; + [ + "1"; + "128" + ]; + [ + "1"; + "512" + ]; + [ + "9"; + "2048" + ] + ]; + "AdaptiveWard"; + "2048"; + "-1"; + "16" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt new file mode 100644 index 00000000000..4ec10ae8d39 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Linear_/results.txt @@ -0,0 +1,330 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "default"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "linear_size"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "linear_min_max"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "-2150" + ]; + [ + "1"; + "-2050" + ]; + [ + "1"; + 
"-50" + ]; + [ + "2"; + "0" + ]; + [ + "1"; + "10" + ]; + [ + "1"; + "30" + ]; + [ + "1"; + "90" + ]; + [ + "1"; + "130" + ]; + [ + "1"; + "510" + ]; + [ + "9"; + "2050" + ] + ]; + "Linear"; + "2050"; + "-2150"; + "19" + ] + ]; + [ + [ + [ + [ + "1"; + "-2145" + ]; + [ + "1"; + "-2046" + ]; + [ + "1"; + "-33" + ]; + [ + "3"; + "0" + ]; + [ + "1"; + "33" + ]; + [ + "1"; + "99" + ]; + [ + "1"; + "132" + ]; + [ + "1"; + "528" + ]; + [ + "9"; + "2046" + ] + ]; + "Linear"; + "2046"; + "-2145"; + "19" + ] + ]; + [ + [ + [ + [ + "9"; + "100" + ]; + [ + "1"; + "500" + ]; + [ + "9"; + "1000" + ] + ]; + "Linear"; + "1000"; + "100"; + "19" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt new file mode 100644 index 00000000000..f465dcea214 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Logarithmic_/results.txt @@ -0,0 +1,310 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "default"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "log_size"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "log_min_max"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "1" + ]; + [ + "2"; + "10" + ]; + [ + "3"; 
+ "100" + ]; + [ + "9"; + "1000" + ] + ]; + "Logarithmic"; + "1000"; + "1"; + "15" + ] + ]; + [ + [ + [ + [ + "1"; + "2" + ]; + [ + "1"; + "8" + ]; + [ + "1"; + "32" + ]; + [ + "1"; + "64" + ]; + [ + "1"; + "128" + ]; + [ + "1"; + "512" + ]; + [ + "9"; + "2048" + ] + ]; + "Logarithmic"; + "2048"; + "2"; + "15" + ] + ]; + [ + [ + [ + [ + "2"; + "10" + ]; + [ + "1"; + "32" + ]; + [ + "1"; + "64" + ]; + [ + "1"; + "128" + ]; + [ + "1"; + "512" + ]; + [ + "9"; + "2048" + ] + ]; + "Logarithmic"; + "2048"; + "10"; + "15" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt new file mode 100644 index 00000000000..0f0b131b12d --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Methods_/results.txt @@ -0,0 +1,85 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "GetSumAboveBound"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "GetSumBelowBound"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "CalcUpperBound"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "CalcLowerBound"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "GetSumInRange"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "6.5" + ]; + [ + "9.5" + ]; + [ + "2.5" + ]; + [ + "6.166666666666667" + ]; + [ + "6.5" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt new file mode 100644 index 00000000000..c81b76c18a1 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Normalize_/results.txt @@ -0,0 +1,338 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "subkey_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "value_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "6.25"; + "0" + ]; + [ + "6.25"; + 
"1" + ]; + [ + "18.75"; + "2" + ]; + [ + "18.75"; + "3" + ]; + [ + "6.25"; + "4" + ]; + [ + "6.25"; + "5" + ]; + [ + "6.25"; + "6" + ]; + [ + "31.25"; + "7" + ] + ]; + "AdaptiveWard"; + "7"; + "0"; + "100" + ] + ]; + [ + [ + [ + [ + "0.0625"; + "0" + ]; + [ + "0.0625"; + "1" + ]; + [ + "0.1875"; + "2" + ]; + [ + "0.1875"; + "4" + ]; + [ + "0.0625"; + "8" + ]; + [ + "0.0625"; + "16" + ]; + [ + "0.0625"; + "32" + ]; + [ + "0.3125"; + "64" + ] + ]; + "AdaptiveWard"; + "64"; + "0"; + "1" + ] + ]; + [ + [ + [ + [ + "-0.0625"; + "-1" + ]; + [ + "-0.0625"; + "1" + ]; + [ + "-0.0625"; + "2" + ]; + [ + "-0.0625"; + "8" + ]; + [ + "-0.0625"; + "32" + ]; + [ + "-0.0625"; + "128" + ]; + [ + "-0.0625"; + "512" + ]; + [ + "-0.5625"; + "2048" + ] + ]; + "AdaptiveWard"; + "2048"; + "-1"; + "-1" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt new file mode 100644 index 00000000000..bab67bf3e8c --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Print_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key_histogram"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "subkey_histogram"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "value_histogram"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "Kind: AdaptiveWard Bins: 8 WeightsSum: 16.000 Min: 0.000 Max: 7.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 0.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 2.000 F: 
3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 3.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 4.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 5.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 6.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88 P: 7.000 F: 5.000" + ]; + [ + "Kind: AdaptiveWard Bins: 8 WeightsSum: 16.000 Min: 0.000 Max: 64.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 0.000 
F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 2.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 4.000 F: 3.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 8.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 16.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 32.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88 P: 64.000 F: 5.000" + ]; + [ + "Kind: AdaptiveWard Bins: 8 WeightsSum: 16.000 Min: -1.000 Max: 2048.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: -1.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 1.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 2.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 8.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 32.000 F: 
1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 128.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91\xE2\x96\x91 P: 512.000 F: 1.000\n\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88\xE2\x96\x88 P: 2048.000 F: 9.000" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt new file mode 100644 index 00000000000..9e2b2a96892 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_ToCumulativeDistributionFunction_/results.txt @@ -0,0 +1,444 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "subkey_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "value_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + 
"subkey_norm_cdf_histogram"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "0" + ]; + [ + "2"; + "1" + ]; + [ + "5"; + "2" + ]; + [ + "8"; + "3" + ]; + [ + "9"; + "4" + ]; + [ + "10"; + "5" + ]; + [ + "11"; + "6" + ]; + [ + "16"; + "7" + ] + ]; + "AdaptiveWardCdf"; + "7"; + "0"; + "62" + ] + ]; + [ + [ + [ + [ + "1"; + "0" + ]; + [ + "2"; + "1" + ]; + [ + "5"; + "2" + ]; + [ + "8"; + "4" + ]; + [ + "9"; + "8" + ]; + [ + "10"; + "16" + ]; + [ + "11"; + "32" + ]; + [ + "16"; + "64" + ] + ]; + "AdaptiveWardCdf"; + "64"; + "0"; + "62" + ] + ]; + [ + [ + [ + [ + "1"; + "-1" + ]; + [ + "2"; + "1" + ]; + [ + "3"; + "2" + ]; + [ + "4"; + "8" + ]; + [ + "5"; + "32" + ]; + [ + "6"; + "128" + ]; + [ + "7"; + "512" + ]; + [ + "16"; + "2048" + ] + ]; + "AdaptiveWardCdf"; + "2048"; + "-1"; + "44" + ] + ]; + [ + [ + [ + [ + "6.25"; + "0" + ]; + [ + "12.5"; + "1" + ]; + [ + "31.25"; + "2" + ]; + [ + "50"; + "4" + ]; + [ + "56.25"; + "8" + ]; + [ + "62.5"; + "16" + ]; + [ + "68.75"; + "32" + ]; + [ + "100"; + "64" + ] + ]; + "AdaptiveWardCdfCdf"; + "64"; + "0"; + "387.5" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt new file mode 100644 index 00000000000..6b9aac15ec2 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/canondata/test.test_Weights_/results.txt @@ -0,0 +1,221 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "basic_weight"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "weight_and_bins"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "Bins"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Position"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "Kind"; + [ + "DataType"; + "String" + ] + ]; + [ + "Max"; + [ + "DataType"; + "Double" + ] + ]; + [ + "Min"; + [ + "DataType"; + "Double" + ] + ]; + [ + "WeightsSum"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "equality_check"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "1" + ]; + [ + "2049"; + "2" + ]; + [ + "1026"; + "3" + ]; + [ + "4"; + "4" + ]; + [ + "8"; + "5" + ]; + [ + "16"; + "6" + ]; + [ + "160"; + "7" + ] + ]; + "AdaptiveWard"; + "7"; + "1"; + "3264" + ] + ]; + [ + [ + [ + [ + "8192"; + "2.5" + ]; + [ + "683"; + "5.664714336395264" + ]; + [ + "10240"; + "7" + ] + ]; + "AdaptiveWard"; + "7"; + "1"; + "19115" + ] + ]; + [ + %true + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql b/yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql new file mode 100644 index 00000000000..02b2bf65fc6 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Algorithms.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ +SELECT + ADAPTIVE_DISTANCE_HISTOGRAM(key, 3) AS adaptive_distance, + ADAPTIVE_WEIGHT_HISTOGRAM(key, 3) AS adaptive_weight, + ADAPTIVE_WARD_HISTOGRAM(key, 3) AS adaptive_ward, + BLOCK_WEIGHT_HISTOGRAM(key, 3) AS block_weight, + BLOCK_WARD_HISTOGRAM(key, 3) AS block_ward, + Histogram::Print(ADAPTIVE_WEIGHT_HISTOGRAM(key, 3)) <> Histogram::Print(BLOCK_WEIGHT_HISTOGRAM(key, 3)) AS algo_equality_check, + Histogram::Print(ADAPTIVE_WEIGHT_HISTOGRAM(key, 3)) <> Histogram::Print(ADAPTIVE_WARD_HISTOGRAM(key, 3)) AS quality_equality_check +FROM ( + SELECT + CAST(key AS Double) AS key, + CAST(subkey AS Double) AS subkey, + CAST(value AS Double) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Basic.sql b/yql/essentials/udfs/common/histogram/test/cases/Basic.sql new file mode 100644 index 00000000000..2e080bd2ba7 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Basic.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + HISTOGRAM(key) AS key_histogram, + HISTOGRAM(subkey) AS subkey_histogram, + HISTOGRAM(value) AS value_histogram +FROM ( + SELECT + CAST(key AS Double) AS key, + CAST(subkey AS Double) AS subkey, + CAST(value AS Double) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Distinct.sql b/yql/essentials/udfs/common/histogram/test/cases/Distinct.sql new file mode 100644 index 00000000000..347ffe76f26 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Distinct.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +SELECT + HISTOGRAM(DISTINCT key) AS key_histogram, + Histogram::Print(HISTOGRAM(key)) <> 
Histogram::Print(HISTOGRAM(DISTINCT key)) AS is_different +FROM ( + SELECT + CAST(key AS Double) AS key + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Intervals.sql b/yql/essentials/udfs/common/histogram/test/cases/Intervals.sql new file mode 100644 index 00000000000..eaceab05780 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Intervals.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + HISTOGRAM(key, 1) AS key_histogram, + HISTOGRAM(subkey, 3) AS subkey_histogram, + HISTOGRAM(value, 1000000) AS value_histogram +FROM ( + SELECT + CAST(key AS Double) AS key, + CAST(subkey AS Double) AS subkey, + CAST(value AS Double) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Linear.in b/yql/essentials/udfs/common/histogram/test/cases/Linear.in new file mode 100644 index 00000000000..22a04e936f1 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Linear.in @@ -0,0 +1,19 @@ +{"key"="0";"subkey"="0";"value"="-3"}; +{"key"="0";"subkey"="0";"value"="-49"}; +{"key"="2";"subkey"="2";"value"="2"}; +{"key"="3";"subkey"="4";"value"="8"}; +{"key"="4";"subkey"="8";"value"="32"}; +{"key"="5";"subkey"="16";"value"="88"}; +{"key"="5";"subkey"="16";"value"="128"}; +{"key"="6";"subkey"="32";"value"="512"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; +{"key"="0";"subkey"="0";"value"="-2049"}; +{"key"="0";"subkey"="0";"value"="-2149"}; diff --git a/yql/essentials/udfs/common/histogram/test/cases/Linear.sql b/yql/essentials/udfs/common/histogram/test/cases/Linear.sql new file mode 100644 index 00000000000..08af7fc3019 --- /dev/null +++ 
b/yql/essentials/udfs/common/histogram/test/cases/Linear.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +SELECT + LinearHistogram(value) AS default, + LinearHistogram(value, 33) AS linear_size, + LinearHistogram(value, 100, 100, 1000) AS linear_min_max +FROM ( + SELECT + CAST(value AS Double) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in new file mode 100644 index 00000000000..22a04e936f1 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.in @@ -0,0 +1,19 @@ +{"key"="0";"subkey"="0";"value"="-3"}; +{"key"="0";"subkey"="0";"value"="-49"}; +{"key"="2";"subkey"="2";"value"="2"}; +{"key"="3";"subkey"="4";"value"="8"}; +{"key"="4";"subkey"="8";"value"="32"}; +{"key"="5";"subkey"="16";"value"="88"}; +{"key"="5";"subkey"="16";"value"="128"}; +{"key"="6";"subkey"="32";"value"="512"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; +{"key"="0";"subkey"="0";"value"="-2049"}; +{"key"="0";"subkey"="0";"value"="-2149"}; diff --git a/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql new file mode 100644 index 00000000000..216488e91ec --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Logarithmic.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +SELECT + LogarithmicHistogram(value) AS default, + LogHistogram(value, 2) AS log_size, + LogHistogram(value, 0.5, 10, 10000) AS log_min_max +FROM ( + SELECT + CAST(value AS Double) AS value + FROM Input +); diff --git 
a/yql/essentials/udfs/common/histogram/test/cases/Methods.sql b/yql/essentials/udfs/common/histogram/test/cases/Methods.sql new file mode 100644 index 00000000000..8c351f80708 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Methods.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + Histogram::GetSumAboveBound(histo, 5.0) AS GetSumAboveBound, + Histogram::GetSumBelowBound(histo, 5.0) AS GetSumBelowBound, + Histogram::CalcUpperBound(histo, 5.0) AS CalcUpperBound, + Histogram::CalcLowerBound(histo, 5.0) AS CalcLowerBound, + Histogram::GetSumInRange(histo, 5.0, 20.0) AS GetSumInRange +FROM ( + SELECT + HISTOGRAM(CAST(key AS Double)) AS histo + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Normalize.sql b/yql/essentials/udfs/common/histogram/test/cases/Normalize.sql new file mode 100644 index 00000000000..3dd4a12621a --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Normalize.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + Histogram::Normalize(HISTOGRAM(key)) AS key_histogram, + Histogram::Normalize(HISTOGRAM(subkey), 1.0) AS subkey_histogram, + Histogram::Normalize(HISTOGRAM(value), -1.0) AS value_histogram +FROM ( + SELECT + CAST(key AS Double) AS key, + CAST(subkey AS Double) AS subkey, + CAST(value AS Double) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Print.sql b/yql/essentials/udfs/common/histogram/test/cases/Print.sql new file mode 100644 index 00000000000..5434e8dea31 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Print.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + Histogram::Print(HISTOGRAM(key)) AS key_histogram, + Histogram::Print(HISTOGRAM(subkey)) AS subkey_histogram, + Histogram::Print(HISTOGRAM(value), 50) AS value_histogram +FROM ( + SELECT + CAST(key AS Double) AS key, + CAST(subkey AS Double) AS subkey, + CAST(value AS Double) AS value + FROM Input +); diff --git 
a/yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql b/yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql new file mode 100644 index 00000000000..e71db5c68f8 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/ToCumulativeDistributionFunction.sql @@ -0,0 +1,13 @@ +/* syntax version 1 */ +SELECT + Histogram::ToCumulativeDistributionFunction(HISTOGRAM(key)) AS key_histogram, + Histogram::ToCumulativeDistributionFunction(HISTOGRAM(subkey)) AS subkey_histogram, + Histogram::ToCumulativeDistributionFunction(HISTOGRAM(value)) AS value_histogram, + Histogram::Normalize(Histogram::ToCumulativeDistributionFunction(HISTOGRAM(subkey)), 100, True) AS subkey_norm_cdf_histogram +FROM ( + SELECT + CAST(key AS Double) AS key, + CAST(subkey AS Double) AS subkey, + CAST(value AS Double) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/Weights.in b/yql/essentials/udfs/common/histogram/test/cases/Weights.in new file mode 100644 index 00000000000..0f50051d065 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Weights.in @@ -0,0 +1,16 @@ +{"key"="0";"subkey"="1000";"value"="-1"}; +{"key"="1";"subkey"="1";"value"="1"}; +{"key"="2";"subkey"="2";"value"="2"}; +{"key"="3";"subkey"="4";"value"="8"}; +{"key"="4";"subkey"="8";"value"="32"}; +{"key"="5";"subkey"="16";"value"="128"}; +{"key"="6";"subkey"="32";"value"="512"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; diff --git a/yql/essentials/udfs/common/histogram/test/cases/Weights.sql b/yql/essentials/udfs/common/histogram/test/cases/Weights.sql new file mode 100644 index 
00000000000..77229008f6e --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/Weights.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + HISTOGRAM(key, value / subkey) AS basic_weight, + HISTOGRAM(key, value, 3) AS weight_and_bins, + Histogram::Print(HISTOGRAM(key)) <> Histogram::Print(HISTOGRAM(key, value)) AS equality_check +FROM ( + SELECT + CAST(key AS Double) AS key, + COALESCE(CAST(subkey AS Double), 1.0) AS subkey, + COALESCE(CAST(value AS Double), 1.0) AS value + FROM Input +); diff --git a/yql/essentials/udfs/common/histogram/test/cases/default.in b/yql/essentials/udfs/common/histogram/test/cases/default.in new file mode 100644 index 00000000000..494ea1402e8 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/cases/default.in @@ -0,0 +1,16 @@ +{"key"="0";"subkey"="0";"value"="-1"}; +{"key"="1";"subkey"="1";"value"="1"}; +{"key"="2";"subkey"="2";"value"="2"}; +{"key"="3";"subkey"="4";"value"="8"}; +{"key"="4";"subkey"="8";"value"="32"}; +{"key"="5";"subkey"="16";"value"="128"}; +{"key"="6";"subkey"="32";"value"="512"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="7";"subkey"="64";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; +{"key"="2";"subkey"="2";"value"="2048"}; +{"key"="3";"subkey"="4";"value"="2048"}; diff --git a/yql/essentials/udfs/common/histogram/test/ya.make b/yql/essentials/udfs/common/histogram/test/ya.make new file mode 100644 index 00000000000..e03673fbdfe --- /dev/null +++ b/yql/essentials/udfs/common/histogram/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +TIMEOUT(600) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +DEPENDS(yql/essentials/udfs/common/histogram) + +END() diff --git a/yql/essentials/udfs/common/histogram/ya.make 
b/yql/essentials/udfs/common/histogram/ya.make new file mode 100644 index 00000000000..937a3c68356 --- /dev/null +++ b/yql/essentials/udfs/common/histogram/ya.make @@ -0,0 +1,32 @@ +IF (YQL_PACKAGED) + PACKAGE() + + FROM_SANDBOX( + FILE 7319896927 OUT_NOAUTO libhistogram_udf.so + ) + + END() +ELSE() +YQL_UDF_CONTRIB(histogram_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + histogram_udf.cpp + ) + + PEERDIR( + library/cpp/histogram/adaptive + ) + + END() + +ENDIF() + +RECURSE_FOR_TESTS( + test +)
\ No newline at end of file diff --git a/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp new file mode 100644 index 00000000000..348fd69b9dc --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp @@ -0,0 +1,423 @@ +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <library/cpp/hyperloglog/hyperloglog.h> + +#include <util/generic/hash_set.h> + +#include <variant> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + class THybridHyperLogLog { + private: + using THybridSet = THashSet<ui64, std::hash<ui64>, std::equal_to<ui64>, TStdAllocatorForUdf<ui64>>; + using THybridHll = THyperLogLogWithAlloc<TStdAllocatorForUdf<ui8>>; + + explicit THybridHyperLogLog(unsigned precision) + : Var(THybridSet()), SizeLimit((1u << precision) / 8), Precision(precision) + { } + + THybridHll ConvertToHyperLogLog() const { + auto res = THybridHll::Create(Precision); + for (auto& el : GetSetRef()) { + res.Update(el); + } + return res; + } + + bool IsSet() const { + return Var.index() == 1; + } + + const THybridSet& GetSetRef() const { + return std::get<1>(Var); + } + + THybridSet& GetMutableSetRef() { + return std::get<1>(Var); + } + + const THybridHll& GetHllRef() const { + return std::get<0>(Var); + } + + THybridHll& GetMutableHllRef() { + return std::get<0>(Var); + } + + public: + THybridHyperLogLog (THybridHyperLogLog&&) = default; + + THybridHyperLogLog& operator=(THybridHyperLogLog&&) = default; + + void Update(ui64 hash) { + if (IsSet()) { + GetMutableSetRef().insert(hash); + if (GetSetRef().size() >= SizeLimit) { + Var = ConvertToHyperLogLog(); + } + } else { + GetMutableHllRef().Update(hash); + } + } + + void Merge(const THybridHyperLogLog& rh) { + if (IsSet() && rh.IsSet()) { + GetMutableSetRef().insert(rh.GetSetRef().begin(), rh.GetSetRef().end()); + if (GetSetRef().size() >= SizeLimit) { + Var = ConvertToHyperLogLog(); + } + } else { + if (IsSet()) { + Var 
= ConvertToHyperLogLog(); + } + if (rh.IsSet()) { + GetMutableHllRef().Merge(rh.ConvertToHyperLogLog()); + } else { + GetMutableHllRef().Merge(rh.GetHllRef()); + } + } + } + + void Save(IOutputStream& out) const { + out.Write(static_cast<char>(Var.index())); + out.Write(static_cast<char>(Precision)); + if (IsSet()) { + ::Save(&out, GetSetRef()); + } else { + GetHllRef().Save(out); + } + } + + ui64 Estimate() const { + if (IsSet()) { + return GetSetRef().size(); + } + return GetHllRef().Estimate(); + } + + static THybridHyperLogLog Create(unsigned precision) { + Y_ENSURE(precision >= THyperLogLog::PRECISION_MIN && precision <= THyperLogLog::PRECISION_MAX); + return THybridHyperLogLog(precision); + } + + static THybridHyperLogLog Load(IInputStream& in) { + char type; + Y_ENSURE(in.ReadChar(type)); + char precision; + Y_ENSURE(in.ReadChar(precision)); + auto res = Create(precision); + if (type) { + ::Load(&in, res.GetMutableSetRef()); + } else { + res.Var = THybridHll::Load(in); + } + return res; + } + + private: + std::variant<THybridHll, THybridSet> Var; + + size_t SizeLimit; + + unsigned Precision; + }; + + extern const char HyperLogLogResourceName[] = "HyperLogLog.State"; + + using THyperLogLogResource = TBoxedResource<THybridHyperLogLog, HyperLogLogResourceName>; + + class THyperLogLog_Create: public TBoxedValue { + public: + THyperLogLog_Create(TSourcePosition pos) + : Pos_(pos) + {} + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Create"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Create(args[1].Get<ui32>()))); + hll->Get()->Update(args[0].Get<ui64>()); + return TUnboxedValuePod(hll.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const 
TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(ui64, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLog_Create(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + class THyperLogLog_AddValue: public TBoxedValue { + public: + THyperLogLog_AddValue(TSourcePosition pos) + : Pos_(pos) + {} + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("AddValue"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THyperLogLogResource* resource = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get()); + resource->Get()->Update(args[1].Get<ui64>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, ui64)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLog_AddValue(builder.GetSourcePosition())); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + class THyperLogLog_Serialize: public TBoxedValue { + public: + THyperLogLog_Serialize(TSourcePosition pos) + : Pos_(pos) + {} + + public: + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Serialize"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + 
try { + TStringStream result; + static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get()->Save(result); + return valueBuilder->NewString(result.Str()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<char*(TResource<HyperLogLogResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLog_Serialize(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + class THyperLogLog_Deserialize: public TBoxedValue { + public: + THyperLogLog_Deserialize(TSourcePosition pos) + : Pos_(pos) + {} + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Deserialize"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + const TString arg(args[0].AsStringRef()); + TStringInput input(arg); + THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Load(input))); + return TUnboxedValuePod(hll.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(char*)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLog_Deserialize(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + class THyperLogLog_Merge: public TBoxedValue { + public: + 
THyperLogLog_Merge(TSourcePosition pos) + : Pos_(pos) + {} + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Merge"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + auto left = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); + static_cast<THyperLogLogResource*>(args[1].AsBoxed().Get())->Get()->Merge(*left); + return TUnboxedValuePod(args[1]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, TResource<HyperLogLogResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLog_Merge(builder.GetSourcePosition())); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + class THyperLogLog_GetResult: public TBoxedValue { + public: + THyperLogLog_GetResult(TSourcePosition pos) + : Pos_(pos) + {} + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("GetResult"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + auto hll = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); + return TUnboxedValuePod(hll->Estimate()); + } + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto resource = builder.Resource(HyperLogLogResourceName); + 
builder.Args()->Add(resource).Done().Returns<ui64>(); + + if (!typesOnly) { + builder.Implementation(new THyperLogLog_GetResult(builder.GetSourcePosition())); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + + private: + TSourcePosition Pos_; + }; + + SIMPLE_MODULE(THyperLogLogModule, + THyperLogLog_Create, + THyperLogLog_AddValue, + THyperLogLog_Serialize, + THyperLogLog_Deserialize, + THyperLogLog_Merge, + THyperLogLog_GetResult) +} + +REGISTER_MODULES(THyperLogLogModule) diff --git a/yql/essentials/udfs/common/hyperloglog/test/canondata/result.json b/yql/essentials/udfs/common/hyperloglog/test/canondata/result.json new file mode 100644 index 00000000000..fb6112fc5bc --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/test/canondata/result.json @@ -0,0 +1,7 @@ +{ + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..8a7a259e2de --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "str"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "double"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "int"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "5972" + ]; + [ + "1200" + ]; + [ + "5988" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in new file mode 100644 index 00000000000..d212651343d --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.in @@ -0,0 +1,6000 @@ +{"key"="-1000";"subkey"="-200";"value"="-11512"}; +{"key"="-999";"subkey"="-200";"value"="-11501"}; +{"key"="-998";"subkey"="-200";"value"="-11489"}; +{"key"="-997";"subkey"="-200";"value"="-11478"}; +{"key"="-996";"subkey"="-200";"value"="-11466"}; +{"key"="-995";"subkey"="-199";"value"="-11455"}; +{"key"="-994";"subkey"="-199";"value"="-11443"}; +{"key"="-993";"subkey"="-199";"value"="-11432"}; +{"key"="-992";"subkey"="-199";"value"="-11420"}; +{"key"="-991";"subkey"="-199";"value"="-11409"}; +{"key"="-990";"subkey"="-198";"value"="-11397"}; +{"key"="-989";"subkey"="-198";"value"="-11386"}; +{"key"="-988";"subkey"="-198";"value"="-11374"}; +{"key"="-987";"subkey"="-198";"value"="-11363"}; +{"key"="-986";"subkey"="-198";"value"="-11351"}; +{"key"="-985";"subkey"="-197";"value"="-11340"}; +{"key"="-984";"subkey"="-197";"value"="-11328"}; +{"key"="-983";"subkey"="-197";"value"="-11317"}; +{"key"="-982";"subkey"="-197";"value"="-11305"}; +{"key"="-981";"subkey"="-197";"value"="-11294"}; +{"key"="-980";"subkey"="-196";"value"="-11282"}; +{"key"="-979";"subkey"="-196";"value"="-11271"}; +{"key"="-978";"subkey"="-196";"value"="-11259"}; +{"key"="-977";"subkey"="-196";"value"="-11248"}; +{"key"="-976";"subkey"="-196";"value"="-11236"}; +{"key"="-975";"subkey"="-195";"value"="-11225"}; +{"key"="-974";"subkey"="-195";"value"="-11213"}; +{"key"="-973";"subkey"="-195";"value"="-11202"}; +{"key"="-972";"subkey"="-195";"value"="-11190"}; +{"key"="-971";"subkey"="-195";"value"="-11179"}; +{"key"="-970";"subkey"="-194";"value"="-11167"}; +{"key"="-969";"subkey"="-194";"value"="-11156"}; +{"key"="-968";"subkey"="-194";"value"="-11144"}; 
+{"key"="-967";"subkey"="-194";"value"="-11132"}; +{"key"="-966";"subkey"="-194";"value"="-11121"}; +{"key"="-965";"subkey"="-193";"value"="-11109"}; +{"key"="-964";"subkey"="-193";"value"="-11098"}; +{"key"="-963";"subkey"="-193";"value"="-11086"}; +{"key"="-962";"subkey"="-193";"value"="-11075"}; +{"key"="-961";"subkey"="-193";"value"="-11063"}; +{"key"="-960";"subkey"="-192";"value"="-11052"}; +{"key"="-959";"subkey"="-192";"value"="-11040"}; +{"key"="-958";"subkey"="-192";"value"="-11029"}; +{"key"="-957";"subkey"="-192";"value"="-11017"}; +{"key"="-956";"subkey"="-192";"value"="-11006"}; +{"key"="-955";"subkey"="-191";"value"="-10994"}; +{"key"="-954";"subkey"="-191";"value"="-10983"}; +{"key"="-953";"subkey"="-191";"value"="-10971"}; +{"key"="-952";"subkey"="-191";"value"="-10960"}; +{"key"="-951";"subkey"="-191";"value"="-10948"}; +{"key"="-950";"subkey"="-190";"value"="-10937"}; +{"key"="-949";"subkey"="-190";"value"="-10925"}; +{"key"="-948";"subkey"="-190";"value"="-10914"}; +{"key"="-947";"subkey"="-190";"value"="-10902"}; +{"key"="-946";"subkey"="-190";"value"="-10891"}; +{"key"="-945";"subkey"="-189";"value"="-10879"}; +{"key"="-944";"subkey"="-189";"value"="-10868"}; +{"key"="-943";"subkey"="-189";"value"="-10856"}; +{"key"="-942";"subkey"="-189";"value"="-10845"}; +{"key"="-941";"subkey"="-189";"value"="-10833"}; +{"key"="-940";"subkey"="-188";"value"="-10822"}; +{"key"="-939";"subkey"="-188";"value"="-10810"}; +{"key"="-938";"subkey"="-188";"value"="-10799"}; +{"key"="-937";"subkey"="-188";"value"="-10787"}; +{"key"="-936";"subkey"="-188";"value"="-10776"}; +{"key"="-935";"subkey"="-187";"value"="-10764"}; +{"key"="-934";"subkey"="-187";"value"="-10753"}; +{"key"="-933";"subkey"="-187";"value"="-10741"}; +{"key"="-932";"subkey"="-187";"value"="-10730"}; +{"key"="-931";"subkey"="-187";"value"="-10718"}; +{"key"="-930";"subkey"="-186";"value"="-10707"}; +{"key"="-929";"subkey"="-186";"value"="-10695"}; +{"key"="-928";"subkey"="-186";"value"="-10683"}; 
+{"key"="-927";"subkey"="-186";"value"="-10672"}; +{"key"="-926";"subkey"="-186";"value"="-10660"}; +{"key"="-925";"subkey"="-185";"value"="-10649"}; +{"key"="-924";"subkey"="-185";"value"="-10637"}; +{"key"="-923";"subkey"="-185";"value"="-10626"}; +{"key"="-922";"subkey"="-185";"value"="-10614"}; +{"key"="-921";"subkey"="-185";"value"="-10603"}; +{"key"="-920";"subkey"="-184";"value"="-10591"}; +{"key"="-919";"subkey"="-184";"value"="-10580"}; +{"key"="-918";"subkey"="-184";"value"="-10568"}; +{"key"="-917";"subkey"="-184";"value"="-10557"}; +{"key"="-916";"subkey"="-184";"value"="-10545"}; +{"key"="-915";"subkey"="-183";"value"="-10534"}; +{"key"="-914";"subkey"="-183";"value"="-10522"}; +{"key"="-913";"subkey"="-183";"value"="-10511"}; +{"key"="-912";"subkey"="-183";"value"="-10499"}; +{"key"="-911";"subkey"="-183";"value"="-10488"}; +{"key"="-910";"subkey"="-182";"value"="-10476"}; +{"key"="-909";"subkey"="-182";"value"="-10465"}; +{"key"="-908";"subkey"="-182";"value"="-10453"}; +{"key"="-907";"subkey"="-182";"value"="-10442"}; +{"key"="-906";"subkey"="-182";"value"="-10430"}; +{"key"="-905";"subkey"="-181";"value"="-10419"}; +{"key"="-904";"subkey"="-181";"value"="-10407"}; +{"key"="-903";"subkey"="-181";"value"="-10396"}; +{"key"="-902";"subkey"="-181";"value"="-10384"}; +{"key"="-901";"subkey"="-181";"value"="-10373"}; +{"key"="-900";"subkey"="-180";"value"="-10361"}; +{"key"="-899";"subkey"="-180";"value"="-10350"}; +{"key"="-898";"subkey"="-180";"value"="-10338"}; +{"key"="-897";"subkey"="-180";"value"="-10327"}; +{"key"="-896";"subkey"="-180";"value"="-10315"}; +{"key"="-895";"subkey"="-179";"value"="-10304"}; +{"key"="-894";"subkey"="-179";"value"="-10292"}; +{"key"="-893";"subkey"="-179";"value"="-10281"}; +{"key"="-892";"subkey"="-179";"value"="-10269"}; +{"key"="-891";"subkey"="-179";"value"="-10258"}; +{"key"="-890";"subkey"="-178";"value"="-10246"}; +{"key"="-889";"subkey"="-178";"value"="-10234"}; +{"key"="-888";"subkey"="-178";"value"="-10223"}; 
+{"key"="-887";"subkey"="-178";"value"="-10211"}; +{"key"="-886";"subkey"="-178";"value"="-10200"}; +{"key"="-885";"subkey"="-177";"value"="-10188"}; +{"key"="-884";"subkey"="-177";"value"="-10177"}; +{"key"="-883";"subkey"="-177";"value"="-10165"}; +{"key"="-882";"subkey"="-177";"value"="-10154"}; +{"key"="-881";"subkey"="-177";"value"="-10142"}; +{"key"="-880";"subkey"="-176";"value"="-10131"}; +{"key"="-879";"subkey"="-176";"value"="-10119"}; +{"key"="-878";"subkey"="-176";"value"="-10108"}; +{"key"="-877";"subkey"="-176";"value"="-10096"}; +{"key"="-876";"subkey"="-176";"value"="-10085"}; +{"key"="-875";"subkey"="-175";"value"="-10073"}; +{"key"="-874";"subkey"="-175";"value"="-10062"}; +{"key"="-873";"subkey"="-175";"value"="-10050"}; +{"key"="-872";"subkey"="-175";"value"="-10039"}; +{"key"="-871";"subkey"="-175";"value"="-10027"}; +{"key"="-870";"subkey"="-174";"value"="-10016"}; +{"key"="-869";"subkey"="-174";"value"="-10004"}; +{"key"="-868";"subkey"="-174";"value"="-9993"}; +{"key"="-867";"subkey"="-174";"value"="-9981"}; +{"key"="-866";"subkey"="-174";"value"="-9970"}; +{"key"="-865";"subkey"="-173";"value"="-9958"}; +{"key"="-864";"subkey"="-173";"value"="-9947"}; +{"key"="-863";"subkey"="-173";"value"="-9935"}; +{"key"="-862";"subkey"="-173";"value"="-9924"}; +{"key"="-861";"subkey"="-173";"value"="-9912"}; +{"key"="-860";"subkey"="-172";"value"="-9901"}; +{"key"="-859";"subkey"="-172";"value"="-9889"}; +{"key"="-858";"subkey"="-172";"value"="-9878"}; +{"key"="-857";"subkey"="-172";"value"="-9866"}; +{"key"="-856";"subkey"="-172";"value"="-9855"}; +{"key"="-855";"subkey"="-171";"value"="-9843"}; +{"key"="-854";"subkey"="-171";"value"="-9832"}; +{"key"="-853";"subkey"="-171";"value"="-9820"}; +{"key"="-852";"subkey"="-171";"value"="-9809"}; +{"key"="-851";"subkey"="-171";"value"="-9797"}; +{"key"="-850";"subkey"="-170";"value"="-9785"}; +{"key"="-849";"subkey"="-170";"value"="-9774"}; +{"key"="-848";"subkey"="-170";"value"="-9762"}; 
+{"key"="-847";"subkey"="-170";"value"="-9751"}; +{"key"="-846";"subkey"="-170";"value"="-9739"}; +{"key"="-845";"subkey"="-169";"value"="-9728"}; +{"key"="-844";"subkey"="-169";"value"="-9716"}; +{"key"="-843";"subkey"="-169";"value"="-9705"}; +{"key"="-842";"subkey"="-169";"value"="-9693"}; +{"key"="-841";"subkey"="-169";"value"="-9682"}; +{"key"="-840";"subkey"="-168";"value"="-9670"}; +{"key"="-839";"subkey"="-168";"value"="-9659"}; +{"key"="-838";"subkey"="-168";"value"="-9647"}; +{"key"="-837";"subkey"="-168";"value"="-9636"}; +{"key"="-836";"subkey"="-168";"value"="-9624"}; +{"key"="-835";"subkey"="-167";"value"="-9613"}; +{"key"="-834";"subkey"="-167";"value"="-9601"}; +{"key"="-833";"subkey"="-167";"value"="-9590"}; +{"key"="-832";"subkey"="-167";"value"="-9578"}; +{"key"="-831";"subkey"="-167";"value"="-9567"}; +{"key"="-830";"subkey"="-166";"value"="-9555"}; +{"key"="-829";"subkey"="-166";"value"="-9544"}; +{"key"="-828";"subkey"="-166";"value"="-9532"}; +{"key"="-827";"subkey"="-166";"value"="-9521"}; +{"key"="-826";"subkey"="-166";"value"="-9509"}; +{"key"="-825";"subkey"="-165";"value"="-9498"}; +{"key"="-824";"subkey"="-165";"value"="-9486"}; +{"key"="-823";"subkey"="-165";"value"="-9475"}; +{"key"="-822";"subkey"="-165";"value"="-9463"}; +{"key"="-821";"subkey"="-165";"value"="-9452"}; +{"key"="-820";"subkey"="-164";"value"="-9440"}; +{"key"="-819";"subkey"="-164";"value"="-9429"}; +{"key"="-818";"subkey"="-164";"value"="-9417"}; +{"key"="-817";"subkey"="-164";"value"="-9406"}; +{"key"="-816";"subkey"="-164";"value"="-9394"}; +{"key"="-815";"subkey"="-163";"value"="-9383"}; +{"key"="-814";"subkey"="-163";"value"="-9371"}; +{"key"="-813";"subkey"="-163";"value"="-9360"}; +{"key"="-812";"subkey"="-163";"value"="-9348"}; +{"key"="-811";"subkey"="-163";"value"="-9336"}; +{"key"="-810";"subkey"="-162";"value"="-9325"}; +{"key"="-809";"subkey"="-162";"value"="-9313"}; +{"key"="-808";"subkey"="-162";"value"="-9302"}; 
+{"key"="-807";"subkey"="-162";"value"="-9290"}; +{"key"="-806";"subkey"="-162";"value"="-9279"}; +{"key"="-805";"subkey"="-161";"value"="-9267"}; +{"key"="-804";"subkey"="-161";"value"="-9256"}; +{"key"="-803";"subkey"="-161";"value"="-9244"}; +{"key"="-802";"subkey"="-161";"value"="-9233"}; +{"key"="-801";"subkey"="-161";"value"="-9221"}; +{"key"="-800";"subkey"="-160";"value"="-9210"}; +{"key"="-799";"subkey"="-160";"value"="-9198"}; +{"key"="-798";"subkey"="-160";"value"="-9187"}; +{"key"="-797";"subkey"="-160";"value"="-9175"}; +{"key"="-796";"subkey"="-160";"value"="-9164"}; +{"key"="-795";"subkey"="-159";"value"="-9152"}; +{"key"="-794";"subkey"="-159";"value"="-9141"}; +{"key"="-793";"subkey"="-159";"value"="-9129"}; +{"key"="-792";"subkey"="-159";"value"="-9118"}; +{"key"="-791";"subkey"="-159";"value"="-9106"}; +{"key"="-790";"subkey"="-158";"value"="-9095"}; +{"key"="-789";"subkey"="-158";"value"="-9083"}; +{"key"="-788";"subkey"="-158";"value"="-9072"}; +{"key"="-787";"subkey"="-158";"value"="-9060"}; +{"key"="-786";"subkey"="-158";"value"="-9049"}; +{"key"="-785";"subkey"="-157";"value"="-9037"}; +{"key"="-784";"subkey"="-157";"value"="-9026"}; +{"key"="-783";"subkey"="-157";"value"="-9014"}; +{"key"="-782";"subkey"="-157";"value"="-9003"}; +{"key"="-781";"subkey"="-157";"value"="-8991"}; +{"key"="-780";"subkey"="-156";"value"="-8980"}; +{"key"="-779";"subkey"="-156";"value"="-8968"}; +{"key"="-778";"subkey"="-156";"value"="-8957"}; +{"key"="-777";"subkey"="-156";"value"="-8945"}; +{"key"="-776";"subkey"="-156";"value"="-8934"}; +{"key"="-775";"subkey"="-155";"value"="-8922"}; +{"key"="-774";"subkey"="-155";"value"="-8911"}; +{"key"="-773";"subkey"="-155";"value"="-8899"}; +{"key"="-772";"subkey"="-155";"value"="-8887"}; +{"key"="-771";"subkey"="-155";"value"="-8876"}; +{"key"="-770";"subkey"="-154";"value"="-8864"}; +{"key"="-769";"subkey"="-154";"value"="-8853"}; +{"key"="-768";"subkey"="-154";"value"="-8841"}; 
+{"key"="-767";"subkey"="-154";"value"="-8830"}; +{"key"="-766";"subkey"="-154";"value"="-8818"}; +{"key"="-765";"subkey"="-153";"value"="-8807"}; +{"key"="-764";"subkey"="-153";"value"="-8795"}; +{"key"="-763";"subkey"="-153";"value"="-8784"}; +{"key"="-762";"subkey"="-153";"value"="-8772"}; +{"key"="-761";"subkey"="-153";"value"="-8761"}; +{"key"="-760";"subkey"="-152";"value"="-8749"}; +{"key"="-759";"subkey"="-152";"value"="-8738"}; +{"key"="-758";"subkey"="-152";"value"="-8726"}; +{"key"="-757";"subkey"="-152";"value"="-8715"}; +{"key"="-756";"subkey"="-152";"value"="-8703"}; +{"key"="-755";"subkey"="-151";"value"="-8692"}; +{"key"="-754";"subkey"="-151";"value"="-8680"}; +{"key"="-753";"subkey"="-151";"value"="-8669"}; +{"key"="-752";"subkey"="-151";"value"="-8657"}; +{"key"="-751";"subkey"="-151";"value"="-8646"}; +{"key"="-750";"subkey"="-150";"value"="-8634"}; +{"key"="-749";"subkey"="-150";"value"="-8623"}; +{"key"="-748";"subkey"="-150";"value"="-8611"}; +{"key"="-747";"subkey"="-150";"value"="-8600"}; +{"key"="-746";"subkey"="-150";"value"="-8588"}; +{"key"="-745";"subkey"="-149";"value"="-8577"}; +{"key"="-744";"subkey"="-149";"value"="-8565"}; +{"key"="-743";"subkey"="-149";"value"="-8554"}; +{"key"="-742";"subkey"="-149";"value"="-8542"}; +{"key"="-741";"subkey"="-149";"value"="-8531"}; +{"key"="-740";"subkey"="-148";"value"="-8519"}; +{"key"="-739";"subkey"="-148";"value"="-8508"}; +{"key"="-738";"subkey"="-148";"value"="-8496"}; +{"key"="-737";"subkey"="-148";"value"="-8485"}; +{"key"="-736";"subkey"="-148";"value"="-8473"}; +{"key"="-735";"subkey"="-147";"value"="-8462"}; +{"key"="-734";"subkey"="-147";"value"="-8450"}; +{"key"="-733";"subkey"="-147";"value"="-8438"}; +{"key"="-732";"subkey"="-147";"value"="-8427"}; +{"key"="-731";"subkey"="-147";"value"="-8415"}; +{"key"="-730";"subkey"="-146";"value"="-8404"}; +{"key"="-729";"subkey"="-146";"value"="-8392"}; +{"key"="-728";"subkey"="-146";"value"="-8381"}; 
+{"key"="-727";"subkey"="-146";"value"="-8369"}; +{"key"="-726";"subkey"="-146";"value"="-8358"}; +{"key"="-725";"subkey"="-145";"value"="-8346"}; +{"key"="-724";"subkey"="-145";"value"="-8335"}; +{"key"="-723";"subkey"="-145";"value"="-8323"}; +{"key"="-722";"subkey"="-145";"value"="-8312"}; +{"key"="-721";"subkey"="-145";"value"="-8300"}; +{"key"="-720";"subkey"="-144";"value"="-8289"}; +{"key"="-719";"subkey"="-144";"value"="-8277"}; +{"key"="-718";"subkey"="-144";"value"="-8266"}; +{"key"="-717";"subkey"="-144";"value"="-8254"}; +{"key"="-716";"subkey"="-144";"value"="-8243"}; +{"key"="-715";"subkey"="-143";"value"="-8231"}; +{"key"="-714";"subkey"="-143";"value"="-8220"}; +{"key"="-713";"subkey"="-143";"value"="-8208"}; +{"key"="-712";"subkey"="-143";"value"="-8197"}; +{"key"="-711";"subkey"="-143";"value"="-8185"}; +{"key"="-710";"subkey"="-142";"value"="-8174"}; +{"key"="-709";"subkey"="-142";"value"="-8162"}; +{"key"="-708";"subkey"="-142";"value"="-8151"}; +{"key"="-707";"subkey"="-142";"value"="-8139"}; +{"key"="-706";"subkey"="-142";"value"="-8128"}; +{"key"="-705";"subkey"="-141";"value"="-8116"}; +{"key"="-704";"subkey"="-141";"value"="-8105"}; +{"key"="-703";"subkey"="-141";"value"="-8093"}; +{"key"="-702";"subkey"="-141";"value"="-8082"}; +{"key"="-701";"subkey"="-141";"value"="-8070"}; +{"key"="-700";"subkey"="-140";"value"="-8059"}; +{"key"="-699";"subkey"="-140";"value"="-8047"}; +{"key"="-698";"subkey"="-140";"value"="-8036"}; +{"key"="-697";"subkey"="-140";"value"="-8024"}; +{"key"="-696";"subkey"="-140";"value"="-8012"}; +{"key"="-695";"subkey"="-139";"value"="-8001"}; +{"key"="-694";"subkey"="-139";"value"="-7989"}; +{"key"="-693";"subkey"="-139";"value"="-7978"}; +{"key"="-692";"subkey"="-139";"value"="-7966"}; +{"key"="-691";"subkey"="-139";"value"="-7955"}; +{"key"="-690";"subkey"="-138";"value"="-7943"}; +{"key"="-689";"subkey"="-138";"value"="-7932"}; +{"key"="-688";"subkey"="-138";"value"="-7920"}; 
+{"key"="-687";"subkey"="-138";"value"="-7909"}; +{"key"="-686";"subkey"="-138";"value"="-7897"}; +{"key"="-685";"subkey"="-137";"value"="-7886"}; +{"key"="-684";"subkey"="-137";"value"="-7874"}; +{"key"="-683";"subkey"="-137";"value"="-7863"}; +{"key"="-682";"subkey"="-137";"value"="-7851"}; +{"key"="-681";"subkey"="-137";"value"="-7840"}; +{"key"="-680";"subkey"="-136";"value"="-7828"}; +{"key"="-679";"subkey"="-136";"value"="-7817"}; +{"key"="-678";"subkey"="-136";"value"="-7805"}; +{"key"="-677";"subkey"="-136";"value"="-7794"}; +{"key"="-676";"subkey"="-136";"value"="-7782"}; +{"key"="-675";"subkey"="-135";"value"="-7771"}; +{"key"="-674";"subkey"="-135";"value"="-7759"}; +{"key"="-673";"subkey"="-135";"value"="-7748"}; +{"key"="-672";"subkey"="-135";"value"="-7736"}; +{"key"="-671";"subkey"="-135";"value"="-7725"}; +{"key"="-670";"subkey"="-134";"value"="-7713"}; +{"key"="-669";"subkey"="-134";"value"="-7702"}; +{"key"="-668";"subkey"="-134";"value"="-7690"}; +{"key"="-667";"subkey"="-134";"value"="-7679"}; +{"key"="-666";"subkey"="-134";"value"="-7667"}; +{"key"="-665";"subkey"="-133";"value"="-7656"}; +{"key"="-664";"subkey"="-133";"value"="-7644"}; +{"key"="-663";"subkey"="-133";"value"="-7633"}; +{"key"="-662";"subkey"="-133";"value"="-7621"}; +{"key"="-661";"subkey"="-133";"value"="-7610"}; +{"key"="-660";"subkey"="-132";"value"="-7598"}; +{"key"="-659";"subkey"="-132";"value"="-7587"}; +{"key"="-658";"subkey"="-132";"value"="-7575"}; +{"key"="-657";"subkey"="-132";"value"="-7563"}; +{"key"="-656";"subkey"="-132";"value"="-7552"}; +{"key"="-655";"subkey"="-131";"value"="-7540"}; +{"key"="-654";"subkey"="-131";"value"="-7529"}; +{"key"="-653";"subkey"="-131";"value"="-7517"}; +{"key"="-652";"subkey"="-131";"value"="-7506"}; +{"key"="-651";"subkey"="-131";"value"="-7494"}; +{"key"="-650";"subkey"="-130";"value"="-7483"}; +{"key"="-649";"subkey"="-130";"value"="-7471"}; +{"key"="-648";"subkey"="-130";"value"="-7460"}; 
+{"key"="-647";"subkey"="-130";"value"="-7448"}; +{"key"="-646";"subkey"="-130";"value"="-7437"}; +{"key"="-645";"subkey"="-129";"value"="-7425"}; +{"key"="-644";"subkey"="-129";"value"="-7414"}; +{"key"="-643";"subkey"="-129";"value"="-7402"}; +{"key"="-642";"subkey"="-129";"value"="-7391"}; +{"key"="-641";"subkey"="-129";"value"="-7379"}; +{"key"="-640";"subkey"="-128";"value"="-7368"}; +{"key"="-639";"subkey"="-128";"value"="-7356"}; +{"key"="-638";"subkey"="-128";"value"="-7345"}; +{"key"="-637";"subkey"="-128";"value"="-7333"}; +{"key"="-636";"subkey"="-128";"value"="-7322"}; +{"key"="-635";"subkey"="-127";"value"="-7310"}; +{"key"="-634";"subkey"="-127";"value"="-7299"}; +{"key"="-633";"subkey"="-127";"value"="-7287"}; +{"key"="-632";"subkey"="-127";"value"="-7276"}; +{"key"="-631";"subkey"="-127";"value"="-7264"}; +{"key"="-630";"subkey"="-126";"value"="-7253"}; +{"key"="-629";"subkey"="-126";"value"="-7241"}; +{"key"="-628";"subkey"="-126";"value"="-7230"}; +{"key"="-627";"subkey"="-126";"value"="-7218"}; +{"key"="-626";"subkey"="-126";"value"="-7207"}; +{"key"="-625";"subkey"="-125";"value"="-7195"}; +{"key"="-624";"subkey"="-125";"value"="-7184"}; +{"key"="-623";"subkey"="-125";"value"="-7172"}; +{"key"="-622";"subkey"="-125";"value"="-7161"}; +{"key"="-621";"subkey"="-125";"value"="-7149"}; +{"key"="-620";"subkey"="-124";"value"="-7138"}; +{"key"="-619";"subkey"="-124";"value"="-7126"}; +{"key"="-618";"subkey"="-124";"value"="-7114"}; +{"key"="-617";"subkey"="-124";"value"="-7103"}; +{"key"="-616";"subkey"="-124";"value"="-7091"}; +{"key"="-615";"subkey"="-123";"value"="-7080"}; +{"key"="-614";"subkey"="-123";"value"="-7068"}; +{"key"="-613";"subkey"="-123";"value"="-7057"}; +{"key"="-612";"subkey"="-123";"value"="-7045"}; +{"key"="-611";"subkey"="-123";"value"="-7034"}; +{"key"="-610";"subkey"="-122";"value"="-7022"}; +{"key"="-609";"subkey"="-122";"value"="-7011"}; +{"key"="-608";"subkey"="-122";"value"="-6999"}; 
+{"key"="-607";"subkey"="-122";"value"="-6988"}; +{"key"="-606";"subkey"="-122";"value"="-6976"}; +{"key"="-605";"subkey"="-121";"value"="-6965"}; +{"key"="-604";"subkey"="-121";"value"="-6953"}; +{"key"="-603";"subkey"="-121";"value"="-6942"}; +{"key"="-602";"subkey"="-121";"value"="-6930"}; +{"key"="-601";"subkey"="-121";"value"="-6919"}; +{"key"="-600";"subkey"="-120";"value"="-6907"}; +{"key"="-599";"subkey"="-120";"value"="-6896"}; +{"key"="-598";"subkey"="-120";"value"="-6884"}; +{"key"="-597";"subkey"="-120";"value"="-6873"}; +{"key"="-596";"subkey"="-120";"value"="-6861"}; +{"key"="-595";"subkey"="-119";"value"="-6850"}; +{"key"="-594";"subkey"="-119";"value"="-6838"}; +{"key"="-593";"subkey"="-119";"value"="-6827"}; +{"key"="-592";"subkey"="-119";"value"="-6815"}; +{"key"="-591";"subkey"="-119";"value"="-6804"}; +{"key"="-590";"subkey"="-118";"value"="-6792"}; +{"key"="-589";"subkey"="-118";"value"="-6781"}; +{"key"="-588";"subkey"="-118";"value"="-6769"}; +{"key"="-587";"subkey"="-118";"value"="-6758"}; +{"key"="-586";"subkey"="-118";"value"="-6746"}; +{"key"="-585";"subkey"="-117";"value"="-6735"}; +{"key"="-584";"subkey"="-117";"value"="-6723"}; +{"key"="-583";"subkey"="-117";"value"="-6712"}; +{"key"="-582";"subkey"="-117";"value"="-6700"}; +{"key"="-581";"subkey"="-117";"value"="-6689"}; +{"key"="-580";"subkey"="-116";"value"="-6677"}; +{"key"="-579";"subkey"="-116";"value"="-6665"}; +{"key"="-578";"subkey"="-116";"value"="-6654"}; +{"key"="-577";"subkey"="-116";"value"="-6642"}; +{"key"="-576";"subkey"="-116";"value"="-6631"}; +{"key"="-575";"subkey"="-115";"value"="-6619"}; +{"key"="-574";"subkey"="-115";"value"="-6608"}; +{"key"="-573";"subkey"="-115";"value"="-6596"}; +{"key"="-572";"subkey"="-115";"value"="-6585"}; +{"key"="-571";"subkey"="-115";"value"="-6573"}; +{"key"="-570";"subkey"="-114";"value"="-6562"}; +{"key"="-569";"subkey"="-114";"value"="-6550"}; +{"key"="-568";"subkey"="-114";"value"="-6539"}; 
+{"key"="-567";"subkey"="-114";"value"="-6527"}; +{"key"="-566";"subkey"="-114";"value"="-6516"}; +{"key"="-565";"subkey"="-113";"value"="-6504"}; +{"key"="-564";"subkey"="-113";"value"="-6493"}; +{"key"="-563";"subkey"="-113";"value"="-6481"}; +{"key"="-562";"subkey"="-113";"value"="-6470"}; +{"key"="-561";"subkey"="-113";"value"="-6458"}; +{"key"="-560";"subkey"="-112";"value"="-6447"}; +{"key"="-559";"subkey"="-112";"value"="-6435"}; +{"key"="-558";"subkey"="-112";"value"="-6424"}; +{"key"="-557";"subkey"="-112";"value"="-6412"}; +{"key"="-556";"subkey"="-112";"value"="-6401"}; +{"key"="-555";"subkey"="-111";"value"="-6389"}; +{"key"="-554";"subkey"="-111";"value"="-6378"}; +{"key"="-553";"subkey"="-111";"value"="-6366"}; +{"key"="-552";"subkey"="-111";"value"="-6355"}; +{"key"="-551";"subkey"="-111";"value"="-6343"}; +{"key"="-550";"subkey"="-110";"value"="-6332"}; +{"key"="-549";"subkey"="-110";"value"="-6320"}; +{"key"="-548";"subkey"="-110";"value"="-6309"}; +{"key"="-547";"subkey"="-110";"value"="-6297"}; +{"key"="-546";"subkey"="-110";"value"="-6286"}; +{"key"="-545";"subkey"="-109";"value"="-6274"}; +{"key"="-544";"subkey"="-109";"value"="-6263"}; +{"key"="-543";"subkey"="-109";"value"="-6251"}; +{"key"="-542";"subkey"="-109";"value"="-6240"}; +{"key"="-541";"subkey"="-109";"value"="-6228"}; +{"key"="-540";"subkey"="-108";"value"="-6216"}; +{"key"="-539";"subkey"="-108";"value"="-6205"}; +{"key"="-538";"subkey"="-108";"value"="-6193"}; +{"key"="-537";"subkey"="-108";"value"="-6182"}; +{"key"="-536";"subkey"="-108";"value"="-6170"}; +{"key"="-535";"subkey"="-107";"value"="-6159"}; +{"key"="-534";"subkey"="-107";"value"="-6147"}; +{"key"="-533";"subkey"="-107";"value"="-6136"}; +{"key"="-532";"subkey"="-107";"value"="-6124"}; +{"key"="-531";"subkey"="-107";"value"="-6113"}; +{"key"="-530";"subkey"="-106";"value"="-6101"}; +{"key"="-529";"subkey"="-106";"value"="-6090"}; +{"key"="-528";"subkey"="-106";"value"="-6078"}; 
+{"key"="-527";"subkey"="-106";"value"="-6067"}; +{"key"="-526";"subkey"="-106";"value"="-6055"}; +{"key"="-525";"subkey"="-105";"value"="-6044"}; +{"key"="-524";"subkey"="-105";"value"="-6032"}; +{"key"="-523";"subkey"="-105";"value"="-6021"}; +{"key"="-522";"subkey"="-105";"value"="-6009"}; +{"key"="-521";"subkey"="-105";"value"="-5998"}; +{"key"="-520";"subkey"="-104";"value"="-5986"}; +{"key"="-519";"subkey"="-104";"value"="-5975"}; +{"key"="-518";"subkey"="-104";"value"="-5963"}; +{"key"="-517";"subkey"="-104";"value"="-5952"}; +{"key"="-516";"subkey"="-104";"value"="-5940"}; +{"key"="-515";"subkey"="-103";"value"="-5929"}; +{"key"="-514";"subkey"="-103";"value"="-5917"}; +{"key"="-513";"subkey"="-103";"value"="-5906"}; +{"key"="-512";"subkey"="-103";"value"="-5894"}; +{"key"="-511";"subkey"="-103";"value"="-5883"}; +{"key"="-510";"subkey"="-102";"value"="-5871"}; +{"key"="-509";"subkey"="-102";"value"="-5860"}; +{"key"="-508";"subkey"="-102";"value"="-5848"}; +{"key"="-507";"subkey"="-102";"value"="-5837"}; +{"key"="-506";"subkey"="-102";"value"="-5825"}; +{"key"="-505";"subkey"="-101";"value"="-5814"}; +{"key"="-504";"subkey"="-101";"value"="-5802"}; +{"key"="-503";"subkey"="-101";"value"="-5791"}; +{"key"="-502";"subkey"="-101";"value"="-5779"}; +{"key"="-501";"subkey"="-101";"value"="-5767"}; +{"key"="-500";"subkey"="-100";"value"="-5756"}; +{"key"="-499";"subkey"="-100";"value"="-5744"}; +{"key"="-498";"subkey"="-100";"value"="-5733"}; +{"key"="-497";"subkey"="-100";"value"="-5721"}; +{"key"="-496";"subkey"="-100";"value"="-5710"}; +{"key"="-495";"subkey"="-99";"value"="-5698"}; +{"key"="-494";"subkey"="-99";"value"="-5687"}; +{"key"="-493";"subkey"="-99";"value"="-5675"}; +{"key"="-492";"subkey"="-99";"value"="-5664"}; +{"key"="-491";"subkey"="-99";"value"="-5652"}; +{"key"="-490";"subkey"="-98";"value"="-5641"}; +{"key"="-489";"subkey"="-98";"value"="-5629"}; +{"key"="-488";"subkey"="-98";"value"="-5618"}; +{"key"="-487";"subkey"="-98";"value"="-5606"}; 
+{"key"="-486";"subkey"="-98";"value"="-5595"}; +{"key"="-485";"subkey"="-97";"value"="-5583"}; +{"key"="-484";"subkey"="-97";"value"="-5572"}; +{"key"="-483";"subkey"="-97";"value"="-5560"}; +{"key"="-482";"subkey"="-97";"value"="-5549"}; +{"key"="-481";"subkey"="-97";"value"="-5537"}; +{"key"="-480";"subkey"="-96";"value"="-5526"}; +{"key"="-479";"subkey"="-96";"value"="-5514"}; +{"key"="-478";"subkey"="-96";"value"="-5503"}; +{"key"="-477";"subkey"="-96";"value"="-5491"}; +{"key"="-476";"subkey"="-96";"value"="-5480"}; +{"key"="-475";"subkey"="-95";"value"="-5468"}; +{"key"="-474";"subkey"="-95";"value"="-5457"}; +{"key"="-473";"subkey"="-95";"value"="-5445"}; +{"key"="-472";"subkey"="-95";"value"="-5434"}; +{"key"="-471";"subkey"="-95";"value"="-5422"}; +{"key"="-470";"subkey"="-94";"value"="-5411"}; +{"key"="-469";"subkey"="-94";"value"="-5399"}; +{"key"="-468";"subkey"="-94";"value"="-5388"}; +{"key"="-467";"subkey"="-94";"value"="-5376"}; +{"key"="-466";"subkey"="-94";"value"="-5365"}; +{"key"="-465";"subkey"="-93";"value"="-5353"}; +{"key"="-464";"subkey"="-93";"value"="-5341"}; +{"key"="-463";"subkey"="-93";"value"="-5330"}; +{"key"="-462";"subkey"="-93";"value"="-5318"}; +{"key"="-461";"subkey"="-93";"value"="-5307"}; +{"key"="-460";"subkey"="-92";"value"="-5295"}; +{"key"="-459";"subkey"="-92";"value"="-5284"}; +{"key"="-458";"subkey"="-92";"value"="-5272"}; +{"key"="-457";"subkey"="-92";"value"="-5261"}; +{"key"="-456";"subkey"="-92";"value"="-5249"}; +{"key"="-455";"subkey"="-91";"value"="-5238"}; +{"key"="-454";"subkey"="-91";"value"="-5226"}; +{"key"="-453";"subkey"="-91";"value"="-5215"}; +{"key"="-452";"subkey"="-91";"value"="-5203"}; +{"key"="-451";"subkey"="-91";"value"="-5192"}; +{"key"="-450";"subkey"="-90";"value"="-5180"}; +{"key"="-449";"subkey"="-90";"value"="-5169"}; +{"key"="-448";"subkey"="-90";"value"="-5157"}; +{"key"="-447";"subkey"="-90";"value"="-5146"}; +{"key"="-446";"subkey"="-90";"value"="-5134"}; 
+{"key"="-445";"subkey"="-89";"value"="-5123"}; +{"key"="-444";"subkey"="-89";"value"="-5111"}; +{"key"="-443";"subkey"="-89";"value"="-5100"}; +{"key"="-442";"subkey"="-89";"value"="-5088"}; +{"key"="-441";"subkey"="-89";"value"="-5077"}; +{"key"="-440";"subkey"="-88";"value"="-5065"}; +{"key"="-439";"subkey"="-88";"value"="-5054"}; +{"key"="-438";"subkey"="-88";"value"="-5042"}; +{"key"="-437";"subkey"="-88";"value"="-5031"}; +{"key"="-436";"subkey"="-88";"value"="-5019"}; +{"key"="-435";"subkey"="-87";"value"="-5008"}; +{"key"="-434";"subkey"="-87";"value"="-4996"}; +{"key"="-433";"subkey"="-87";"value"="-4985"}; +{"key"="-432";"subkey"="-87";"value"="-4973"}; +{"key"="-431";"subkey"="-87";"value"="-4962"}; +{"key"="-430";"subkey"="-86";"value"="-4950"}; +{"key"="-429";"subkey"="-86";"value"="-4939"}; +{"key"="-428";"subkey"="-86";"value"="-4927"}; +{"key"="-427";"subkey"="-86";"value"="-4916"}; +{"key"="-426";"subkey"="-86";"value"="-4904"}; +{"key"="-425";"subkey"="-85";"value"="-4892"}; +{"key"="-424";"subkey"="-85";"value"="-4881"}; +{"key"="-423";"subkey"="-85";"value"="-4869"}; +{"key"="-422";"subkey"="-85";"value"="-4858"}; +{"key"="-421";"subkey"="-85";"value"="-4846"}; +{"key"="-420";"subkey"="-84";"value"="-4835"}; +{"key"="-419";"subkey"="-84";"value"="-4823"}; +{"key"="-418";"subkey"="-84";"value"="-4812"}; +{"key"="-417";"subkey"="-84";"value"="-4800"}; +{"key"="-416";"subkey"="-84";"value"="-4789"}; +{"key"="-415";"subkey"="-83";"value"="-4777"}; +{"key"="-414";"subkey"="-83";"value"="-4766"}; +{"key"="-413";"subkey"="-83";"value"="-4754"}; +{"key"="-412";"subkey"="-83";"value"="-4743"}; +{"key"="-411";"subkey"="-83";"value"="-4731"}; +{"key"="-410";"subkey"="-82";"value"="-4720"}; +{"key"="-409";"subkey"="-82";"value"="-4708"}; +{"key"="-408";"subkey"="-82";"value"="-4697"}; +{"key"="-407";"subkey"="-82";"value"="-4685"}; +{"key"="-406";"subkey"="-82";"value"="-4674"}; +{"key"="-405";"subkey"="-81";"value"="-4662"}; 
+{"key"="-404";"subkey"="-81";"value"="-4651"}; +{"key"="-403";"subkey"="-81";"value"="-4639"}; +{"key"="-402";"subkey"="-81";"value"="-4628"}; +{"key"="-401";"subkey"="-81";"value"="-4616"}; +{"key"="-400";"subkey"="-80";"value"="-4605"}; +{"key"="-399";"subkey"="-80";"value"="-4593"}; +{"key"="-398";"subkey"="-80";"value"="-4582"}; +{"key"="-397";"subkey"="-80";"value"="-4570"}; +{"key"="-396";"subkey"="-80";"value"="-4559"}; +{"key"="-395";"subkey"="-79";"value"="-4547"}; +{"key"="-394";"subkey"="-79";"value"="-4536"}; +{"key"="-393";"subkey"="-79";"value"="-4524"}; +{"key"="-392";"subkey"="-79";"value"="-4513"}; +{"key"="-391";"subkey"="-79";"value"="-4501"}; +{"key"="-390";"subkey"="-78";"value"="-4490"}; +{"key"="-389";"subkey"="-78";"value"="-4478"}; +{"key"="-388";"subkey"="-78";"value"="-4467"}; +{"key"="-387";"subkey"="-78";"value"="-4455"}; +{"key"="-386";"subkey"="-78";"value"="-4443"}; +{"key"="-385";"subkey"="-77";"value"="-4432"}; +{"key"="-384";"subkey"="-77";"value"="-4420"}; +{"key"="-383";"subkey"="-77";"value"="-4409"}; +{"key"="-382";"subkey"="-77";"value"="-4397"}; +{"key"="-381";"subkey"="-77";"value"="-4386"}; +{"key"="-380";"subkey"="-76";"value"="-4374"}; +{"key"="-379";"subkey"="-76";"value"="-4363"}; +{"key"="-378";"subkey"="-76";"value"="-4351"}; +{"key"="-377";"subkey"="-76";"value"="-4340"}; +{"key"="-376";"subkey"="-76";"value"="-4328"}; +{"key"="-375";"subkey"="-75";"value"="-4317"}; +{"key"="-374";"subkey"="-75";"value"="-4305"}; +{"key"="-373";"subkey"="-75";"value"="-4294"}; +{"key"="-372";"subkey"="-75";"value"="-4282"}; +{"key"="-371";"subkey"="-75";"value"="-4271"}; +{"key"="-370";"subkey"="-74";"value"="-4259"}; +{"key"="-369";"subkey"="-74";"value"="-4248"}; +{"key"="-368";"subkey"="-74";"value"="-4236"}; +{"key"="-367";"subkey"="-74";"value"="-4225"}; +{"key"="-366";"subkey"="-74";"value"="-4213"}; +{"key"="-365";"subkey"="-73";"value"="-4202"}; +{"key"="-364";"subkey"="-73";"value"="-4190"}; 
+{"key"="-363";"subkey"="-73";"value"="-4179"}; +{"key"="-362";"subkey"="-73";"value"="-4167"}; +{"key"="-361";"subkey"="-73";"value"="-4156"}; +{"key"="-360";"subkey"="-72";"value"="-4144"}; +{"key"="-359";"subkey"="-72";"value"="-4133"}; +{"key"="-358";"subkey"="-72";"value"="-4121"}; +{"key"="-357";"subkey"="-72";"value"="-4110"}; +{"key"="-356";"subkey"="-72";"value"="-4098"}; +{"key"="-355";"subkey"="-71";"value"="-4087"}; +{"key"="-354";"subkey"="-71";"value"="-4075"}; +{"key"="-353";"subkey"="-71";"value"="-4064"}; +{"key"="-352";"subkey"="-71";"value"="-4052"}; +{"key"="-351";"subkey"="-71";"value"="-4041"}; +{"key"="-350";"subkey"="-70";"value"="-4029"}; +{"key"="-349";"subkey"="-70";"value"="-4018"}; +{"key"="-348";"subkey"="-70";"value"="-4006"}; +{"key"="-347";"subkey"="-70";"value"="-3994"}; +{"key"="-346";"subkey"="-70";"value"="-3983"}; +{"key"="-345";"subkey"="-69";"value"="-3971"}; +{"key"="-344";"subkey"="-69";"value"="-3960"}; +{"key"="-343";"subkey"="-69";"value"="-3948"}; +{"key"="-342";"subkey"="-69";"value"="-3937"}; +{"key"="-341";"subkey"="-69";"value"="-3925"}; +{"key"="-340";"subkey"="-68";"value"="-3914"}; +{"key"="-339";"subkey"="-68";"value"="-3902"}; +{"key"="-338";"subkey"="-68";"value"="-3891"}; +{"key"="-337";"subkey"="-68";"value"="-3879"}; +{"key"="-336";"subkey"="-68";"value"="-3868"}; +{"key"="-335";"subkey"="-67";"value"="-3856"}; +{"key"="-334";"subkey"="-67";"value"="-3845"}; +{"key"="-333";"subkey"="-67";"value"="-3833"}; +{"key"="-332";"subkey"="-67";"value"="-3822"}; +{"key"="-331";"subkey"="-67";"value"="-3810"}; +{"key"="-330";"subkey"="-66";"value"="-3799"}; +{"key"="-329";"subkey"="-66";"value"="-3787"}; +{"key"="-328";"subkey"="-66";"value"="-3776"}; +{"key"="-327";"subkey"="-66";"value"="-3764"}; +{"key"="-326";"subkey"="-66";"value"="-3753"}; +{"key"="-325";"subkey"="-65";"value"="-3741"}; +{"key"="-324";"subkey"="-65";"value"="-3730"}; +{"key"="-323";"subkey"="-65";"value"="-3718"}; 
+{"key"="-322";"subkey"="-65";"value"="-3707"}; +{"key"="-321";"subkey"="-65";"value"="-3695"}; +{"key"="-320";"subkey"="-64";"value"="-3684"}; +{"key"="-319";"subkey"="-64";"value"="-3672"}; +{"key"="-318";"subkey"="-64";"value"="-3661"}; +{"key"="-317";"subkey"="-64";"value"="-3649"}; +{"key"="-316";"subkey"="-64";"value"="-3638"}; +{"key"="-315";"subkey"="-63";"value"="-3626"}; +{"key"="-314";"subkey"="-63";"value"="-3615"}; +{"key"="-313";"subkey"="-63";"value"="-3603"}; +{"key"="-312";"subkey"="-63";"value"="-3592"}; +{"key"="-311";"subkey"="-63";"value"="-3580"}; +{"key"="-310";"subkey"="-62";"value"="-3569"}; +{"key"="-309";"subkey"="-62";"value"="-3557"}; +{"key"="-308";"subkey"="-62";"value"="-3545"}; +{"key"="-307";"subkey"="-62";"value"="-3534"}; +{"key"="-306";"subkey"="-62";"value"="-3522"}; +{"key"="-305";"subkey"="-61";"value"="-3511"}; +{"key"="-304";"subkey"="-61";"value"="-3499"}; +{"key"="-303";"subkey"="-61";"value"="-3488"}; +{"key"="-302";"subkey"="-61";"value"="-3476"}; +{"key"="-301";"subkey"="-61";"value"="-3465"}; +{"key"="-300";"subkey"="-60";"value"="-3453"}; +{"key"="-299";"subkey"="-60";"value"="-3442"}; +{"key"="-298";"subkey"="-60";"value"="-3430"}; +{"key"="-297";"subkey"="-60";"value"="-3419"}; +{"key"="-296";"subkey"="-60";"value"="-3407"}; +{"key"="-295";"subkey"="-59";"value"="-3396"}; +{"key"="-294";"subkey"="-59";"value"="-3384"}; +{"key"="-293";"subkey"="-59";"value"="-3373"}; +{"key"="-292";"subkey"="-59";"value"="-3361"}; +{"key"="-291";"subkey"="-59";"value"="-3350"}; +{"key"="-290";"subkey"="-58";"value"="-3338"}; +{"key"="-289";"subkey"="-58";"value"="-3327"}; +{"key"="-288";"subkey"="-58";"value"="-3315"}; +{"key"="-287";"subkey"="-58";"value"="-3304"}; +{"key"="-286";"subkey"="-58";"value"="-3292"}; +{"key"="-285";"subkey"="-57";"value"="-3281"}; +{"key"="-284";"subkey"="-57";"value"="-3269"}; +{"key"="-283";"subkey"="-57";"value"="-3258"}; +{"key"="-282";"subkey"="-57";"value"="-3246"}; 
+{"key"="-281";"subkey"="-57";"value"="-3235"}; +{"key"="-280";"subkey"="-56";"value"="-3223"}; +{"key"="-279";"subkey"="-56";"value"="-3212"}; +{"key"="-278";"subkey"="-56";"value"="-3200"}; +{"key"="-277";"subkey"="-56";"value"="-3189"}; +{"key"="-276";"subkey"="-56";"value"="-3177"}; +{"key"="-275";"subkey"="-55";"value"="-3166"}; +{"key"="-274";"subkey"="-55";"value"="-3154"}; +{"key"="-273";"subkey"="-55";"value"="-3143"}; +{"key"="-272";"subkey"="-55";"value"="-3131"}; +{"key"="-271";"subkey"="-55";"value"="-3120"}; +{"key"="-270";"subkey"="-54";"value"="-3108"}; +{"key"="-269";"subkey"="-54";"value"="-3096"}; +{"key"="-268";"subkey"="-54";"value"="-3085"}; +{"key"="-267";"subkey"="-54";"value"="-3073"}; +{"key"="-266";"subkey"="-54";"value"="-3062"}; +{"key"="-265";"subkey"="-53";"value"="-3050"}; +{"key"="-264";"subkey"="-53";"value"="-3039"}; +{"key"="-263";"subkey"="-53";"value"="-3027"}; +{"key"="-262";"subkey"="-53";"value"="-3016"}; +{"key"="-261";"subkey"="-53";"value"="-3004"}; +{"key"="-260";"subkey"="-52";"value"="-2993"}; +{"key"="-259";"subkey"="-52";"value"="-2981"}; +{"key"="-258";"subkey"="-52";"value"="-2970"}; +{"key"="-257";"subkey"="-52";"value"="-2958"}; +{"key"="-256";"subkey"="-52";"value"="-2947"}; +{"key"="-255";"subkey"="-51";"value"="-2935"}; +{"key"="-254";"subkey"="-51";"value"="-2924"}; +{"key"="-253";"subkey"="-51";"value"="-2912"}; +{"key"="-252";"subkey"="-51";"value"="-2901"}; +{"key"="-251";"subkey"="-51";"value"="-2889"}; +{"key"="-250";"subkey"="-50";"value"="-2878"}; +{"key"="-249";"subkey"="-50";"value"="-2866"}; +{"key"="-248";"subkey"="-50";"value"="-2855"}; +{"key"="-247";"subkey"="-50";"value"="-2843"}; +{"key"="-246";"subkey"="-50";"value"="-2832"}; +{"key"="-245";"subkey"="-49";"value"="-2820"}; +{"key"="-244";"subkey"="-49";"value"="-2809"}; +{"key"="-243";"subkey"="-49";"value"="-2797"}; +{"key"="-242";"subkey"="-49";"value"="-2786"}; +{"key"="-241";"subkey"="-49";"value"="-2774"}; 
+{"key"="-240";"subkey"="-48";"value"="-2763"}; +{"key"="-239";"subkey"="-48";"value"="-2751"}; +{"key"="-238";"subkey"="-48";"value"="-2740"}; +{"key"="-237";"subkey"="-48";"value"="-2728"}; +{"key"="-236";"subkey"="-48";"value"="-2717"}; +{"key"="-235";"subkey"="-47";"value"="-2705"}; +{"key"="-234";"subkey"="-47";"value"="-2694"}; +{"key"="-233";"subkey"="-47";"value"="-2682"}; +{"key"="-232";"subkey"="-47";"value"="-2670"}; +{"key"="-231";"subkey"="-47";"value"="-2659"}; +{"key"="-230";"subkey"="-46";"value"="-2647"}; +{"key"="-229";"subkey"="-46";"value"="-2636"}; +{"key"="-228";"subkey"="-46";"value"="-2624"}; +{"key"="-227";"subkey"="-46";"value"="-2613"}; +{"key"="-226";"subkey"="-46";"value"="-2601"}; +{"key"="-225";"subkey"="-45";"value"="-2590"}; +{"key"="-224";"subkey"="-45";"value"="-2578"}; +{"key"="-223";"subkey"="-45";"value"="-2567"}; +{"key"="-222";"subkey"="-45";"value"="-2555"}; +{"key"="-221";"subkey"="-45";"value"="-2544"}; +{"key"="-220";"subkey"="-44";"value"="-2532"}; +{"key"="-219";"subkey"="-44";"value"="-2521"}; +{"key"="-218";"subkey"="-44";"value"="-2509"}; +{"key"="-217";"subkey"="-44";"value"="-2498"}; +{"key"="-216";"subkey"="-44";"value"="-2486"}; +{"key"="-215";"subkey"="-43";"value"="-2475"}; +{"key"="-214";"subkey"="-43";"value"="-2463"}; +{"key"="-213";"subkey"="-43";"value"="-2452"}; +{"key"="-212";"subkey"="-43";"value"="-2440"}; +{"key"="-211";"subkey"="-43";"value"="-2429"}; +{"key"="-210";"subkey"="-42";"value"="-2417"}; +{"key"="-209";"subkey"="-42";"value"="-2406"}; +{"key"="-208";"subkey"="-42";"value"="-2394"}; +{"key"="-207";"subkey"="-42";"value"="-2383"}; +{"key"="-206";"subkey"="-42";"value"="-2371"}; +{"key"="-205";"subkey"="-41";"value"="-2360"}; +{"key"="-204";"subkey"="-41";"value"="-2348"}; +{"key"="-203";"subkey"="-41";"value"="-2337"}; +{"key"="-202";"subkey"="-41";"value"="-2325"}; +{"key"="-201";"subkey"="-41";"value"="-2314"}; +{"key"="-200";"subkey"="-40";"value"="-2302"}; 
+{"key"="-199";"subkey"="-40";"value"="-2291"}; +{"key"="-198";"subkey"="-40";"value"="-2279"}; +{"key"="-197";"subkey"="-40";"value"="-2268"}; +{"key"="-196";"subkey"="-40";"value"="-2256"}; +{"key"="-195";"subkey"="-39";"value"="-2245"}; +{"key"="-194";"subkey"="-39";"value"="-2233"}; +{"key"="-193";"subkey"="-39";"value"="-2221"}; +{"key"="-192";"subkey"="-39";"value"="-2210"}; +{"key"="-191";"subkey"="-39";"value"="-2198"}; +{"key"="-190";"subkey"="-38";"value"="-2187"}; +{"key"="-189";"subkey"="-38";"value"="-2175"}; +{"key"="-188";"subkey"="-38";"value"="-2164"}; +{"key"="-187";"subkey"="-38";"value"="-2152"}; +{"key"="-186";"subkey"="-38";"value"="-2141"}; +{"key"="-185";"subkey"="-37";"value"="-2129"}; +{"key"="-184";"subkey"="-37";"value"="-2118"}; +{"key"="-183";"subkey"="-37";"value"="-2106"}; +{"key"="-182";"subkey"="-37";"value"="-2095"}; +{"key"="-181";"subkey"="-37";"value"="-2083"}; +{"key"="-180";"subkey"="-36";"value"="-2072"}; +{"key"="-179";"subkey"="-36";"value"="-2060"}; +{"key"="-178";"subkey"="-36";"value"="-2049"}; +{"key"="-177";"subkey"="-36";"value"="-2037"}; +{"key"="-176";"subkey"="-36";"value"="-2026"}; +{"key"="-175";"subkey"="-35";"value"="-2014"}; +{"key"="-174";"subkey"="-35";"value"="-2003"}; +{"key"="-173";"subkey"="-35";"value"="-1991"}; +{"key"="-172";"subkey"="-35";"value"="-1980"}; +{"key"="-171";"subkey"="-35";"value"="-1968"}; +{"key"="-170";"subkey"="-34";"value"="-1957"}; +{"key"="-169";"subkey"="-34";"value"="-1945"}; +{"key"="-168";"subkey"="-34";"value"="-1934"}; +{"key"="-167";"subkey"="-34";"value"="-1922"}; +{"key"="-166";"subkey"="-34";"value"="-1911"}; +{"key"="-165";"subkey"="-33";"value"="-1899"}; +{"key"="-164";"subkey"="-33";"value"="-1888"}; +{"key"="-163";"subkey"="-33";"value"="-1876"}; +{"key"="-162";"subkey"="-33";"value"="-1865"}; +{"key"="-161";"subkey"="-33";"value"="-1853"}; +{"key"="-160";"subkey"="-32";"value"="-1842"}; +{"key"="-159";"subkey"="-32";"value"="-1830"}; 
+{"key"="-158";"subkey"="-32";"value"="-1819"}; +{"key"="-157";"subkey"="-32";"value"="-1807"}; +{"key"="-156";"subkey"="-32";"value"="-1796"}; +{"key"="-155";"subkey"="-31";"value"="-1784"}; +{"key"="-154";"subkey"="-31";"value"="-1772"}; +{"key"="-153";"subkey"="-31";"value"="-1761"}; +{"key"="-152";"subkey"="-31";"value"="-1749"}; +{"key"="-151";"subkey"="-31";"value"="-1738"}; +{"key"="-150";"subkey"="-30";"value"="-1726"}; +{"key"="-149";"subkey"="-30";"value"="-1715"}; +{"key"="-148";"subkey"="-30";"value"="-1703"}; +{"key"="-147";"subkey"="-30";"value"="-1692"}; +{"key"="-146";"subkey"="-30";"value"="-1680"}; +{"key"="-145";"subkey"="-29";"value"="-1669"}; +{"key"="-144";"subkey"="-29";"value"="-1657"}; +{"key"="-143";"subkey"="-29";"value"="-1646"}; +{"key"="-142";"subkey"="-29";"value"="-1634"}; +{"key"="-141";"subkey"="-29";"value"="-1623"}; +{"key"="-140";"subkey"="-28";"value"="-1611"}; +{"key"="-139";"subkey"="-28";"value"="-1600"}; +{"key"="-138";"subkey"="-28";"value"="-1588"}; +{"key"="-137";"subkey"="-28";"value"="-1577"}; +{"key"="-136";"subkey"="-28";"value"="-1565"}; +{"key"="-135";"subkey"="-27";"value"="-1554"}; +{"key"="-134";"subkey"="-27";"value"="-1542"}; +{"key"="-133";"subkey"="-27";"value"="-1531"}; +{"key"="-132";"subkey"="-27";"value"="-1519"}; +{"key"="-131";"subkey"="-27";"value"="-1508"}; +{"key"="-130";"subkey"="-26";"value"="-1496"}; +{"key"="-129";"subkey"="-26";"value"="-1485"}; +{"key"="-128";"subkey"="-26";"value"="-1473"}; +{"key"="-127";"subkey"="-26";"value"="-1462"}; +{"key"="-126";"subkey"="-26";"value"="-1450"}; +{"key"="-125";"subkey"="-25";"value"="-1439"}; +{"key"="-124";"subkey"="-25";"value"="-1427"}; +{"key"="-123";"subkey"="-25";"value"="-1416"}; +{"key"="-122";"subkey"="-25";"value"="-1404"}; +{"key"="-121";"subkey"="-25";"value"="-1393"}; +{"key"="-120";"subkey"="-24";"value"="-1381"}; +{"key"="-119";"subkey"="-24";"value"="-1370"}; +{"key"="-118";"subkey"="-24";"value"="-1358"}; 
+{"key"="-117";"subkey"="-24";"value"="-1347"}; +{"key"="-116";"subkey"="-24";"value"="-1335"}; +{"key"="-115";"subkey"="-23";"value"="-1323"}; +{"key"="-114";"subkey"="-23";"value"="-1312"}; +{"key"="-113";"subkey"="-23";"value"="-1300"}; +{"key"="-112";"subkey"="-23";"value"="-1289"}; +{"key"="-111";"subkey"="-23";"value"="-1277"}; +{"key"="-110";"subkey"="-22";"value"="-1266"}; +{"key"="-109";"subkey"="-22";"value"="-1254"}; +{"key"="-108";"subkey"="-22";"value"="-1243"}; +{"key"="-107";"subkey"="-22";"value"="-1231"}; +{"key"="-106";"subkey"="-22";"value"="-1220"}; +{"key"="-105";"subkey"="-21";"value"="-1208"}; +{"key"="-104";"subkey"="-21";"value"="-1197"}; +{"key"="-103";"subkey"="-21";"value"="-1185"}; +{"key"="-102";"subkey"="-21";"value"="-1174"}; +{"key"="-101";"subkey"="-21";"value"="-1162"}; +{"key"="-100";"subkey"="-20";"value"="-1151"}; +{"key"="-99";"subkey"="-20";"value"="-1139"}; +{"key"="-98";"subkey"="-20";"value"="-1128"}; +{"key"="-97";"subkey"="-20";"value"="-1116"}; +{"key"="-96";"subkey"="-20";"value"="-1105"}; +{"key"="-95";"subkey"="-19";"value"="-1093"}; +{"key"="-94";"subkey"="-19";"value"="-1082"}; +{"key"="-93";"subkey"="-19";"value"="-1070"}; +{"key"="-92";"subkey"="-19";"value"="-1059"}; +{"key"="-91";"subkey"="-19";"value"="-1047"}; +{"key"="-90";"subkey"="-18";"value"="-1036"}; +{"key"="-89";"subkey"="-18";"value"="-1024"}; +{"key"="-88";"subkey"="-18";"value"="-1013"}; +{"key"="-87";"subkey"="-18";"value"="-1001"}; +{"key"="-86";"subkey"="-18";"value"="-990"}; +{"key"="-85";"subkey"="-17";"value"="-978"}; +{"key"="-84";"subkey"="-17";"value"="-967"}; +{"key"="-83";"subkey"="-17";"value"="-955"}; +{"key"="-82";"subkey"="-17";"value"="-944"}; +{"key"="-81";"subkey"="-17";"value"="-932"}; +{"key"="-80";"subkey"="-16";"value"="-921"}; +{"key"="-79";"subkey"="-16";"value"="-909"}; +{"key"="-78";"subkey"="-16";"value"="-898"}; +{"key"="-77";"subkey"="-16";"value"="-886"}; +{"key"="-76";"subkey"="-16";"value"="-874"}; 
+{"key"="-75";"subkey"="-15";"value"="-863"}; +{"key"="-74";"subkey"="-15";"value"="-851"}; +{"key"="-73";"subkey"="-15";"value"="-840"}; +{"key"="-72";"subkey"="-15";"value"="-828"}; +{"key"="-71";"subkey"="-15";"value"="-817"}; +{"key"="-70";"subkey"="-14";"value"="-805"}; +{"key"="-69";"subkey"="-14";"value"="-794"}; +{"key"="-68";"subkey"="-14";"value"="-782"}; +{"key"="-67";"subkey"="-14";"value"="-771"}; +{"key"="-66";"subkey"="-14";"value"="-759"}; +{"key"="-65";"subkey"="-13";"value"="-748"}; +{"key"="-64";"subkey"="-13";"value"="-736"}; +{"key"="-63";"subkey"="-13";"value"="-725"}; +{"key"="-62";"subkey"="-13";"value"="-713"}; +{"key"="-61";"subkey"="-13";"value"="-702"}; +{"key"="-60";"subkey"="-12";"value"="-690"}; +{"key"="-59";"subkey"="-12";"value"="-679"}; +{"key"="-58";"subkey"="-12";"value"="-667"}; +{"key"="-57";"subkey"="-12";"value"="-656"}; +{"key"="-56";"subkey"="-12";"value"="-644"}; +{"key"="-55";"subkey"="-11";"value"="-633"}; +{"key"="-54";"subkey"="-11";"value"="-621"}; +{"key"="-53";"subkey"="-11";"value"="-610"}; +{"key"="-52";"subkey"="-11";"value"="-598"}; +{"key"="-51";"subkey"="-11";"value"="-587"}; +{"key"="-50";"subkey"="-10";"value"="-575"}; +{"key"="-49";"subkey"="-10";"value"="-564"}; +{"key"="-48";"subkey"="-10";"value"="-552"}; +{"key"="-47";"subkey"="-10";"value"="-541"}; +{"key"="-46";"subkey"="-10";"value"="-529"}; +{"key"="-45";"subkey"="-9";"value"="-518"}; +{"key"="-44";"subkey"="-9";"value"="-506"}; +{"key"="-43";"subkey"="-9";"value"="-495"}; +{"key"="-42";"subkey"="-9";"value"="-483"}; +{"key"="-41";"subkey"="-9";"value"="-472"}; +{"key"="-40";"subkey"="-8";"value"="-460"}; +{"key"="-39";"subkey"="-8";"value"="-449"}; +{"key"="-38";"subkey"="-8";"value"="-437"}; +{"key"="-37";"subkey"="-8";"value"="-425"}; +{"key"="-36";"subkey"="-8";"value"="-414"}; +{"key"="-35";"subkey"="-7";"value"="-402"}; +{"key"="-34";"subkey"="-7";"value"="-391"}; +{"key"="-33";"subkey"="-7";"value"="-379"}; 
+{"key"="-32";"subkey"="-7";"value"="-368"}; +{"key"="-31";"subkey"="-7";"value"="-356"}; +{"key"="-30";"subkey"="-6";"value"="-345"}; +{"key"="-29";"subkey"="-6";"value"="-333"}; +{"key"="-28";"subkey"="-6";"value"="-322"}; +{"key"="-27";"subkey"="-6";"value"="-310"}; +{"key"="-26";"subkey"="-6";"value"="-299"}; +{"key"="-25";"subkey"="-5";"value"="-287"}; +{"key"="-24";"subkey"="-5";"value"="-276"}; +{"key"="-23";"subkey"="-5";"value"="-264"}; +{"key"="-22";"subkey"="-5";"value"="-253"}; +{"key"="-21";"subkey"="-5";"value"="-241"}; +{"key"="-20";"subkey"="-4";"value"="-230"}; +{"key"="-19";"subkey"="-4";"value"="-218"}; +{"key"="-18";"subkey"="-4";"value"="-207"}; +{"key"="-17";"subkey"="-4";"value"="-195"}; +{"key"="-16";"subkey"="-4";"value"="-184"}; +{"key"="-15";"subkey"="-3";"value"="-172"}; +{"key"="-14";"subkey"="-3";"value"="-161"}; +{"key"="-13";"subkey"="-3";"value"="-149"}; +{"key"="-12";"subkey"="-3";"value"="-138"}; +{"key"="-11";"subkey"="-3";"value"="-126"}; +{"key"="-10";"subkey"="-2";"value"="-115"}; +{"key"="-9";"subkey"="-2";"value"="-103"}; +{"key"="-8";"subkey"="-2";"value"="-92"}; +{"key"="-7";"subkey"="-2";"value"="-80"}; +{"key"="-6";"subkey"="-2";"value"="-69"}; +{"key"="-5";"subkey"="-1";"value"="-57"}; +{"key"="-4";"subkey"="-1";"value"="-46"}; +{"key"="-3";"subkey"="-1";"value"="-34"}; +{"key"="-2";"subkey"="-1";"value"="-23"}; +{"key"="-1";"subkey"="-1";"value"="-11"}; +{"key"="0";"subkey"="0";"value"="0"}; +{"key"="1";"subkey"="0";"value"="11"}; +{"key"="2";"subkey"="0";"value"="23"}; +{"key"="3";"subkey"="0";"value"="34"}; +{"key"="4";"subkey"="0";"value"="46"}; +{"key"="5";"subkey"="1";"value"="57"}; +{"key"="6";"subkey"="1";"value"="69"}; +{"key"="7";"subkey"="1";"value"="80"}; +{"key"="8";"subkey"="1";"value"="92"}; +{"key"="9";"subkey"="1";"value"="103"}; +{"key"="10";"subkey"="2";"value"="115"}; +{"key"="11";"subkey"="2";"value"="126"}; +{"key"="12";"subkey"="2";"value"="138"}; +{"key"="13";"subkey"="2";"value"="149"}; 
+{"key"="14";"subkey"="2";"value"="161"}; +{"key"="15";"subkey"="3";"value"="172"}; +{"key"="16";"subkey"="3";"value"="184"}; +{"key"="17";"subkey"="3";"value"="195"}; +{"key"="18";"subkey"="3";"value"="207"}; +{"key"="19";"subkey"="3";"value"="218"}; +{"key"="20";"subkey"="4";"value"="230"}; +{"key"="21";"subkey"="4";"value"="241"}; +{"key"="22";"subkey"="4";"value"="253"}; +{"key"="23";"subkey"="4";"value"="264"}; +{"key"="24";"subkey"="4";"value"="276"}; +{"key"="25";"subkey"="5";"value"="287"}; +{"key"="26";"subkey"="5";"value"="299"}; +{"key"="27";"subkey"="5";"value"="310"}; +{"key"="28";"subkey"="5";"value"="322"}; +{"key"="29";"subkey"="5";"value"="333"}; +{"key"="30";"subkey"="6";"value"="345"}; +{"key"="31";"subkey"="6";"value"="356"}; +{"key"="32";"subkey"="6";"value"="368"}; +{"key"="33";"subkey"="6";"value"="379"}; +{"key"="34";"subkey"="6";"value"="391"}; +{"key"="35";"subkey"="7";"value"="402"}; +{"key"="36";"subkey"="7";"value"="414"}; +{"key"="37";"subkey"="7";"value"="425"}; +{"key"="38";"subkey"="7";"value"="437"}; +{"key"="39";"subkey"="7";"value"="449"}; +{"key"="40";"subkey"="8";"value"="460"}; +{"key"="41";"subkey"="8";"value"="472"}; +{"key"="42";"subkey"="8";"value"="483"}; +{"key"="43";"subkey"="8";"value"="495"}; +{"key"="44";"subkey"="8";"value"="506"}; +{"key"="45";"subkey"="9";"value"="518"}; +{"key"="46";"subkey"="9";"value"="529"}; +{"key"="47";"subkey"="9";"value"="541"}; +{"key"="48";"subkey"="9";"value"="552"}; +{"key"="49";"subkey"="9";"value"="564"}; +{"key"="50";"subkey"="10";"value"="575"}; +{"key"="51";"subkey"="10";"value"="587"}; +{"key"="52";"subkey"="10";"value"="598"}; +{"key"="53";"subkey"="10";"value"="610"}; +{"key"="54";"subkey"="10";"value"="621"}; +{"key"="55";"subkey"="11";"value"="633"}; +{"key"="56";"subkey"="11";"value"="644"}; +{"key"="57";"subkey"="11";"value"="656"}; +{"key"="58";"subkey"="11";"value"="667"}; +{"key"="59";"subkey"="11";"value"="679"}; +{"key"="60";"subkey"="12";"value"="690"}; 
+{"key"="61";"subkey"="12";"value"="702"}; +{"key"="62";"subkey"="12";"value"="713"}; +{"key"="63";"subkey"="12";"value"="725"}; +{"key"="64";"subkey"="12";"value"="736"}; +{"key"="65";"subkey"="13";"value"="748"}; +{"key"="66";"subkey"="13";"value"="759"}; +{"key"="67";"subkey"="13";"value"="771"}; +{"key"="68";"subkey"="13";"value"="782"}; +{"key"="69";"subkey"="13";"value"="794"}; +{"key"="70";"subkey"="14";"value"="805"}; +{"key"="71";"subkey"="14";"value"="817"}; +{"key"="72";"subkey"="14";"value"="828"}; +{"key"="73";"subkey"="14";"value"="840"}; +{"key"="74";"subkey"="14";"value"="851"}; +{"key"="75";"subkey"="15";"value"="863"}; +{"key"="76";"subkey"="15";"value"="874"}; +{"key"="77";"subkey"="15";"value"="886"}; +{"key"="78";"subkey"="15";"value"="898"}; +{"key"="79";"subkey"="15";"value"="909"}; +{"key"="80";"subkey"="16";"value"="921"}; +{"key"="81";"subkey"="16";"value"="932"}; +{"key"="82";"subkey"="16";"value"="944"}; +{"key"="83";"subkey"="16";"value"="955"}; +{"key"="84";"subkey"="16";"value"="967"}; +{"key"="85";"subkey"="17";"value"="978"}; +{"key"="86";"subkey"="17";"value"="990"}; +{"key"="87";"subkey"="17";"value"="1001"}; +{"key"="88";"subkey"="17";"value"="1013"}; +{"key"="89";"subkey"="17";"value"="1024"}; +{"key"="90";"subkey"="18";"value"="1036"}; +{"key"="91";"subkey"="18";"value"="1047"}; +{"key"="92";"subkey"="18";"value"="1059"}; +{"key"="93";"subkey"="18";"value"="1070"}; +{"key"="94";"subkey"="18";"value"="1082"}; +{"key"="95";"subkey"="19";"value"="1093"}; +{"key"="96";"subkey"="19";"value"="1105"}; +{"key"="97";"subkey"="19";"value"="1116"}; +{"key"="98";"subkey"="19";"value"="1128"}; +{"key"="99";"subkey"="19";"value"="1139"}; +{"key"="100";"subkey"="20";"value"="460"}; +{"key"="101";"subkey"="20";"value"="466"}; +{"key"="102";"subkey"="20";"value"="471"}; +{"key"="103";"subkey"="20";"value"="477"}; +{"key"="104";"subkey"="20";"value"="483"}; +{"key"="105";"subkey"="21";"value"="488"}; +{"key"="106";"subkey"="21";"value"="494"}; 
+{"key"="107";"subkey"="21";"value"="499"}; +{"key"="108";"subkey"="21";"value"="505"}; +{"key"="109";"subkey"="21";"value"="511"}; +{"key"="110";"subkey"="22";"value"="517"}; +{"key"="111";"subkey"="22";"value"="522"}; +{"key"="112";"subkey"="22";"value"="528"}; +{"key"="113";"subkey"="22";"value"="534"}; +{"key"="114";"subkey"="22";"value"="539"}; +{"key"="115";"subkey"="23";"value"="545"}; +{"key"="116";"subkey"="23";"value"="551"}; +{"key"="117";"subkey"="23";"value"="557"}; +{"key"="118";"subkey"="23";"value"="562"}; +{"key"="119";"subkey"="23";"value"="568"}; +{"key"="120";"subkey"="24";"value"="574"}; +{"key"="121";"subkey"="24";"value"="580"}; +{"key"="122";"subkey"="24";"value"="586"}; +{"key"="123";"subkey"="24";"value"="591"}; +{"key"="124";"subkey"="24";"value"="597"}; +{"key"="125";"subkey"="25";"value"="603"}; +{"key"="126";"subkey"="25";"value"="609"}; +{"key"="127";"subkey"="25";"value"="615"}; +{"key"="128";"subkey"="25";"value"="621"}; +{"key"="129";"subkey"="25";"value"="626"}; +{"key"="130";"subkey"="26";"value"="632"}; +{"key"="131";"subkey"="26";"value"="638"}; +{"key"="132";"subkey"="26";"value"="644"}; +{"key"="133";"subkey"="26";"value"="650"}; +{"key"="134";"subkey"="26";"value"="656"}; +{"key"="135";"subkey"="27";"value"="662"}; +{"key"="136";"subkey"="27";"value"="668"}; +{"key"="137";"subkey"="27";"value"="674"}; +{"key"="138";"subkey"="27";"value"="679"}; +{"key"="139";"subkey"="27";"value"="685"}; +{"key"="140";"subkey"="28";"value"="691"}; +{"key"="141";"subkey"="28";"value"="697"}; +{"key"="142";"subkey"="28";"value"="703"}; +{"key"="143";"subkey"="28";"value"="709"}; +{"key"="144";"subkey"="28";"value"="715"}; +{"key"="145";"subkey"="29";"value"="721"}; +{"key"="146";"subkey"="29";"value"="727"}; +{"key"="147";"subkey"="29";"value"="733"}; +{"key"="148";"subkey"="29";"value"="739"}; +{"key"="149";"subkey"="29";"value"="745"}; +{"key"="150";"subkey"="30";"value"="751"}; +{"key"="151";"subkey"="30";"value"="757"}; 
+{"key"="152";"subkey"="30";"value"="763"}; +{"key"="153";"subkey"="30";"value"="769"}; +{"key"="154";"subkey"="30";"value"="775"}; +{"key"="155";"subkey"="31";"value"="781"}; +{"key"="156";"subkey"="31";"value"="787"}; +{"key"="157";"subkey"="31";"value"="793"}; +{"key"="158";"subkey"="31";"value"="799"}; +{"key"="159";"subkey"="31";"value"="805"}; +{"key"="160";"subkey"="32";"value"="812"}; +{"key"="161";"subkey"="32";"value"="818"}; +{"key"="162";"subkey"="32";"value"="824"}; +{"key"="163";"subkey"="32";"value"="830"}; +{"key"="164";"subkey"="32";"value"="836"}; +{"key"="165";"subkey"="33";"value"="842"}; +{"key"="166";"subkey"="33";"value"="848"}; +{"key"="167";"subkey"="33";"value"="854"}; +{"key"="168";"subkey"="33";"value"="860"}; +{"key"="169";"subkey"="33";"value"="866"}; +{"key"="170";"subkey"="34";"value"="873"}; +{"key"="171";"subkey"="34";"value"="879"}; +{"key"="172";"subkey"="34";"value"="885"}; +{"key"="173";"subkey"="34";"value"="891"}; +{"key"="174";"subkey"="34";"value"="897"}; +{"key"="175";"subkey"="35";"value"="903"}; +{"key"="176";"subkey"="35";"value"="910"}; +{"key"="177";"subkey"="35";"value"="916"}; +{"key"="178";"subkey"="35";"value"="922"}; +{"key"="179";"subkey"="35";"value"="928"}; +{"key"="180";"subkey"="36";"value"="934"}; +{"key"="181";"subkey"="36";"value"="940"}; +{"key"="182";"subkey"="36";"value"="947"}; +{"key"="183";"subkey"="36";"value"="953"}; +{"key"="184";"subkey"="36";"value"="959"}; +{"key"="185";"subkey"="37";"value"="965"}; +{"key"="186";"subkey"="37";"value"="971"}; +{"key"="187";"subkey"="37";"value"="978"}; +{"key"="188";"subkey"="37";"value"="984"}; +{"key"="189";"subkey"="37";"value"="990"}; +{"key"="190";"subkey"="38";"value"="996"}; +{"key"="191";"subkey"="38";"value"="1003"}; +{"key"="192";"subkey"="38";"value"="1009"}; +{"key"="193";"subkey"="38";"value"="1015"}; +{"key"="194";"subkey"="38";"value"="1021"}; +{"key"="195";"subkey"="39";"value"="1028"}; +{"key"="196";"subkey"="39";"value"="1034"}; 
+{"key"="197";"subkey"="39";"value"="1040"}; +{"key"="198";"subkey"="39";"value"="1047"}; +{"key"="199";"subkey"="39";"value"="1053"}; +{"key"="200";"subkey"="40";"value"="1059"}; +{"key"="201";"subkey"="40";"value"="1065"}; +{"key"="202";"subkey"="40";"value"="1072"}; +{"key"="203";"subkey"="40";"value"="1078"}; +{"key"="204";"subkey"="40";"value"="1084"}; +{"key"="205";"subkey"="41";"value"="1091"}; +{"key"="206";"subkey"="41";"value"="1097"}; +{"key"="207";"subkey"="41";"value"="1103"}; +{"key"="208";"subkey"="41";"value"="1110"}; +{"key"="209";"subkey"="41";"value"="1116"}; +{"key"="210";"subkey"="42";"value"="1122"}; +{"key"="211";"subkey"="42";"value"="1129"}; +{"key"="212";"subkey"="42";"value"="1135"}; +{"key"="213";"subkey"="42";"value"="1141"}; +{"key"="214";"subkey"="42";"value"="1148"}; +{"key"="215";"subkey"="43";"value"="1154"}; +{"key"="216";"subkey"="43";"value"="1161"}; +{"key"="217";"subkey"="43";"value"="1167"}; +{"key"="218";"subkey"="43";"value"="1173"}; +{"key"="219";"subkey"="43";"value"="1180"}; +{"key"="220";"subkey"="44";"value"="1186"}; +{"key"="221";"subkey"="44";"value"="1192"}; +{"key"="222";"subkey"="44";"value"="1199"}; +{"key"="223";"subkey"="44";"value"="1205"}; +{"key"="224";"subkey"="44";"value"="1212"}; +{"key"="225";"subkey"="45";"value"="1218"}; +{"key"="226";"subkey"="45";"value"="1225"}; +{"key"="227";"subkey"="45";"value"="1231"}; +{"key"="228";"subkey"="45";"value"="1237"}; +{"key"="229";"subkey"="45";"value"="1244"}; +{"key"="230";"subkey"="46";"value"="1250"}; +{"key"="231";"subkey"="46";"value"="1257"}; +{"key"="232";"subkey"="46";"value"="1263"}; +{"key"="233";"subkey"="46";"value"="1270"}; +{"key"="234";"subkey"="46";"value"="1276"}; +{"key"="235";"subkey"="47";"value"="1283"}; +{"key"="236";"subkey"="47";"value"="1289"}; +{"key"="237";"subkey"="47";"value"="1295"}; +{"key"="238";"subkey"="47";"value"="1302"}; +{"key"="239";"subkey"="47";"value"="1308"}; +{"key"="240";"subkey"="48";"value"="1315"}; 
+{"key"="241";"subkey"="48";"value"="1321"}; +{"key"="242";"subkey"="48";"value"="1328"}; +{"key"="243";"subkey"="48";"value"="1334"}; +{"key"="244";"subkey"="48";"value"="1341"}; +{"key"="245";"subkey"="49";"value"="1347"}; +{"key"="246";"subkey"="49";"value"="1354"}; +{"key"="247";"subkey"="49";"value"="1360"}; +{"key"="248";"subkey"="49";"value"="1367"}; +{"key"="249";"subkey"="49";"value"="1373"}; +{"key"="250";"subkey"="50";"value"="1380"}; +{"key"="251";"subkey"="50";"value"="1386"}; +{"key"="252";"subkey"="50";"value"="1393"}; +{"key"="253";"subkey"="50";"value"="1399"}; +{"key"="254";"subkey"="50";"value"="1406"}; +{"key"="255";"subkey"="51";"value"="1413"}; +{"key"="256";"subkey"="51";"value"="1419"}; +{"key"="257";"subkey"="51";"value"="1426"}; +{"key"="258";"subkey"="51";"value"="1432"}; +{"key"="259";"subkey"="51";"value"="1439"}; +{"key"="260";"subkey"="52";"value"="1445"}; +{"key"="261";"subkey"="52";"value"="1452"}; +{"key"="262";"subkey"="52";"value"="1458"}; +{"key"="263";"subkey"="52";"value"="1465"}; +{"key"="264";"subkey"="52";"value"="1472"}; +{"key"="265";"subkey"="53";"value"="1478"}; +{"key"="266";"subkey"="53";"value"="1485"}; +{"key"="267";"subkey"="53";"value"="1491"}; +{"key"="268";"subkey"="53";"value"="1498"}; +{"key"="269";"subkey"="53";"value"="1504"}; +{"key"="270";"subkey"="54";"value"="1511"}; +{"key"="271";"subkey"="54";"value"="1518"}; +{"key"="272";"subkey"="54";"value"="1524"}; +{"key"="273";"subkey"="54";"value"="1531"}; +{"key"="274";"subkey"="54";"value"="1537"}; +{"key"="275";"subkey"="55";"value"="1544"}; +{"key"="276";"subkey"="55";"value"="1551"}; +{"key"="277";"subkey"="55";"value"="1557"}; +{"key"="278";"subkey"="55";"value"="1564"}; +{"key"="279";"subkey"="55";"value"="1571"}; +{"key"="280";"subkey"="56";"value"="1577"}; +{"key"="281";"subkey"="56";"value"="1584"}; +{"key"="282";"subkey"="56";"value"="1591"}; +{"key"="283";"subkey"="56";"value"="1597"}; +{"key"="284";"subkey"="56";"value"="1604"}; 
+{"key"="285";"subkey"="57";"value"="1610"}; +{"key"="286";"subkey"="57";"value"="1617"}; +{"key"="287";"subkey"="57";"value"="1624"}; +{"key"="288";"subkey"="57";"value"="1630"}; +{"key"="289";"subkey"="57";"value"="1637"}; +{"key"="290";"subkey"="58";"value"="1644"}; +{"key"="291";"subkey"="58";"value"="1650"}; +{"key"="292";"subkey"="58";"value"="1657"}; +{"key"="293";"subkey"="58";"value"="1664"}; +{"key"="294";"subkey"="58";"value"="1670"}; +{"key"="295";"subkey"="59";"value"="1677"}; +{"key"="296";"subkey"="59";"value"="1684"}; +{"key"="297";"subkey"="59";"value"="1691"}; +{"key"="298";"subkey"="59";"value"="1697"}; +{"key"="299";"subkey"="59";"value"="1704"}; +{"key"="300";"subkey"="60";"value"="1711"}; +{"key"="301";"subkey"="60";"value"="1717"}; +{"key"="302";"subkey"="60";"value"="1724"}; +{"key"="303";"subkey"="60";"value"="1731"}; +{"key"="304";"subkey"="60";"value"="1737"}; +{"key"="305";"subkey"="61";"value"="1744"}; +{"key"="306";"subkey"="61";"value"="1751"}; +{"key"="307";"subkey"="61";"value"="1758"}; +{"key"="308";"subkey"="61";"value"="1764"}; +{"key"="309";"subkey"="61";"value"="1771"}; +{"key"="310";"subkey"="62";"value"="1778"}; +{"key"="311";"subkey"="62";"value"="1785"}; +{"key"="312";"subkey"="62";"value"="1791"}; +{"key"="313";"subkey"="62";"value"="1798"}; +{"key"="314";"subkey"="62";"value"="1805"}; +{"key"="315";"subkey"="63";"value"="1812"}; +{"key"="316";"subkey"="63";"value"="1818"}; +{"key"="317";"subkey"="63";"value"="1825"}; +{"key"="318";"subkey"="63";"value"="1832"}; +{"key"="319";"subkey"="63";"value"="1839"}; +{"key"="320";"subkey"="64";"value"="1845"}; +{"key"="321";"subkey"="64";"value"="1852"}; +{"key"="322";"subkey"="64";"value"="1859"}; +{"key"="323";"subkey"="64";"value"="1866"}; +{"key"="324";"subkey"="64";"value"="1872"}; +{"key"="325";"subkey"="65";"value"="1879"}; +{"key"="326";"subkey"="65";"value"="1886"}; +{"key"="327";"subkey"="65";"value"="1893"}; +{"key"="328";"subkey"="65";"value"="1900"}; 
+{"key"="329";"subkey"="65";"value"="1906"}; +{"key"="330";"subkey"="66";"value"="1913"}; +{"key"="331";"subkey"="66";"value"="1920"}; +{"key"="332";"subkey"="66";"value"="1927"}; +{"key"="333";"subkey"="66";"value"="1934"}; +{"key"="334";"subkey"="66";"value"="1940"}; +{"key"="335";"subkey"="67";"value"="1947"}; +{"key"="336";"subkey"="67";"value"="1954"}; +{"key"="337";"subkey"="67";"value"="1961"}; +{"key"="338";"subkey"="67";"value"="1968"}; +{"key"="339";"subkey"="67";"value"="1975"}; +{"key"="340";"subkey"="68";"value"="1981"}; +{"key"="341";"subkey"="68";"value"="1988"}; +{"key"="342";"subkey"="68";"value"="1995"}; +{"key"="343";"subkey"="68";"value"="2002"}; +{"key"="344";"subkey"="68";"value"="2009"}; +{"key"="345";"subkey"="69";"value"="2016"}; +{"key"="346";"subkey"="69";"value"="2022"}; +{"key"="347";"subkey"="69";"value"="2029"}; +{"key"="348";"subkey"="69";"value"="2036"}; +{"key"="349";"subkey"="69";"value"="2043"}; +{"key"="350";"subkey"="70";"value"="2050"}; +{"key"="351";"subkey"="70";"value"="2057"}; +{"key"="352";"subkey"="70";"value"="2063"}; +{"key"="353";"subkey"="70";"value"="2070"}; +{"key"="354";"subkey"="70";"value"="2077"}; +{"key"="355";"subkey"="71";"value"="2084"}; +{"key"="356";"subkey"="71";"value"="2091"}; +{"key"="357";"subkey"="71";"value"="2098"}; +{"key"="358";"subkey"="71";"value"="2105"}; +{"key"="359";"subkey"="71";"value"="2112"}; +{"key"="360";"subkey"="72";"value"="2118"}; +{"key"="361";"subkey"="72";"value"="2125"}; +{"key"="362";"subkey"="72";"value"="2132"}; +{"key"="363";"subkey"="72";"value"="2139"}; +{"key"="364";"subkey"="72";"value"="2146"}; +{"key"="365";"subkey"="73";"value"="2153"}; +{"key"="366";"subkey"="73";"value"="2160"}; +{"key"="367";"subkey"="73";"value"="2167"}; +{"key"="368";"subkey"="73";"value"="2174"}; +{"key"="369";"subkey"="73";"value"="2181"}; +{"key"="370";"subkey"="74";"value"="2187"}; +{"key"="371";"subkey"="74";"value"="2194"}; +{"key"="372";"subkey"="74";"value"="2201"}; 
+{"key"="373";"subkey"="74";"value"="2208"}; +{"key"="374";"subkey"="74";"value"="2215"}; +{"key"="375";"subkey"="75";"value"="2222"}; +{"key"="376";"subkey"="75";"value"="2229"}; +{"key"="377";"subkey"="75";"value"="2236"}; +{"key"="378";"subkey"="75";"value"="2243"}; +{"key"="379";"subkey"="75";"value"="2250"}; +{"key"="380";"subkey"="76";"value"="2257"}; +{"key"="381";"subkey"="76";"value"="2264"}; +{"key"="382";"subkey"="76";"value"="2271"}; +{"key"="383";"subkey"="76";"value"="2278"}; +{"key"="384";"subkey"="76";"value"="2285"}; +{"key"="385";"subkey"="77";"value"="2291"}; +{"key"="386";"subkey"="77";"value"="2298"}; +{"key"="387";"subkey"="77";"value"="2305"}; +{"key"="388";"subkey"="77";"value"="2312"}; +{"key"="389";"subkey"="77";"value"="2319"}; +{"key"="390";"subkey"="78";"value"="2326"}; +{"key"="391";"subkey"="78";"value"="2333"}; +{"key"="392";"subkey"="78";"value"="2340"}; +{"key"="393";"subkey"="78";"value"="2347"}; +{"key"="394";"subkey"="78";"value"="2354"}; +{"key"="395";"subkey"="79";"value"="2361"}; +{"key"="396";"subkey"="79";"value"="2368"}; +{"key"="397";"subkey"="79";"value"="2375"}; +{"key"="398";"subkey"="79";"value"="2382"}; +{"key"="399";"subkey"="79";"value"="2389"}; +{"key"="400";"subkey"="80";"value"="2396"}; +{"key"="401";"subkey"="80";"value"="2403"}; +{"key"="402";"subkey"="80";"value"="2410"}; +{"key"="403";"subkey"="80";"value"="2417"}; +{"key"="404";"subkey"="80";"value"="2424"}; +{"key"="405";"subkey"="81";"value"="2431"}; +{"key"="406";"subkey"="81";"value"="2438"}; +{"key"="407";"subkey"="81";"value"="2445"}; +{"key"="408";"subkey"="81";"value"="2452"}; +{"key"="409";"subkey"="81";"value"="2459"}; +{"key"="410";"subkey"="82";"value"="2466"}; +{"key"="411";"subkey"="82";"value"="2473"}; +{"key"="412";"subkey"="82";"value"="2480"}; +{"key"="413";"subkey"="82";"value"="2487"}; +{"key"="414";"subkey"="82";"value"="2494"}; +{"key"="415";"subkey"="83";"value"="2501"}; +{"key"="416";"subkey"="83";"value"="2508"}; 
+{"key"="417";"subkey"="83";"value"="2515"}; +{"key"="418";"subkey"="83";"value"="2522"}; +{"key"="419";"subkey"="83";"value"="2529"}; +{"key"="420";"subkey"="84";"value"="2536"}; +{"key"="421";"subkey"="84";"value"="2543"}; +{"key"="422";"subkey"="84";"value"="2550"}; +{"key"="423";"subkey"="84";"value"="2558"}; +{"key"="424";"subkey"="84";"value"="2565"}; +{"key"="425";"subkey"="85";"value"="2572"}; +{"key"="426";"subkey"="85";"value"="2579"}; +{"key"="427";"subkey"="85";"value"="2586"}; +{"key"="428";"subkey"="85";"value"="2593"}; +{"key"="429";"subkey"="85";"value"="2600"}; +{"key"="430";"subkey"="86";"value"="2607"}; +{"key"="431";"subkey"="86";"value"="2614"}; +{"key"="432";"subkey"="86";"value"="2621"}; +{"key"="433";"subkey"="86";"value"="2628"}; +{"key"="434";"subkey"="86";"value"="2635"}; +{"key"="435";"subkey"="87";"value"="2642"}; +{"key"="436";"subkey"="87";"value"="2649"}; +{"key"="437";"subkey"="87";"value"="2656"}; +{"key"="438";"subkey"="87";"value"="2664"}; +{"key"="439";"subkey"="87";"value"="2671"}; +{"key"="440";"subkey"="88";"value"="2678"}; +{"key"="441";"subkey"="88";"value"="2685"}; +{"key"="442";"subkey"="88";"value"="2692"}; +{"key"="443";"subkey"="88";"value"="2699"}; +{"key"="444";"subkey"="88";"value"="2706"}; +{"key"="445";"subkey"="89";"value"="2713"}; +{"key"="446";"subkey"="89";"value"="2720"}; +{"key"="447";"subkey"="89";"value"="2727"}; +{"key"="448";"subkey"="89";"value"="2734"}; +{"key"="449";"subkey"="89";"value"="2742"}; +{"key"="450";"subkey"="90";"value"="2749"}; +{"key"="451";"subkey"="90";"value"="2756"}; +{"key"="452";"subkey"="90";"value"="2763"}; +{"key"="453";"subkey"="90";"value"="2770"}; +{"key"="454";"subkey"="90";"value"="2777"}; +{"key"="455";"subkey"="91";"value"="2784"}; +{"key"="456";"subkey"="91";"value"="2791"}; +{"key"="457";"subkey"="91";"value"="2798"}; +{"key"="458";"subkey"="91";"value"="2806"}; +{"key"="459";"subkey"="91";"value"="2813"}; +{"key"="460";"subkey"="92";"value"="2820"}; 
+{"key"="461";"subkey"="92";"value"="2827"}; +{"key"="462";"subkey"="92";"value"="2834"}; +{"key"="463";"subkey"="92";"value"="2841"}; +{"key"="464";"subkey"="92";"value"="2848"}; +{"key"="465";"subkey"="93";"value"="2856"}; +{"key"="466";"subkey"="93";"value"="2863"}; +{"key"="467";"subkey"="93";"value"="2870"}; +{"key"="468";"subkey"="93";"value"="2877"}; +{"key"="469";"subkey"="93";"value"="2884"}; +{"key"="470";"subkey"="94";"value"="2891"}; +{"key"="471";"subkey"="94";"value"="2898"}; +{"key"="472";"subkey"="94";"value"="2906"}; +{"key"="473";"subkey"="94";"value"="2913"}; +{"key"="474";"subkey"="94";"value"="2920"}; +{"key"="475";"subkey"="95";"value"="2927"}; +{"key"="476";"subkey"="95";"value"="2934"}; +{"key"="477";"subkey"="95";"value"="2941"}; +{"key"="478";"subkey"="95";"value"="2949"}; +{"key"="479";"subkey"="95";"value"="2956"}; +{"key"="480";"subkey"="96";"value"="2963"}; +{"key"="481";"subkey"="96";"value"="2970"}; +{"key"="482";"subkey"="96";"value"="2977"}; +{"key"="483";"subkey"="96";"value"="2984"}; +{"key"="484";"subkey"="96";"value"="2992"}; +{"key"="485";"subkey"="97";"value"="2999"}; +{"key"="486";"subkey"="97";"value"="3006"}; +{"key"="487";"subkey"="97";"value"="3013"}; +{"key"="488";"subkey"="97";"value"="3020"}; +{"key"="489";"subkey"="97";"value"="3028"}; +{"key"="490";"subkey"="98";"value"="3035"}; +{"key"="491";"subkey"="98";"value"="3042"}; +{"key"="492";"subkey"="98";"value"="3049"}; +{"key"="493";"subkey"="98";"value"="3056"}; +{"key"="494";"subkey"="98";"value"="3064"}; +{"key"="495";"subkey"="99";"value"="3071"}; +{"key"="496";"subkey"="99";"value"="3078"}; +{"key"="497";"subkey"="99";"value"="3085"}; +{"key"="498";"subkey"="99";"value"="3092"}; +{"key"="499";"subkey"="99";"value"="3100"}; +{"key"="500";"subkey"="100";"value"="3107"}; +{"key"="501";"subkey"="100";"value"="3114"}; +{"key"="502";"subkey"="100";"value"="3121"}; +{"key"="503";"subkey"="100";"value"="3128"}; +{"key"="504";"subkey"="100";"value"="3136"}; 
+{"key"="505";"subkey"="101";"value"="3143"}; +{"key"="506";"subkey"="101";"value"="3150"}; +{"key"="507";"subkey"="101";"value"="3157"}; +{"key"="508";"subkey"="101";"value"="3165"}; +{"key"="509";"subkey"="101";"value"="3172"}; +{"key"="510";"subkey"="102";"value"="3179"}; +{"key"="511";"subkey"="102";"value"="3186"}; +{"key"="512";"subkey"="102";"value"="3194"}; +{"key"="513";"subkey"="102";"value"="3201"}; +{"key"="514";"subkey"="102";"value"="3208"}; +{"key"="515";"subkey"="103";"value"="3215"}; +{"key"="516";"subkey"="103";"value"="3222"}; +{"key"="517";"subkey"="103";"value"="3230"}; +{"key"="518";"subkey"="103";"value"="3237"}; +{"key"="519";"subkey"="103";"value"="3244"}; +{"key"="520";"subkey"="104";"value"="3251"}; +{"key"="521";"subkey"="104";"value"="3259"}; +{"key"="522";"subkey"="104";"value"="3266"}; +{"key"="523";"subkey"="104";"value"="3273"}; +{"key"="524";"subkey"="104";"value"="3281"}; +{"key"="525";"subkey"="105";"value"="3288"}; +{"key"="526";"subkey"="105";"value"="3295"}; +{"key"="527";"subkey"="105";"value"="3302"}; +{"key"="528";"subkey"="105";"value"="3310"}; +{"key"="529";"subkey"="105";"value"="3317"}; +{"key"="530";"subkey"="106";"value"="3324"}; +{"key"="531";"subkey"="106";"value"="3331"}; +{"key"="532";"subkey"="106";"value"="3339"}; +{"key"="533";"subkey"="106";"value"="3346"}; +{"key"="534";"subkey"="106";"value"="3353"}; +{"key"="535";"subkey"="107";"value"="3361"}; +{"key"="536";"subkey"="107";"value"="3368"}; +{"key"="537";"subkey"="107";"value"="3375"}; +{"key"="538";"subkey"="107";"value"="3382"}; +{"key"="539";"subkey"="107";"value"="3390"}; +{"key"="540";"subkey"="108";"value"="3397"}; +{"key"="541";"subkey"="108";"value"="3404"}; +{"key"="542";"subkey"="108";"value"="3412"}; +{"key"="543";"subkey"="108";"value"="3419"}; +{"key"="544";"subkey"="108";"value"="3426"}; +{"key"="545";"subkey"="109";"value"="3433"}; +{"key"="546";"subkey"="109";"value"="3441"}; +{"key"="547";"subkey"="109";"value"="3448"}; 
+{"key"="548";"subkey"="109";"value"="3455"}; +{"key"="549";"subkey"="109";"value"="3463"}; +{"key"="550";"subkey"="110";"value"="3470"}; +{"key"="551";"subkey"="110";"value"="3477"}; +{"key"="552";"subkey"="110";"value"="3485"}; +{"key"="553";"subkey"="110";"value"="3492"}; +{"key"="554";"subkey"="110";"value"="3499"}; +{"key"="555";"subkey"="111";"value"="3507"}; +{"key"="556";"subkey"="111";"value"="3514"}; +{"key"="557";"subkey"="111";"value"="3521"}; +{"key"="558";"subkey"="111";"value"="3528"}; +{"key"="559";"subkey"="111";"value"="3536"}; +{"key"="560";"subkey"="112";"value"="3543"}; +{"key"="561";"subkey"="112";"value"="3550"}; +{"key"="562";"subkey"="112";"value"="3558"}; +{"key"="563";"subkey"="112";"value"="3565"}; +{"key"="564";"subkey"="112";"value"="3572"}; +{"key"="565";"subkey"="113";"value"="3580"}; +{"key"="566";"subkey"="113";"value"="3587"}; +{"key"="567";"subkey"="113";"value"="3594"}; +{"key"="568";"subkey"="113";"value"="3602"}; +{"key"="569";"subkey"="113";"value"="3609"}; +{"key"="570";"subkey"="114";"value"="3617"}; +{"key"="571";"subkey"="114";"value"="3624"}; +{"key"="572";"subkey"="114";"value"="3631"}; +{"key"="573";"subkey"="114";"value"="3639"}; +{"key"="574";"subkey"="114";"value"="3646"}; +{"key"="575";"subkey"="115";"value"="3653"}; +{"key"="576";"subkey"="115";"value"="3661"}; +{"key"="577";"subkey"="115";"value"="3668"}; +{"key"="578";"subkey"="115";"value"="3675"}; +{"key"="579";"subkey"="115";"value"="3683"}; +{"key"="580";"subkey"="116";"value"="3690"}; +{"key"="581";"subkey"="116";"value"="3697"}; +{"key"="582";"subkey"="116";"value"="3705"}; +{"key"="583";"subkey"="116";"value"="3712"}; +{"key"="584";"subkey"="116";"value"="3720"}; +{"key"="585";"subkey"="117";"value"="3727"}; +{"key"="586";"subkey"="117";"value"="3734"}; +{"key"="587";"subkey"="117";"value"="3742"}; +{"key"="588";"subkey"="117";"value"="3749"}; +{"key"="589";"subkey"="117";"value"="3756"}; +{"key"="590";"subkey"="118";"value"="3764"}; 
+{"key"="591";"subkey"="118";"value"="3771"}; +{"key"="592";"subkey"="118";"value"="3779"}; +{"key"="593";"subkey"="118";"value"="3786"}; +{"key"="594";"subkey"="118";"value"="3793"}; +{"key"="595";"subkey"="119";"value"="3801"}; +{"key"="596";"subkey"="119";"value"="3808"}; +{"key"="597";"subkey"="119";"value"="3815"}; +{"key"="598";"subkey"="119";"value"="3823"}; +{"key"="599";"subkey"="119";"value"="3830"}; +{"key"="600";"subkey"="120";"value"="3838"}; +{"key"="601";"subkey"="120";"value"="3845"}; +{"key"="602";"subkey"="120";"value"="3852"}; +{"key"="603";"subkey"="120";"value"="3860"}; +{"key"="604";"subkey"="120";"value"="3867"}; +{"key"="605";"subkey"="121";"value"="3875"}; +{"key"="606";"subkey"="121";"value"="3882"}; +{"key"="607";"subkey"="121";"value"="3889"}; +{"key"="608";"subkey"="121";"value"="3897"}; +{"key"="609";"subkey"="121";"value"="3904"}; +{"key"="610";"subkey"="122";"value"="3912"}; +{"key"="611";"subkey"="122";"value"="3919"}; +{"key"="612";"subkey"="122";"value"="3927"}; +{"key"="613";"subkey"="122";"value"="3934"}; +{"key"="614";"subkey"="122";"value"="3941"}; +{"key"="615";"subkey"="123";"value"="3949"}; +{"key"="616";"subkey"="123";"value"="3956"}; +{"key"="617";"subkey"="123";"value"="3964"}; +{"key"="618";"subkey"="123";"value"="3971"}; +{"key"="619";"subkey"="123";"value"="3978"}; +{"key"="620";"subkey"="124";"value"="3986"}; +{"key"="621";"subkey"="124";"value"="3993"}; +{"key"="622";"subkey"="124";"value"="4001"}; +{"key"="623";"subkey"="124";"value"="4008"}; +{"key"="624";"subkey"="124";"value"="4016"}; +{"key"="625";"subkey"="125";"value"="4023"}; +{"key"="626";"subkey"="125";"value"="4031"}; +{"key"="627";"subkey"="125";"value"="4038"}; +{"key"="628";"subkey"="125";"value"="4045"}; +{"key"="629";"subkey"="125";"value"="4053"}; +{"key"="630";"subkey"="126";"value"="4060"}; +{"key"="631";"subkey"="126";"value"="4068"}; +{"key"="632";"subkey"="126";"value"="4075"}; +{"key"="633";"subkey"="126";"value"="4083"}; 
+{"key"="634";"subkey"="126";"value"="4090"}; +{"key"="635";"subkey"="127";"value"="4098"}; +{"key"="636";"subkey"="127";"value"="4105"}; +{"key"="637";"subkey"="127";"value"="4112"}; +{"key"="638";"subkey"="127";"value"="4120"}; +{"key"="639";"subkey"="127";"value"="4127"}; +{"key"="640";"subkey"="128";"value"="4135"}; +{"key"="641";"subkey"="128";"value"="4142"}; +{"key"="642";"subkey"="128";"value"="4150"}; +{"key"="643";"subkey"="128";"value"="4157"}; +{"key"="644";"subkey"="128";"value"="4165"}; +{"key"="645";"subkey"="129";"value"="4172"}; +{"key"="646";"subkey"="129";"value"="4180"}; +{"key"="647";"subkey"="129";"value"="4187"}; +{"key"="648";"subkey"="129";"value"="4195"}; +{"key"="649";"subkey"="129";"value"="4202"}; +{"key"="650";"subkey"="130";"value"="4210"}; +{"key"="651";"subkey"="130";"value"="4217"}; +{"key"="652";"subkey"="130";"value"="4224"}; +{"key"="653";"subkey"="130";"value"="4232"}; +{"key"="654";"subkey"="130";"value"="4239"}; +{"key"="655";"subkey"="131";"value"="4247"}; +{"key"="656";"subkey"="131";"value"="4254"}; +{"key"="657";"subkey"="131";"value"="4262"}; +{"key"="658";"subkey"="131";"value"="4269"}; +{"key"="659";"subkey"="131";"value"="4277"}; +{"key"="660";"subkey"="132";"value"="4284"}; +{"key"="661";"subkey"="132";"value"="4292"}; +{"key"="662";"subkey"="132";"value"="4299"}; +{"key"="663";"subkey"="132";"value"="4307"}; +{"key"="664";"subkey"="132";"value"="4314"}; +{"key"="665";"subkey"="133";"value"="4322"}; +{"key"="666";"subkey"="133";"value"="4329"}; +{"key"="667";"subkey"="133";"value"="4337"}; +{"key"="668";"subkey"="133";"value"="4344"}; +{"key"="669";"subkey"="133";"value"="4352"}; +{"key"="670";"subkey"="134";"value"="4359"}; +{"key"="671";"subkey"="134";"value"="4367"}; +{"key"="672";"subkey"="134";"value"="4374"}; +{"key"="673";"subkey"="134";"value"="4382"}; +{"key"="674";"subkey"="134";"value"="4389"}; +{"key"="675";"subkey"="135";"value"="4397"}; +{"key"="676";"subkey"="135";"value"="4404"}; 
+{"key"="677";"subkey"="135";"value"="4412"}; +{"key"="678";"subkey"="135";"value"="4419"}; +{"key"="679";"subkey"="135";"value"="4427"}; +{"key"="680";"subkey"="136";"value"="4435"}; +{"key"="681";"subkey"="136";"value"="4442"}; +{"key"="682";"subkey"="136";"value"="4450"}; +{"key"="683";"subkey"="136";"value"="4457"}; +{"key"="684";"subkey"="136";"value"="4465"}; +{"key"="685";"subkey"="137";"value"="4472"}; +{"key"="686";"subkey"="137";"value"="4480"}; +{"key"="687";"subkey"="137";"value"="4487"}; +{"key"="688";"subkey"="137";"value"="4495"}; +{"key"="689";"subkey"="137";"value"="4502"}; +{"key"="690";"subkey"="138";"value"="4510"}; +{"key"="691";"subkey"="138";"value"="4517"}; +{"key"="692";"subkey"="138";"value"="4525"}; +{"key"="693";"subkey"="138";"value"="4532"}; +{"key"="694";"subkey"="138";"value"="4540"}; +{"key"="695";"subkey"="139";"value"="4548"}; +{"key"="696";"subkey"="139";"value"="4555"}; +{"key"="697";"subkey"="139";"value"="4563"}; +{"key"="698";"subkey"="139";"value"="4570"}; +{"key"="699";"subkey"="139";"value"="4578"}; +{"key"="700";"subkey"="140";"value"="4585"}; +{"key"="701";"subkey"="140";"value"="4593"}; +{"key"="702";"subkey"="140";"value"="4600"}; +{"key"="703";"subkey"="140";"value"="4608"}; +{"key"="704";"subkey"="140";"value"="4615"}; +{"key"="705";"subkey"="141";"value"="4623"}; +{"key"="706";"subkey"="141";"value"="4631"}; +{"key"="707";"subkey"="141";"value"="4638"}; +{"key"="708";"subkey"="141";"value"="4646"}; +{"key"="709";"subkey"="141";"value"="4653"}; +{"key"="710";"subkey"="142";"value"="4661"}; +{"key"="711";"subkey"="142";"value"="4668"}; +{"key"="712";"subkey"="142";"value"="4676"}; +{"key"="713";"subkey"="142";"value"="4684"}; +{"key"="714";"subkey"="142";"value"="4691"}; +{"key"="715";"subkey"="143";"value"="4699"}; +{"key"="716";"subkey"="143";"value"="4706"}; +{"key"="717";"subkey"="143";"value"="4714"}; +{"key"="718";"subkey"="143";"value"="4721"}; +{"key"="719";"subkey"="143";"value"="4729"}; 
+{"key"="720";"subkey"="144";"value"="4737"}; +{"key"="721";"subkey"="144";"value"="4744"}; +{"key"="722";"subkey"="144";"value"="4752"}; +{"key"="723";"subkey"="144";"value"="4759"}; +{"key"="724";"subkey"="144";"value"="4767"}; +{"key"="725";"subkey"="145";"value"="4774"}; +{"key"="726";"subkey"="145";"value"="4782"}; +{"key"="727";"subkey"="145";"value"="4790"}; +{"key"="728";"subkey"="145";"value"="4797"}; +{"key"="729";"subkey"="145";"value"="4805"}; +{"key"="730";"subkey"="146";"value"="4812"}; +{"key"="731";"subkey"="146";"value"="4820"}; +{"key"="732";"subkey"="146";"value"="4828"}; +{"key"="733";"subkey"="146";"value"="4835"}; +{"key"="734";"subkey"="146";"value"="4843"}; +{"key"="735";"subkey"="147";"value"="4850"}; +{"key"="736";"subkey"="147";"value"="4858"}; +{"key"="737";"subkey"="147";"value"="4866"}; +{"key"="738";"subkey"="147";"value"="4873"}; +{"key"="739";"subkey"="147";"value"="4881"}; +{"key"="740";"subkey"="148";"value"="4888"}; +{"key"="741";"subkey"="148";"value"="4896"}; +{"key"="742";"subkey"="148";"value"="4904"}; +{"key"="743";"subkey"="148";"value"="4911"}; +{"key"="744";"subkey"="148";"value"="4919"}; +{"key"="745";"subkey"="149";"value"="4926"}; +{"key"="746";"subkey"="149";"value"="4934"}; +{"key"="747";"subkey"="149";"value"="4942"}; +{"key"="748";"subkey"="149";"value"="4949"}; +{"key"="749";"subkey"="149";"value"="4957"}; +{"key"="750";"subkey"="150";"value"="4965"}; +{"key"="751";"subkey"="150";"value"="4972"}; +{"key"="752";"subkey"="150";"value"="4980"}; +{"key"="753";"subkey"="150";"value"="4987"}; +{"key"="754";"subkey"="150";"value"="4995"}; +{"key"="755";"subkey"="151";"value"="5003"}; +{"key"="756";"subkey"="151";"value"="5010"}; +{"key"="757";"subkey"="151";"value"="5018"}; +{"key"="758";"subkey"="151";"value"="5026"}; +{"key"="759";"subkey"="151";"value"="5033"}; +{"key"="760";"subkey"="152";"value"="5041"}; +{"key"="761";"subkey"="152";"value"="5048"}; +{"key"="762";"subkey"="152";"value"="5056"}; 
+{"key"="763";"subkey"="152";"value"="5064"}; +{"key"="764";"subkey"="152";"value"="5071"}; +{"key"="765";"subkey"="153";"value"="5079"}; +{"key"="766";"subkey"="153";"value"="5087"}; +{"key"="767";"subkey"="153";"value"="5094"}; +{"key"="768";"subkey"="153";"value"="5102"}; +{"key"="769";"subkey"="153";"value"="5110"}; +{"key"="770";"subkey"="154";"value"="5117"}; +{"key"="771";"subkey"="154";"value"="5125"}; +{"key"="772";"subkey"="154";"value"="5133"}; +{"key"="773";"subkey"="154";"value"="5140"}; +{"key"="774";"subkey"="154";"value"="5148"}; +{"key"="775";"subkey"="155";"value"="5155"}; +{"key"="776";"subkey"="155";"value"="5163"}; +{"key"="777";"subkey"="155";"value"="5171"}; +{"key"="778";"subkey"="155";"value"="5178"}; +{"key"="779";"subkey"="155";"value"="5186"}; +{"key"="780";"subkey"="156";"value"="5194"}; +{"key"="781";"subkey"="156";"value"="5201"}; +{"key"="782";"subkey"="156";"value"="5209"}; +{"key"="783";"subkey"="156";"value"="5217"}; +{"key"="784";"subkey"="156";"value"="5224"}; +{"key"="785";"subkey"="157";"value"="5232"}; +{"key"="786";"subkey"="157";"value"="5240"}; +{"key"="787";"subkey"="157";"value"="5247"}; +{"key"="788";"subkey"="157";"value"="5255"}; +{"key"="789";"subkey"="157";"value"="5263"}; +{"key"="790";"subkey"="158";"value"="5270"}; +{"key"="791";"subkey"="158";"value"="5278"}; +{"key"="792";"subkey"="158";"value"="5286"}; +{"key"="793";"subkey"="158";"value"="5293"}; +{"key"="794";"subkey"="158";"value"="5301"}; +{"key"="795";"subkey"="159";"value"="5309"}; +{"key"="796";"subkey"="159";"value"="5316"}; +{"key"="797";"subkey"="159";"value"="5324"}; +{"key"="798";"subkey"="159";"value"="5332"}; +{"key"="799";"subkey"="159";"value"="5340"}; +{"key"="800";"subkey"="160";"value"="5347"}; +{"key"="801";"subkey"="160";"value"="5355"}; +{"key"="802";"subkey"="160";"value"="5363"}; +{"key"="803";"subkey"="160";"value"="5370"}; +{"key"="804";"subkey"="160";"value"="5378"}; +{"key"="805";"subkey"="161";"value"="5386"}; 
+{"key"="806";"subkey"="161";"value"="5393"}; +{"key"="807";"subkey"="161";"value"="5401"}; +{"key"="808";"subkey"="161";"value"="5409"}; +{"key"="809";"subkey"="161";"value"="5416"}; +{"key"="810";"subkey"="162";"value"="5424"}; +{"key"="811";"subkey"="162";"value"="5432"}; +{"key"="812";"subkey"="162";"value"="5439"}; +{"key"="813";"subkey"="162";"value"="5447"}; +{"key"="814";"subkey"="162";"value"="5455"}; +{"key"="815";"subkey"="163";"value"="5463"}; +{"key"="816";"subkey"="163";"value"="5470"}; +{"key"="817";"subkey"="163";"value"="5478"}; +{"key"="818";"subkey"="163";"value"="5486"}; +{"key"="819";"subkey"="163";"value"="5493"}; +{"key"="820";"subkey"="164";"value"="5501"}; +{"key"="821";"subkey"="164";"value"="5509"}; +{"key"="822";"subkey"="164";"value"="5517"}; +{"key"="823";"subkey"="164";"value"="5524"}; +{"key"="824";"subkey"="164";"value"="5532"}; +{"key"="825";"subkey"="165";"value"="5540"}; +{"key"="826";"subkey"="165";"value"="5547"}; +{"key"="827";"subkey"="165";"value"="5555"}; +{"key"="828";"subkey"="165";"value"="5563"}; +{"key"="829";"subkey"="165";"value"="5571"}; +{"key"="830";"subkey"="166";"value"="5578"}; +{"key"="831";"subkey"="166";"value"="5586"}; +{"key"="832";"subkey"="166";"value"="5594"}; +{"key"="833";"subkey"="166";"value"="5601"}; +{"key"="834";"subkey"="166";"value"="5609"}; +{"key"="835";"subkey"="167";"value"="5617"}; +{"key"="836";"subkey"="167";"value"="5625"}; +{"key"="837";"subkey"="167";"value"="5632"}; +{"key"="838";"subkey"="167";"value"="5640"}; +{"key"="839";"subkey"="167";"value"="5648"}; +{"key"="840";"subkey"="168";"value"="5656"}; +{"key"="841";"subkey"="168";"value"="5663"}; +{"key"="842";"subkey"="168";"value"="5671"}; +{"key"="843";"subkey"="168";"value"="5679"}; +{"key"="844";"subkey"="168";"value"="5687"}; +{"key"="845";"subkey"="169";"value"="5694"}; +{"key"="846";"subkey"="169";"value"="5702"}; +{"key"="847";"subkey"="169";"value"="5710"}; +{"key"="848";"subkey"="169";"value"="5717"}; 
+{"key"="849";"subkey"="169";"value"="5725"}; +{"key"="850";"subkey"="170";"value"="5733"}; +{"key"="851";"subkey"="170";"value"="5741"}; +{"key"="852";"subkey"="170";"value"="5748"}; +{"key"="853";"subkey"="170";"value"="5756"}; +{"key"="854";"subkey"="170";"value"="5764"}; +{"key"="855";"subkey"="171";"value"="5772"}; +{"key"="856";"subkey"="171";"value"="5779"}; +{"key"="857";"subkey"="171";"value"="5787"}; +{"key"="858";"subkey"="171";"value"="5795"}; +{"key"="859";"subkey"="171";"value"="5803"}; +{"key"="860";"subkey"="172";"value"="5810"}; +{"key"="861";"subkey"="172";"value"="5818"}; +{"key"="862";"subkey"="172";"value"="5826"}; +{"key"="863";"subkey"="172";"value"="5834"}; +{"key"="864";"subkey"="172";"value"="5841"}; +{"key"="865";"subkey"="173";"value"="5849"}; +{"key"="866";"subkey"="173";"value"="5857"}; +{"key"="867";"subkey"="173";"value"="5865"}; +{"key"="868";"subkey"="173";"value"="5873"}; +{"key"="869";"subkey"="173";"value"="5880"}; +{"key"="870";"subkey"="174";"value"="5888"}; +{"key"="871";"subkey"="174";"value"="5896"}; +{"key"="872";"subkey"="174";"value"="5904"}; +{"key"="873";"subkey"="174";"value"="5911"}; +{"key"="874";"subkey"="174";"value"="5919"}; +{"key"="875";"subkey"="175";"value"="5927"}; +{"key"="876";"subkey"="175";"value"="5935"}; +{"key"="877";"subkey"="175";"value"="5942"}; +{"key"="878";"subkey"="175";"value"="5950"}; +{"key"="879";"subkey"="175";"value"="5958"}; +{"key"="880";"subkey"="176";"value"="5966"}; +{"key"="881";"subkey"="176";"value"="5974"}; +{"key"="882";"subkey"="176";"value"="5981"}; +{"key"="883";"subkey"="176";"value"="5989"}; +{"key"="884";"subkey"="176";"value"="5997"}; +{"key"="885";"subkey"="177";"value"="6005"}; +{"key"="886";"subkey"="177";"value"="6013"}; +{"key"="887";"subkey"="177";"value"="6020"}; +{"key"="888";"subkey"="177";"value"="6028"}; +{"key"="889";"subkey"="177";"value"="6036"}; +{"key"="890";"subkey"="178";"value"="6044"}; +{"key"="891";"subkey"="178";"value"="6051"}; 
+{"key"="892";"subkey"="178";"value"="6059"}; +{"key"="893";"subkey"="178";"value"="6067"}; +{"key"="894";"subkey"="178";"value"="6075"}; +{"key"="895";"subkey"="179";"value"="6083"}; +{"key"="896";"subkey"="179";"value"="6090"}; +{"key"="897";"subkey"="179";"value"="6098"}; +{"key"="898";"subkey"="179";"value"="6106"}; +{"key"="899";"subkey"="179";"value"="6114"}; +{"key"="900";"subkey"="180";"value"="6122"}; +{"key"="901";"subkey"="180";"value"="6129"}; +{"key"="902";"subkey"="180";"value"="6137"}; +{"key"="903";"subkey"="180";"value"="6145"}; +{"key"="904";"subkey"="180";"value"="6153"}; +{"key"="905";"subkey"="181";"value"="6161"}; +{"key"="906";"subkey"="181";"value"="6168"}; +{"key"="907";"subkey"="181";"value"="6176"}; +{"key"="908";"subkey"="181";"value"="6184"}; +{"key"="909";"subkey"="181";"value"="6192"}; +{"key"="910";"subkey"="182";"value"="6200"}; +{"key"="911";"subkey"="182";"value"="6208"}; +{"key"="912";"subkey"="182";"value"="6215"}; +{"key"="913";"subkey"="182";"value"="6223"}; +{"key"="914";"subkey"="182";"value"="6231"}; +{"key"="915";"subkey"="183";"value"="6239"}; +{"key"="916";"subkey"="183";"value"="6247"}; +{"key"="917";"subkey"="183";"value"="6254"}; +{"key"="918";"subkey"="183";"value"="6262"}; +{"key"="919";"subkey"="183";"value"="6270"}; +{"key"="920";"subkey"="184";"value"="6278"}; +{"key"="921";"subkey"="184";"value"="6286"}; +{"key"="922";"subkey"="184";"value"="6294"}; +{"key"="923";"subkey"="184";"value"="6301"}; +{"key"="924";"subkey"="184";"value"="6309"}; +{"key"="925";"subkey"="185";"value"="6317"}; +{"key"="926";"subkey"="185";"value"="6325"}; +{"key"="927";"subkey"="185";"value"="6333"}; +{"key"="928";"subkey"="185";"value"="6341"}; +{"key"="929";"subkey"="185";"value"="6348"}; +{"key"="930";"subkey"="186";"value"="6356"}; +{"key"="931";"subkey"="186";"value"="6364"}; +{"key"="932";"subkey"="186";"value"="6372"}; +{"key"="933";"subkey"="186";"value"="6380"}; +{"key"="934";"subkey"="186";"value"="6388"}; 
+{"key"="935";"subkey"="187";"value"="6395"}; +{"key"="936";"subkey"="187";"value"="6403"}; +{"key"="937";"subkey"="187";"value"="6411"}; +{"key"="938";"subkey"="187";"value"="6419"}; +{"key"="939";"subkey"="187";"value"="6427"}; +{"key"="940";"subkey"="188";"value"="6435"}; +{"key"="941";"subkey"="188";"value"="6442"}; +{"key"="942";"subkey"="188";"value"="6450"}; +{"key"="943";"subkey"="188";"value"="6458"}; +{"key"="944";"subkey"="188";"value"="6466"}; +{"key"="945";"subkey"="189";"value"="6474"}; +{"key"="946";"subkey"="189";"value"="6482"}; +{"key"="947";"subkey"="189";"value"="6490"}; +{"key"="948";"subkey"="189";"value"="6497"}; +{"key"="949";"subkey"="189";"value"="6505"}; +{"key"="950";"subkey"="190";"value"="6513"}; +{"key"="951";"subkey"="190";"value"="6521"}; +{"key"="952";"subkey"="190";"value"="6529"}; +{"key"="953";"subkey"="190";"value"="6537"}; +{"key"="954";"subkey"="190";"value"="6545"}; +{"key"="955";"subkey"="191";"value"="6552"}; +{"key"="956";"subkey"="191";"value"="6560"}; +{"key"="957";"subkey"="191";"value"="6568"}; +{"key"="958";"subkey"="191";"value"="6576"}; +{"key"="959";"subkey"="191";"value"="6584"}; +{"key"="960";"subkey"="192";"value"="6592"}; +{"key"="961";"subkey"="192";"value"="6600"}; +{"key"="962";"subkey"="192";"value"="6607"}; +{"key"="963";"subkey"="192";"value"="6615"}; +{"key"="964";"subkey"="192";"value"="6623"}; +{"key"="965";"subkey"="193";"value"="6631"}; +{"key"="966";"subkey"="193";"value"="6639"}; +{"key"="967";"subkey"="193";"value"="6647"}; +{"key"="968";"subkey"="193";"value"="6655"}; +{"key"="969";"subkey"="193";"value"="6663"}; +{"key"="970";"subkey"="194";"value"="6670"}; +{"key"="971";"subkey"="194";"value"="6678"}; +{"key"="972";"subkey"="194";"value"="6686"}; +{"key"="973";"subkey"="194";"value"="6694"}; +{"key"="974";"subkey"="194";"value"="6702"}; +{"key"="975";"subkey"="195";"value"="6710"}; +{"key"="976";"subkey"="195";"value"="6718"}; +{"key"="977";"subkey"="195";"value"="6726"}; 
+{"key"="978";"subkey"="195";"value"="6734"}; +{"key"="979";"subkey"="195";"value"="6741"}; +{"key"="980";"subkey"="196";"value"="6749"}; +{"key"="981";"subkey"="196";"value"="6757"}; +{"key"="982";"subkey"="196";"value"="6765"}; +{"key"="983";"subkey"="196";"value"="6773"}; +{"key"="984";"subkey"="196";"value"="6781"}; +{"key"="985";"subkey"="197";"value"="6789"}; +{"key"="986";"subkey"="197";"value"="6797"}; +{"key"="987";"subkey"="197";"value"="6805"}; +{"key"="988";"subkey"="197";"value"="6812"}; +{"key"="989";"subkey"="197";"value"="6820"}; +{"key"="990";"subkey"="198";"value"="6828"}; +{"key"="991";"subkey"="198";"value"="6836"}; +{"key"="992";"subkey"="198";"value"="6844"}; +{"key"="993";"subkey"="198";"value"="6852"}; +{"key"="994";"subkey"="198";"value"="6860"}; +{"key"="995";"subkey"="199";"value"="6868"}; +{"key"="996";"subkey"="199";"value"="6876"}; +{"key"="997";"subkey"="199";"value"="6884"}; +{"key"="998";"subkey"="199";"value"="6891"}; +{"key"="999";"subkey"="199";"value"="6899"}; +{"key"="1000";"subkey"="200";"value"="6907"}; +{"key"="1001";"subkey"="200";"value"="6915"}; +{"key"="1002";"subkey"="200";"value"="6923"}; +{"key"="1003";"subkey"="200";"value"="6931"}; +{"key"="1004";"subkey"="200";"value"="6939"}; +{"key"="1005";"subkey"="201";"value"="6947"}; +{"key"="1006";"subkey"="201";"value"="6955"}; +{"key"="1007";"subkey"="201";"value"="6963"}; +{"key"="1008";"subkey"="201";"value"="6971"}; +{"key"="1009";"subkey"="201";"value"="6978"}; +{"key"="1010";"subkey"="202";"value"="6986"}; +{"key"="1011";"subkey"="202";"value"="6994"}; +{"key"="1012";"subkey"="202";"value"="7002"}; +{"key"="1013";"subkey"="202";"value"="7010"}; +{"key"="1014";"subkey"="202";"value"="7018"}; +{"key"="1015";"subkey"="203";"value"="7026"}; +{"key"="1016";"subkey"="203";"value"="7034"}; +{"key"="1017";"subkey"="203";"value"="7042"}; +{"key"="1018";"subkey"="203";"value"="7050"}; +{"key"="1019";"subkey"="203";"value"="7058"}; +{"key"="1020";"subkey"="204";"value"="7066"}; 
+{"key"="1021";"subkey"="204";"value"="7074"}; +{"key"="1022";"subkey"="204";"value"="7081"}; +{"key"="1023";"subkey"="204";"value"="7089"}; +{"key"="1024";"subkey"="204";"value"="7097"}; +{"key"="1025";"subkey"="205";"value"="7105"}; +{"key"="1026";"subkey"="205";"value"="7113"}; +{"key"="1027";"subkey"="205";"value"="7121"}; +{"key"="1028";"subkey"="205";"value"="7129"}; +{"key"="1029";"subkey"="205";"value"="7137"}; +{"key"="1030";"subkey"="206";"value"="7145"}; +{"key"="1031";"subkey"="206";"value"="7153"}; +{"key"="1032";"subkey"="206";"value"="7161"}; +{"key"="1033";"subkey"="206";"value"="7169"}; +{"key"="1034";"subkey"="206";"value"="7177"}; +{"key"="1035";"subkey"="207";"value"="7185"}; +{"key"="1036";"subkey"="207";"value"="7193"}; +{"key"="1037";"subkey"="207";"value"="7201"}; +{"key"="1038";"subkey"="207";"value"="7208"}; +{"key"="1039";"subkey"="207";"value"="7216"}; +{"key"="1040";"subkey"="208";"value"="7224"}; +{"key"="1041";"subkey"="208";"value"="7232"}; +{"key"="1042";"subkey"="208";"value"="7240"}; +{"key"="1043";"subkey"="208";"value"="7248"}; +{"key"="1044";"subkey"="208";"value"="7256"}; +{"key"="1045";"subkey"="209";"value"="7264"}; +{"key"="1046";"subkey"="209";"value"="7272"}; +{"key"="1047";"subkey"="209";"value"="7280"}; +{"key"="1048";"subkey"="209";"value"="7288"}; +{"key"="1049";"subkey"="209";"value"="7296"}; +{"key"="1050";"subkey"="210";"value"="7304"}; +{"key"="1051";"subkey"="210";"value"="7312"}; +{"key"="1052";"subkey"="210";"value"="7320"}; +{"key"="1053";"subkey"="210";"value"="7328"}; +{"key"="1054";"subkey"="210";"value"="7336"}; +{"key"="1055";"subkey"="211";"value"="7344"}; +{"key"="1056";"subkey"="211";"value"="7352"}; +{"key"="1057";"subkey"="211";"value"="7360"}; +{"key"="1058";"subkey"="211";"value"="7368"}; +{"key"="1059";"subkey"="211";"value"="7376"}; +{"key"="1060";"subkey"="212";"value"="7383"}; +{"key"="1061";"subkey"="212";"value"="7391"}; +{"key"="1062";"subkey"="212";"value"="7399"}; 
+{"key"="1063";"subkey"="212";"value"="7407"}; +{"key"="1064";"subkey"="212";"value"="7415"}; +{"key"="1065";"subkey"="213";"value"="7423"}; +{"key"="1066";"subkey"="213";"value"="7431"}; +{"key"="1067";"subkey"="213";"value"="7439"}; +{"key"="1068";"subkey"="213";"value"="7447"}; +{"key"="1069";"subkey"="213";"value"="7455"}; +{"key"="1070";"subkey"="214";"value"="7463"}; +{"key"="1071";"subkey"="214";"value"="7471"}; +{"key"="1072";"subkey"="214";"value"="7479"}; +{"key"="1073";"subkey"="214";"value"="7487"}; +{"key"="1074";"subkey"="214";"value"="7495"}; +{"key"="1075";"subkey"="215";"value"="7503"}; +{"key"="1076";"subkey"="215";"value"="7511"}; +{"key"="1077";"subkey"="215";"value"="7519"}; +{"key"="1078";"subkey"="215";"value"="7527"}; +{"key"="1079";"subkey"="215";"value"="7535"}; +{"key"="1080";"subkey"="216";"value"="7543"}; +{"key"="1081";"subkey"="216";"value"="7551"}; +{"key"="1082";"subkey"="216";"value"="7559"}; +{"key"="1083";"subkey"="216";"value"="7567"}; +{"key"="1084";"subkey"="216";"value"="7575"}; +{"key"="1085";"subkey"="217";"value"="7583"}; +{"key"="1086";"subkey"="217";"value"="7591"}; +{"key"="1087";"subkey"="217";"value"="7599"}; +{"key"="1088";"subkey"="217";"value"="7607"}; +{"key"="1089";"subkey"="217";"value"="7615"}; +{"key"="1090";"subkey"="218";"value"="7623"}; +{"key"="1091";"subkey"="218";"value"="7631"}; +{"key"="1092";"subkey"="218";"value"="7639"}; +{"key"="1093";"subkey"="218";"value"="7647"}; +{"key"="1094";"subkey"="218";"value"="7655"}; +{"key"="1095";"subkey"="219";"value"="7663"}; +{"key"="1096";"subkey"="219";"value"="7671"}; +{"key"="1097";"subkey"="219";"value"="7679"}; +{"key"="1098";"subkey"="219";"value"="7687"}; +{"key"="1099";"subkey"="219";"value"="7695"}; +{"key"="1100";"subkey"="220";"value"="7703"}; +{"key"="1101";"subkey"="220";"value"="7711"}; +{"key"="1102";"subkey"="220";"value"="7719"}; +{"key"="1103";"subkey"="220";"value"="7727"}; +{"key"="1104";"subkey"="220";"value"="7735"}; 
+{"key"="1105";"subkey"="221";"value"="7743"}; +{"key"="1106";"subkey"="221";"value"="7751"}; +{"key"="1107";"subkey"="221";"value"="7759"}; +{"key"="1108";"subkey"="221";"value"="7767"}; +{"key"="1109";"subkey"="221";"value"="7775"}; +{"key"="1110";"subkey"="222";"value"="7783"}; +{"key"="1111";"subkey"="222";"value"="7791"}; +{"key"="1112";"subkey"="222";"value"="7799"}; +{"key"="1113";"subkey"="222";"value"="7807"}; +{"key"="1114";"subkey"="222";"value"="7815"}; +{"key"="1115";"subkey"="223";"value"="7823"}; +{"key"="1116";"subkey"="223";"value"="7831"}; +{"key"="1117";"subkey"="223";"value"="7839"}; +{"key"="1118";"subkey"="223";"value"="7847"}; +{"key"="1119";"subkey"="223";"value"="7855"}; +{"key"="1120";"subkey"="224";"value"="7863"}; +{"key"="1121";"subkey"="224";"value"="7871"}; +{"key"="1122";"subkey"="224";"value"="7879"}; +{"key"="1123";"subkey"="224";"value"="7887"}; +{"key"="1124";"subkey"="224";"value"="7895"}; +{"key"="1125";"subkey"="225";"value"="7903"}; +{"key"="1126";"subkey"="225";"value"="7911"}; +{"key"="1127";"subkey"="225";"value"="7919"}; +{"key"="1128";"subkey"="225";"value"="7927"}; +{"key"="1129";"subkey"="225";"value"="7935"}; +{"key"="1130";"subkey"="226";"value"="7943"}; +{"key"="1131";"subkey"="226";"value"="7951"}; +{"key"="1132";"subkey"="226";"value"="7959"}; +{"key"="1133";"subkey"="226";"value"="7967"}; +{"key"="1134";"subkey"="226";"value"="7975"}; +{"key"="1135";"subkey"="227";"value"="7984"}; +{"key"="1136";"subkey"="227";"value"="7992"}; +{"key"="1137";"subkey"="227";"value"="8000"}; +{"key"="1138";"subkey"="227";"value"="8008"}; +{"key"="1139";"subkey"="227";"value"="8016"}; +{"key"="1140";"subkey"="228";"value"="8024"}; +{"key"="1141";"subkey"="228";"value"="8032"}; +{"key"="1142";"subkey"="228";"value"="8040"}; +{"key"="1143";"subkey"="228";"value"="8048"}; +{"key"="1144";"subkey"="228";"value"="8056"}; +{"key"="1145";"subkey"="229";"value"="8064"}; +{"key"="1146";"subkey"="229";"value"="8072"}; 
+{"key"="1147";"subkey"="229";"value"="8080"}; +{"key"="1148";"subkey"="229";"value"="8088"}; +{"key"="1149";"subkey"="229";"value"="8096"}; +{"key"="1150";"subkey"="230";"value"="8104"}; +{"key"="1151";"subkey"="230";"value"="8112"}; +{"key"="1152";"subkey"="230";"value"="8120"}; +{"key"="1153";"subkey"="230";"value"="8128"}; +{"key"="1154";"subkey"="230";"value"="8136"}; +{"key"="1155";"subkey"="231";"value"="8144"}; +{"key"="1156";"subkey"="231";"value"="8152"}; +{"key"="1157";"subkey"="231";"value"="8160"}; +{"key"="1158";"subkey"="231";"value"="8169"}; +{"key"="1159";"subkey"="231";"value"="8177"}; +{"key"="1160";"subkey"="232";"value"="8185"}; +{"key"="1161";"subkey"="232";"value"="8193"}; +{"key"="1162";"subkey"="232";"value"="8201"}; +{"key"="1163";"subkey"="232";"value"="8209"}; +{"key"="1164";"subkey"="232";"value"="8217"}; +{"key"="1165";"subkey"="233";"value"="8225"}; +{"key"="1166";"subkey"="233";"value"="8233"}; +{"key"="1167";"subkey"="233";"value"="8241"}; +{"key"="1168";"subkey"="233";"value"="8249"}; +{"key"="1169";"subkey"="233";"value"="8257"}; +{"key"="1170";"subkey"="234";"value"="8265"}; +{"key"="1171";"subkey"="234";"value"="8273"}; +{"key"="1172";"subkey"="234";"value"="8281"}; +{"key"="1173";"subkey"="234";"value"="8289"}; +{"key"="1174";"subkey"="234";"value"="8298"}; +{"key"="1175";"subkey"="235";"value"="8306"}; +{"key"="1176";"subkey"="235";"value"="8314"}; +{"key"="1177";"subkey"="235";"value"="8322"}; +{"key"="1178";"subkey"="235";"value"="8330"}; +{"key"="1179";"subkey"="235";"value"="8338"}; +{"key"="1180";"subkey"="236";"value"="8346"}; +{"key"="1181";"subkey"="236";"value"="8354"}; +{"key"="1182";"subkey"="236";"value"="8362"}; +{"key"="1183";"subkey"="236";"value"="8370"}; +{"key"="1184";"subkey"="236";"value"="8378"}; +{"key"="1185";"subkey"="237";"value"="8386"}; +{"key"="1186";"subkey"="237";"value"="8394"}; +{"key"="1187";"subkey"="237";"value"="8402"}; +{"key"="1188";"subkey"="237";"value"="8411"}; 
+{"key"="1189";"subkey"="237";"value"="8419"}; +{"key"="1190";"subkey"="238";"value"="8427"}; +{"key"="1191";"subkey"="238";"value"="8435"}; +{"key"="1192";"subkey"="238";"value"="8443"}; +{"key"="1193";"subkey"="238";"value"="8451"}; +{"key"="1194";"subkey"="238";"value"="8459"}; +{"key"="1195";"subkey"="239";"value"="8467"}; +{"key"="1196";"subkey"="239";"value"="8475"}; +{"key"="1197";"subkey"="239";"value"="8483"}; +{"key"="1198";"subkey"="239";"value"="8491"}; +{"key"="1199";"subkey"="239";"value"="8500"}; +{"key"="1200";"subkey"="240";"value"="8508"}; +{"key"="1201";"subkey"="240";"value"="8516"}; +{"key"="1202";"subkey"="240";"value"="8524"}; +{"key"="1203";"subkey"="240";"value"="8532"}; +{"key"="1204";"subkey"="240";"value"="8540"}; +{"key"="1205";"subkey"="241";"value"="8548"}; +{"key"="1206";"subkey"="241";"value"="8556"}; +{"key"="1207";"subkey"="241";"value"="8564"}; +{"key"="1208";"subkey"="241";"value"="8572"}; +{"key"="1209";"subkey"="241";"value"="8580"}; +{"key"="1210";"subkey"="242";"value"="8589"}; +{"key"="1211";"subkey"="242";"value"="8597"}; +{"key"="1212";"subkey"="242";"value"="8605"}; +{"key"="1213";"subkey"="242";"value"="8613"}; +{"key"="1214";"subkey"="242";"value"="8621"}; +{"key"="1215";"subkey"="243";"value"="8629"}; +{"key"="1216";"subkey"="243";"value"="8637"}; +{"key"="1217";"subkey"="243";"value"="8645"}; +{"key"="1218";"subkey"="243";"value"="8653"}; +{"key"="1219";"subkey"="243";"value"="8661"}; +{"key"="1220";"subkey"="244";"value"="8670"}; +{"key"="1221";"subkey"="244";"value"="8678"}; +{"key"="1222";"subkey"="244";"value"="8686"}; +{"key"="1223";"subkey"="244";"value"="8694"}; +{"key"="1224";"subkey"="244";"value"="8702"}; +{"key"="1225";"subkey"="245";"value"="8710"}; +{"key"="1226";"subkey"="245";"value"="8718"}; +{"key"="1227";"subkey"="245";"value"="8726"}; +{"key"="1228";"subkey"="245";"value"="8734"}; +{"key"="1229";"subkey"="245";"value"="8743"}; +{"key"="1230";"subkey"="246";"value"="8751"}; 
+{"key"="1231";"subkey"="246";"value"="8759"}; +{"key"="1232";"subkey"="246";"value"="8767"}; +{"key"="1233";"subkey"="246";"value"="8775"}; +{"key"="1234";"subkey"="246";"value"="8783"}; +{"key"="1235";"subkey"="247";"value"="8791"}; +{"key"="1236";"subkey"="247";"value"="8799"}; +{"key"="1237";"subkey"="247";"value"="8807"}; +{"key"="1238";"subkey"="247";"value"="8816"}; +{"key"="1239";"subkey"="247";"value"="8824"}; +{"key"="1240";"subkey"="248";"value"="8832"}; +{"key"="1241";"subkey"="248";"value"="8840"}; +{"key"="1242";"subkey"="248";"value"="8848"}; +{"key"="1243";"subkey"="248";"value"="8856"}; +{"key"="1244";"subkey"="248";"value"="8864"}; +{"key"="1245";"subkey"="249";"value"="8872"}; +{"key"="1246";"subkey"="249";"value"="8881"}; +{"key"="1247";"subkey"="249";"value"="8889"}; +{"key"="1248";"subkey"="249";"value"="8897"}; +{"key"="1249";"subkey"="249";"value"="8905"}; +{"key"="1250";"subkey"="250";"value"="8913"}; +{"key"="1251";"subkey"="250";"value"="8921"}; +{"key"="1252";"subkey"="250";"value"="8929"}; +{"key"="1253";"subkey"="250";"value"="8938"}; +{"key"="1254";"subkey"="250";"value"="8946"}; +{"key"="1255";"subkey"="251";"value"="8954"}; +{"key"="1256";"subkey"="251";"value"="8962"}; +{"key"="1257";"subkey"="251";"value"="8970"}; +{"key"="1258";"subkey"="251";"value"="8978"}; +{"key"="1259";"subkey"="251";"value"="8986"}; +{"key"="1260";"subkey"="252";"value"="8994"}; +{"key"="1261";"subkey"="252";"value"="9003"}; +{"key"="1262";"subkey"="252";"value"="9011"}; +{"key"="1263";"subkey"="252";"value"="9019"}; +{"key"="1264";"subkey"="252";"value"="9027"}; +{"key"="1265";"subkey"="253";"value"="9035"}; +{"key"="1266";"subkey"="253";"value"="9043"}; +{"key"="1267";"subkey"="253";"value"="9051"}; +{"key"="1268";"subkey"="253";"value"="9060"}; +{"key"="1269";"subkey"="253";"value"="9068"}; +{"key"="1270";"subkey"="254";"value"="9076"}; +{"key"="1271";"subkey"="254";"value"="9084"}; +{"key"="1272";"subkey"="254";"value"="9092"}; 
+{"key"="1273";"subkey"="254";"value"="9100"}; +{"key"="1274";"subkey"="254";"value"="9108"}; +{"key"="1275";"subkey"="255";"value"="9117"}; +{"key"="1276";"subkey"="255";"value"="9125"}; +{"key"="1277";"subkey"="255";"value"="9133"}; +{"key"="1278";"subkey"="255";"value"="9141"}; +{"key"="1279";"subkey"="255";"value"="9149"}; +{"key"="1280";"subkey"="256";"value"="9157"}; +{"key"="1281";"subkey"="256";"value"="9166"}; +{"key"="1282";"subkey"="256";"value"="9174"}; +{"key"="1283";"subkey"="256";"value"="9182"}; +{"key"="1284";"subkey"="256";"value"="9190"}; +{"key"="1285";"subkey"="257";"value"="9198"}; +{"key"="1286";"subkey"="257";"value"="9206"}; +{"key"="1287";"subkey"="257";"value"="9215"}; +{"key"="1288";"subkey"="257";"value"="9223"}; +{"key"="1289";"subkey"="257";"value"="9231"}; +{"key"="1290";"subkey"="258";"value"="9239"}; +{"key"="1291";"subkey"="258";"value"="9247"}; +{"key"="1292";"subkey"="258";"value"="9255"}; +{"key"="1293";"subkey"="258";"value"="9263"}; +{"key"="1294";"subkey"="258";"value"="9272"}; +{"key"="1295";"subkey"="259";"value"="9280"}; +{"key"="1296";"subkey"="259";"value"="9288"}; +{"key"="1297";"subkey"="259";"value"="9296"}; +{"key"="1298";"subkey"="259";"value"="9304"}; +{"key"="1299";"subkey"="259";"value"="9312"}; +{"key"="1300";"subkey"="260";"value"="9321"}; +{"key"="1301";"subkey"="260";"value"="9329"}; +{"key"="1302";"subkey"="260";"value"="9337"}; +{"key"="1303";"subkey"="260";"value"="9345"}; +{"key"="1304";"subkey"="260";"value"="9353"}; +{"key"="1305";"subkey"="261";"value"="9362"}; +{"key"="1306";"subkey"="261";"value"="9370"}; +{"key"="1307";"subkey"="261";"value"="9378"}; +{"key"="1308";"subkey"="261";"value"="9386"}; +{"key"="1309";"subkey"="261";"value"="9394"}; +{"key"="1310";"subkey"="262";"value"="9402"}; +{"key"="1311";"subkey"="262";"value"="9411"}; +{"key"="1312";"subkey"="262";"value"="9419"}; +{"key"="1313";"subkey"="262";"value"="9427"}; +{"key"="1314";"subkey"="262";"value"="9435"}; 
+{"key"="1315";"subkey"="263";"value"="9443"}; +{"key"="1316";"subkey"="263";"value"="9451"}; +{"key"="1317";"subkey"="263";"value"="9460"}; +{"key"="1318";"subkey"="263";"value"="9468"}; +{"key"="1319";"subkey"="263";"value"="9476"}; +{"key"="1320";"subkey"="264";"value"="9484"}; +{"key"="1321";"subkey"="264";"value"="9492"}; +{"key"="1322";"subkey"="264";"value"="9501"}; +{"key"="1323";"subkey"="264";"value"="9509"}; +{"key"="1324";"subkey"="264";"value"="9517"}; +{"key"="1325";"subkey"="265";"value"="9525"}; +{"key"="1326";"subkey"="265";"value"="9533"}; +{"key"="1327";"subkey"="265";"value"="9542"}; +{"key"="1328";"subkey"="265";"value"="9550"}; +{"key"="1329";"subkey"="265";"value"="9558"}; +{"key"="1330";"subkey"="266";"value"="9566"}; +{"key"="1331";"subkey"="266";"value"="9574"}; +{"key"="1332";"subkey"="266";"value"="9582"}; +{"key"="1333";"subkey"="266";"value"="9591"}; +{"key"="1334";"subkey"="266";"value"="9599"}; +{"key"="1335";"subkey"="267";"value"="9607"}; +{"key"="1336";"subkey"="267";"value"="9615"}; +{"key"="1337";"subkey"="267";"value"="9623"}; +{"key"="1338";"subkey"="267";"value"="9632"}; +{"key"="1339";"subkey"="267";"value"="9640"}; +{"key"="1340";"subkey"="268";"value"="9648"}; +{"key"="1341";"subkey"="268";"value"="9656"}; +{"key"="1342";"subkey"="268";"value"="9664"}; +{"key"="1343";"subkey"="268";"value"="9673"}; +{"key"="1344";"subkey"="268";"value"="9681"}; +{"key"="1345";"subkey"="269";"value"="9689"}; +{"key"="1346";"subkey"="269";"value"="9697"}; +{"key"="1347";"subkey"="269";"value"="9705"}; +{"key"="1348";"subkey"="269";"value"="9714"}; +{"key"="1349";"subkey"="269";"value"="9722"}; +{"key"="1350";"subkey"="270";"value"="9730"}; +{"key"="1351";"subkey"="270";"value"="9738"}; +{"key"="1352";"subkey"="270";"value"="9747"}; +{"key"="1353";"subkey"="270";"value"="9755"}; +{"key"="1354";"subkey"="270";"value"="9763"}; +{"key"="1355";"subkey"="271";"value"="9771"}; +{"key"="1356";"subkey"="271";"value"="9779"}; 
+{"key"="1357";"subkey"="271";"value"="9788"}; +{"key"="1358";"subkey"="271";"value"="9796"}; +{"key"="1359";"subkey"="271";"value"="9804"}; +{"key"="1360";"subkey"="272";"value"="9812"}; +{"key"="1361";"subkey"="272";"value"="9820"}; +{"key"="1362";"subkey"="272";"value"="9829"}; +{"key"="1363";"subkey"="272";"value"="9837"}; +{"key"="1364";"subkey"="272";"value"="9845"}; +{"key"="1365";"subkey"="273";"value"="9853"}; +{"key"="1366";"subkey"="273";"value"="9862"}; +{"key"="1367";"subkey"="273";"value"="9870"}; +{"key"="1368";"subkey"="273";"value"="9878"}; +{"key"="1369";"subkey"="273";"value"="9886"}; +{"key"="1370";"subkey"="274";"value"="9894"}; +{"key"="1371";"subkey"="274";"value"="9903"}; +{"key"="1372";"subkey"="274";"value"="9911"}; +{"key"="1373";"subkey"="274";"value"="9919"}; +{"key"="1374";"subkey"="274";"value"="9927"}; +{"key"="1375";"subkey"="275";"value"="9936"}; +{"key"="1376";"subkey"="275";"value"="9944"}; +{"key"="1377";"subkey"="275";"value"="9952"}; +{"key"="1378";"subkey"="275";"value"="9960"}; +{"key"="1379";"subkey"="275";"value"="9968"}; +{"key"="1380";"subkey"="276";"value"="9977"}; +{"key"="1381";"subkey"="276";"value"="9985"}; +{"key"="1382";"subkey"="276";"value"="9993"}; +{"key"="1383";"subkey"="276";"value"="10001"}; +{"key"="1384";"subkey"="276";"value"="10010"}; +{"key"="1385";"subkey"="277";"value"="10018"}; +{"key"="1386";"subkey"="277";"value"="10026"}; +{"key"="1387";"subkey"="277";"value"="10034"}; +{"key"="1388";"subkey"="277";"value"="10043"}; +{"key"="1389";"subkey"="277";"value"="10051"}; +{"key"="1390";"subkey"="278";"value"="10059"}; +{"key"="1391";"subkey"="278";"value"="10067"}; +{"key"="1392";"subkey"="278";"value"="10075"}; +{"key"="1393";"subkey"="278";"value"="10084"}; +{"key"="1394";"subkey"="278";"value"="10092"}; +{"key"="1395";"subkey"="279";"value"="10100"}; +{"key"="1396";"subkey"="279";"value"="10108"}; +{"key"="1397";"subkey"="279";"value"="10117"}; +{"key"="1398";"subkey"="279";"value"="10125"}; 
+{"key"="1399";"subkey"="279";"value"="10133"}; +{"key"="1400";"subkey"="280";"value"="10141"}; +{"key"="1401";"subkey"="280";"value"="10150"}; +{"key"="1402";"subkey"="280";"value"="10158"}; +{"key"="1403";"subkey"="280";"value"="10166"}; +{"key"="1404";"subkey"="280";"value"="10174"}; +{"key"="1405";"subkey"="281";"value"="10183"}; +{"key"="1406";"subkey"="281";"value"="10191"}; +{"key"="1407";"subkey"="281";"value"="10199"}; +{"key"="1408";"subkey"="281";"value"="10207"}; +{"key"="1409";"subkey"="281";"value"="10216"}; +{"key"="1410";"subkey"="282";"value"="10224"}; +{"key"="1411";"subkey"="282";"value"="10232"}; +{"key"="1412";"subkey"="282";"value"="10240"}; +{"key"="1413";"subkey"="282";"value"="10249"}; +{"key"="1414";"subkey"="282";"value"="10257"}; +{"key"="1415";"subkey"="283";"value"="10265"}; +{"key"="1416";"subkey"="283";"value"="10273"}; +{"key"="1417";"subkey"="283";"value"="10282"}; +{"key"="1418";"subkey"="283";"value"="10290"}; +{"key"="1419";"subkey"="283";"value"="10298"}; +{"key"="1420";"subkey"="284";"value"="10306"}; +{"key"="1421";"subkey"="284";"value"="10315"}; +{"key"="1422";"subkey"="284";"value"="10323"}; +{"key"="1423";"subkey"="284";"value"="10331"}; +{"key"="1424";"subkey"="284";"value"="10339"}; +{"key"="1425";"subkey"="285";"value"="10348"}; +{"key"="1426";"subkey"="285";"value"="10356"}; +{"key"="1427";"subkey"="285";"value"="10364"}; +{"key"="1428";"subkey"="285";"value"="10373"}; +{"key"="1429";"subkey"="285";"value"="10381"}; +{"key"="1430";"subkey"="286";"value"="10389"}; +{"key"="1431";"subkey"="286";"value"="10397"}; +{"key"="1432";"subkey"="286";"value"="10406"}; +{"key"="1433";"subkey"="286";"value"="10414"}; +{"key"="1434";"subkey"="286";"value"="10422"}; +{"key"="1435";"subkey"="287";"value"="10430"}; +{"key"="1436";"subkey"="287";"value"="10439"}; +{"key"="1437";"subkey"="287";"value"="10447"}; +{"key"="1438";"subkey"="287";"value"="10455"}; +{"key"="1439";"subkey"="287";"value"="10463"}; 
+{"key"="1440";"subkey"="288";"value"="10472"}; +{"key"="1441";"subkey"="288";"value"="10480"}; +{"key"="1442";"subkey"="288";"value"="10488"}; +{"key"="1443";"subkey"="288";"value"="10497"}; +{"key"="1444";"subkey"="288";"value"="10505"}; +{"key"="1445";"subkey"="289";"value"="10513"}; +{"key"="1446";"subkey"="289";"value"="10521"}; +{"key"="1447";"subkey"="289";"value"="10530"}; +{"key"="1448";"subkey"="289";"value"="10538"}; +{"key"="1449";"subkey"="289";"value"="10546"}; +{"key"="1450";"subkey"="290";"value"="10555"}; +{"key"="1451";"subkey"="290";"value"="10563"}; +{"key"="1452";"subkey"="290";"value"="10571"}; +{"key"="1453";"subkey"="290";"value"="10579"}; +{"key"="1454";"subkey"="290";"value"="10588"}; +{"key"="1455";"subkey"="291";"value"="10596"}; +{"key"="1456";"subkey"="291";"value"="10604"}; +{"key"="1457";"subkey"="291";"value"="10612"}; +{"key"="1458";"subkey"="291";"value"="10621"}; +{"key"="1459";"subkey"="291";"value"="10629"}; +{"key"="1460";"subkey"="292";"value"="10637"}; +{"key"="1461";"subkey"="292";"value"="10646"}; +{"key"="1462";"subkey"="292";"value"="10654"}; +{"key"="1463";"subkey"="292";"value"="10662"}; +{"key"="1464";"subkey"="292";"value"="10670"}; +{"key"="1465";"subkey"="293";"value"="10679"}; +{"key"="1466";"subkey"="293";"value"="10687"}; +{"key"="1467";"subkey"="293";"value"="10695"}; +{"key"="1468";"subkey"="293";"value"="10704"}; +{"key"="1469";"subkey"="293";"value"="10712"}; +{"key"="1470";"subkey"="294";"value"="10720"}; +{"key"="1471";"subkey"="294";"value"="10729"}; +{"key"="1472";"subkey"="294";"value"="10737"}; +{"key"="1473";"subkey"="294";"value"="10745"}; +{"key"="1474";"subkey"="294";"value"="10753"}; +{"key"="1475";"subkey"="295";"value"="10762"}; +{"key"="1476";"subkey"="295";"value"="10770"}; +{"key"="1477";"subkey"="295";"value"="10778"}; +{"key"="1478";"subkey"="295";"value"="10787"}; +{"key"="1479";"subkey"="295";"value"="10795"}; +{"key"="1480";"subkey"="296";"value"="10803"}; 
+{"key"="1481";"subkey"="296";"value"="10812"}; +{"key"="1482";"subkey"="296";"value"="10820"}; +{"key"="1483";"subkey"="296";"value"="10828"}; +{"key"="1484";"subkey"="296";"value"="10836"}; +{"key"="1485";"subkey"="297";"value"="10845"}; +{"key"="1486";"subkey"="297";"value"="10853"}; +{"key"="1487";"subkey"="297";"value"="10861"}; +{"key"="1488";"subkey"="297";"value"="10870"}; +{"key"="1489";"subkey"="297";"value"="10878"}; +{"key"="1490";"subkey"="298";"value"="10886"}; +{"key"="1491";"subkey"="298";"value"="10895"}; +{"key"="1492";"subkey"="298";"value"="10903"}; +{"key"="1493";"subkey"="298";"value"="10911"}; +{"key"="1494";"subkey"="298";"value"="10919"}; +{"key"="1495";"subkey"="299";"value"="10928"}; +{"key"="1496";"subkey"="299";"value"="10936"}; +{"key"="1497";"subkey"="299";"value"="10944"}; +{"key"="1498";"subkey"="299";"value"="10953"}; +{"key"="1499";"subkey"="299";"value"="10961"}; +{"key"="1500";"subkey"="300";"value"="10969"}; +{"key"="1501";"subkey"="300";"value"="10978"}; +{"key"="1502";"subkey"="300";"value"="10986"}; +{"key"="1503";"subkey"="300";"value"="10994"}; +{"key"="1504";"subkey"="300";"value"="11003"}; +{"key"="1505";"subkey"="301";"value"="11011"}; +{"key"="1506";"subkey"="301";"value"="11019"}; +{"key"="1507";"subkey"="301";"value"="11028"}; +{"key"="1508";"subkey"="301";"value"="11036"}; +{"key"="1509";"subkey"="301";"value"="11044"}; +{"key"="1510";"subkey"="302";"value"="11052"}; +{"key"="1511";"subkey"="302";"value"="11061"}; +{"key"="1512";"subkey"="302";"value"="11069"}; +{"key"="1513";"subkey"="302";"value"="11077"}; +{"key"="1514";"subkey"="302";"value"="11086"}; +{"key"="1515";"subkey"="303";"value"="11094"}; +{"key"="1516";"subkey"="303";"value"="11102"}; +{"key"="1517";"subkey"="303";"value"="11111"}; +{"key"="1518";"subkey"="303";"value"="11119"}; +{"key"="1519";"subkey"="303";"value"="11127"}; +{"key"="1520";"subkey"="304";"value"="11136"}; +{"key"="1521";"subkey"="304";"value"="11144"}; 
+{"key"="1522";"subkey"="304";"value"="11152"}; +{"key"="1523";"subkey"="304";"value"="11161"}; +{"key"="1524";"subkey"="304";"value"="11169"}; +{"key"="1525";"subkey"="305";"value"="11177"}; +{"key"="1526";"subkey"="305";"value"="11186"}; +{"key"="1527";"subkey"="305";"value"="11194"}; +{"key"="1528";"subkey"="305";"value"="11202"}; +{"key"="1529";"subkey"="305";"value"="11211"}; +{"key"="1530";"subkey"="306";"value"="11219"}; +{"key"="1531";"subkey"="306";"value"="11227"}; +{"key"="1532";"subkey"="306";"value"="11236"}; +{"key"="1533";"subkey"="306";"value"="11244"}; +{"key"="1534";"subkey"="306";"value"="11252"}; +{"key"="1535";"subkey"="307";"value"="11261"}; +{"key"="1536";"subkey"="307";"value"="11269"}; +{"key"="1537";"subkey"="307";"value"="11277"}; +{"key"="1538";"subkey"="307";"value"="11286"}; +{"key"="1539";"subkey"="307";"value"="11294"}; +{"key"="1540";"subkey"="308";"value"="11302"}; +{"key"="1541";"subkey"="308";"value"="11311"}; +{"key"="1542";"subkey"="308";"value"="11319"}; +{"key"="1543";"subkey"="308";"value"="11327"}; +{"key"="1544";"subkey"="308";"value"="11336"}; +{"key"="1545";"subkey"="309";"value"="11344"}; +{"key"="1546";"subkey"="309";"value"="11352"}; +{"key"="1547";"subkey"="309";"value"="11361"}; +{"key"="1548";"subkey"="309";"value"="11369"}; +{"key"="1549";"subkey"="309";"value"="11377"}; +{"key"="1550";"subkey"="310";"value"="11386"}; +{"key"="1551";"subkey"="310";"value"="11394"}; +{"key"="1552";"subkey"="310";"value"="11403"}; +{"key"="1553";"subkey"="310";"value"="11411"}; +{"key"="1554";"subkey"="310";"value"="11419"}; +{"key"="1555";"subkey"="311";"value"="11428"}; +{"key"="1556";"subkey"="311";"value"="11436"}; +{"key"="1557";"subkey"="311";"value"="11444"}; +{"key"="1558";"subkey"="311";"value"="11453"}; +{"key"="1559";"subkey"="311";"value"="11461"}; +{"key"="1560";"subkey"="312";"value"="11469"}; +{"key"="1561";"subkey"="312";"value"="11478"}; +{"key"="1562";"subkey"="312";"value"="11486"}; 
+{"key"="1563";"subkey"="312";"value"="11494"}; +{"key"="1564";"subkey"="312";"value"="11503"}; +{"key"="1565";"subkey"="313";"value"="11511"}; +{"key"="1566";"subkey"="313";"value"="11519"}; +{"key"="1567";"subkey"="313";"value"="11528"}; +{"key"="1568";"subkey"="313";"value"="11536"}; +{"key"="1569";"subkey"="313";"value"="11545"}; +{"key"="1570";"subkey"="314";"value"="11553"}; +{"key"="1571";"subkey"="314";"value"="11561"}; +{"key"="1572";"subkey"="314";"value"="11570"}; +{"key"="1573";"subkey"="314";"value"="11578"}; +{"key"="1574";"subkey"="314";"value"="11586"}; +{"key"="1575";"subkey"="315";"value"="11595"}; +{"key"="1576";"subkey"="315";"value"="11603"}; +{"key"="1577";"subkey"="315";"value"="11611"}; +{"key"="1578";"subkey"="315";"value"="11620"}; +{"key"="1579";"subkey"="315";"value"="11628"}; +{"key"="1580";"subkey"="316";"value"="11636"}; +{"key"="1581";"subkey"="316";"value"="11645"}; +{"key"="1582";"subkey"="316";"value"="11653"}; +{"key"="1583";"subkey"="316";"value"="11662"}; +{"key"="1584";"subkey"="316";"value"="11670"}; +{"key"="1585";"subkey"="317";"value"="11678"}; +{"key"="1586";"subkey"="317";"value"="11687"}; +{"key"="1587";"subkey"="317";"value"="11695"}; +{"key"="1588";"subkey"="317";"value"="11703"}; +{"key"="1589";"subkey"="317";"value"="11712"}; +{"key"="1590";"subkey"="318";"value"="11720"}; +{"key"="1591";"subkey"="318";"value"="11729"}; +{"key"="1592";"subkey"="318";"value"="11737"}; +{"key"="1593";"subkey"="318";"value"="11745"}; +{"key"="1594";"subkey"="318";"value"="11754"}; +{"key"="1595";"subkey"="319";"value"="11762"}; +{"key"="1596";"subkey"="319";"value"="11770"}; +{"key"="1597";"subkey"="319";"value"="11779"}; +{"key"="1598";"subkey"="319";"value"="11787"}; +{"key"="1599";"subkey"="319";"value"="11796"}; +{"key"="1600";"subkey"="320";"value"="11804"}; +{"key"="1601";"subkey"="320";"value"="11812"}; +{"key"="1602";"subkey"="320";"value"="11821"}; +{"key"="1603";"subkey"="320";"value"="11829"}; 
+{"key"="1604";"subkey"="320";"value"="11837"}; +{"key"="1605";"subkey"="321";"value"="11846"}; +{"key"="1606";"subkey"="321";"value"="11854"}; +{"key"="1607";"subkey"="321";"value"="11863"}; +{"key"="1608";"subkey"="321";"value"="11871"}; +{"key"="1609";"subkey"="321";"value"="11879"}; +{"key"="1610";"subkey"="322";"value"="11888"}; +{"key"="1611";"subkey"="322";"value"="11896"}; +{"key"="1612";"subkey"="322";"value"="11904"}; +{"key"="1613";"subkey"="322";"value"="11913"}; +{"key"="1614";"subkey"="322";"value"="11921"}; +{"key"="1615";"subkey"="323";"value"="11930"}; +{"key"="1616";"subkey"="323";"value"="11938"}; +{"key"="1617";"subkey"="323";"value"="11946"}; +{"key"="1618";"subkey"="323";"value"="11955"}; +{"key"="1619";"subkey"="323";"value"="11963"}; +{"key"="1620";"subkey"="324";"value"="11972"}; +{"key"="1621";"subkey"="324";"value"="11980"}; +{"key"="1622";"subkey"="324";"value"="11988"}; +{"key"="1623";"subkey"="324";"value"="11997"}; +{"key"="1624";"subkey"="324";"value"="12005"}; +{"key"="1625";"subkey"="325";"value"="12014"}; +{"key"="1626";"subkey"="325";"value"="12022"}; +{"key"="1627";"subkey"="325";"value"="12030"}; +{"key"="1628";"subkey"="325";"value"="12039"}; +{"key"="1629";"subkey"="325";"value"="12047"}; +{"key"="1630";"subkey"="326";"value"="12056"}; +{"key"="1631";"subkey"="326";"value"="12064"}; +{"key"="1632";"subkey"="326";"value"="12072"}; +{"key"="1633";"subkey"="326";"value"="12081"}; +{"key"="1634";"subkey"="326";"value"="12089"}; +{"key"="1635";"subkey"="327";"value"="12098"}; +{"key"="1636";"subkey"="327";"value"="12106"}; +{"key"="1637";"subkey"="327";"value"="12114"}; +{"key"="1638";"subkey"="327";"value"="12123"}; +{"key"="1639";"subkey"="327";"value"="12131"}; +{"key"="1640";"subkey"="328";"value"="12140"}; +{"key"="1641";"subkey"="328";"value"="12148"}; +{"key"="1642";"subkey"="328";"value"="12156"}; +{"key"="1643";"subkey"="328";"value"="12165"}; +{"key"="1644";"subkey"="328";"value"="12173"}; 
+{"key"="1645";"subkey"="329";"value"="12182"}; +{"key"="1646";"subkey"="329";"value"="12190"}; +{"key"="1647";"subkey"="329";"value"="12198"}; +{"key"="1648";"subkey"="329";"value"="12207"}; +{"key"="1649";"subkey"="329";"value"="12215"}; +{"key"="1650";"subkey"="330";"value"="12224"}; +{"key"="1651";"subkey"="330";"value"="12232"}; +{"key"="1652";"subkey"="330";"value"="12240"}; +{"key"="1653";"subkey"="330";"value"="12249"}; +{"key"="1654";"subkey"="330";"value"="12257"}; +{"key"="1655";"subkey"="331";"value"="12266"}; +{"key"="1656";"subkey"="331";"value"="12274"}; +{"key"="1657";"subkey"="331";"value"="12282"}; +{"key"="1658";"subkey"="331";"value"="12291"}; +{"key"="1659";"subkey"="331";"value"="12299"}; +{"key"="1660";"subkey"="332";"value"="12308"}; +{"key"="1661";"subkey"="332";"value"="12316"}; +{"key"="1662";"subkey"="332";"value"="12325"}; +{"key"="1663";"subkey"="332";"value"="12333"}; +{"key"="1664";"subkey"="332";"value"="12341"}; +{"key"="1665";"subkey"="333";"value"="12350"}; +{"key"="1666";"subkey"="333";"value"="12358"}; +{"key"="1667";"subkey"="333";"value"="12367"}; +{"key"="1668";"subkey"="333";"value"="12375"}; +{"key"="1669";"subkey"="333";"value"="12383"}; +{"key"="1670";"subkey"="334";"value"="12392"}; +{"key"="1671";"subkey"="334";"value"="12400"}; +{"key"="1672";"subkey"="334";"value"="12409"}; +{"key"="1673";"subkey"="334";"value"="12417"}; +{"key"="1674";"subkey"="334";"value"="12426"}; +{"key"="1675";"subkey"="335";"value"="12434"}; +{"key"="1676";"subkey"="335";"value"="12442"}; +{"key"="1677";"subkey"="335";"value"="12451"}; +{"key"="1678";"subkey"="335";"value"="12459"}; +{"key"="1679";"subkey"="335";"value"="12468"}; +{"key"="1680";"subkey"="336";"value"="12476"}; +{"key"="1681";"subkey"="336";"value"="12485"}; +{"key"="1682";"subkey"="336";"value"="12493"}; +{"key"="1683";"subkey"="336";"value"="12501"}; +{"key"="1684";"subkey"="336";"value"="12510"}; +{"key"="1685";"subkey"="337";"value"="12518"}; 
+{"key"="1686";"subkey"="337";"value"="12527"}; +{"key"="1687";"subkey"="337";"value"="12535"}; +{"key"="1688";"subkey"="337";"value"="12544"}; +{"key"="1689";"subkey"="337";"value"="12552"}; +{"key"="1690";"subkey"="338";"value"="12560"}; +{"key"="1691";"subkey"="338";"value"="12569"}; +{"key"="1692";"subkey"="338";"value"="12577"}; +{"key"="1693";"subkey"="338";"value"="12586"}; +{"key"="1694";"subkey"="338";"value"="12594"}; +{"key"="1695";"subkey"="339";"value"="12603"}; +{"key"="1696";"subkey"="339";"value"="12611"}; +{"key"="1697";"subkey"="339";"value"="12619"}; +{"key"="1698";"subkey"="339";"value"="12628"}; +{"key"="1699";"subkey"="339";"value"="12636"}; +{"key"="1700";"subkey"="340";"value"="12645"}; +{"key"="1701";"subkey"="340";"value"="12653"}; +{"key"="1702";"subkey"="340";"value"="12662"}; +{"key"="1703";"subkey"="340";"value"="12670"}; +{"key"="1704";"subkey"="340";"value"="12679"}; +{"key"="1705";"subkey"="341";"value"="12687"}; +{"key"="1706";"subkey"="341";"value"="12695"}; +{"key"="1707";"subkey"="341";"value"="12704"}; +{"key"="1708";"subkey"="341";"value"="12712"}; +{"key"="1709";"subkey"="341";"value"="12721"}; +{"key"="1710";"subkey"="342";"value"="12729"}; +{"key"="1711";"subkey"="342";"value"="12738"}; +{"key"="1712";"subkey"="342";"value"="12746"}; +{"key"="1713";"subkey"="342";"value"="12755"}; +{"key"="1714";"subkey"="342";"value"="12763"}; +{"key"="1715";"subkey"="343";"value"="12771"}; +{"key"="1716";"subkey"="343";"value"="12780"}; +{"key"="1717";"subkey"="343";"value"="12788"}; +{"key"="1718";"subkey"="343";"value"="12797"}; +{"key"="1719";"subkey"="343";"value"="12805"}; +{"key"="1720";"subkey"="344";"value"="12814"}; +{"key"="1721";"subkey"="344";"value"="12822"}; +{"key"="1722";"subkey"="344";"value"="12831"}; +{"key"="1723";"subkey"="344";"value"="12839"}; +{"key"="1724";"subkey"="344";"value"="12847"}; +{"key"="1725";"subkey"="345";"value"="12856"}; +{"key"="1726";"subkey"="345";"value"="12864"}; 
+{"key"="1727";"subkey"="345";"value"="12873"}; +{"key"="1728";"subkey"="345";"value"="12881"}; +{"key"="1729";"subkey"="345";"value"="12890"}; +{"key"="1730";"subkey"="346";"value"="12898"}; +{"key"="1731";"subkey"="346";"value"="12907"}; +{"key"="1732";"subkey"="346";"value"="12915"}; +{"key"="1733";"subkey"="346";"value"="12924"}; +{"key"="1734";"subkey"="346";"value"="12932"}; +{"key"="1735";"subkey"="347";"value"="12940"}; +{"key"="1736";"subkey"="347";"value"="12949"}; +{"key"="1737";"subkey"="347";"value"="12957"}; +{"key"="1738";"subkey"="347";"value"="12966"}; +{"key"="1739";"subkey"="347";"value"="12974"}; +{"key"="1740";"subkey"="348";"value"="12983"}; +{"key"="1741";"subkey"="348";"value"="12991"}; +{"key"="1742";"subkey"="348";"value"="13000"}; +{"key"="1743";"subkey"="348";"value"="13008"}; +{"key"="1744";"subkey"="348";"value"="13017"}; +{"key"="1745";"subkey"="349";"value"="13025"}; +{"key"="1746";"subkey"="349";"value"="13034"}; +{"key"="1747";"subkey"="349";"value"="13042"}; +{"key"="1748";"subkey"="349";"value"="13050"}; +{"key"="1749";"subkey"="349";"value"="13059"}; +{"key"="1750";"subkey"="350";"value"="13067"}; +{"key"="1751";"subkey"="350";"value"="13076"}; +{"key"="1752";"subkey"="350";"value"="13084"}; +{"key"="1753";"subkey"="350";"value"="13093"}; +{"key"="1754";"subkey"="350";"value"="13101"}; +{"key"="1755";"subkey"="351";"value"="13110"}; +{"key"="1756";"subkey"="351";"value"="13118"}; +{"key"="1757";"subkey"="351";"value"="13127"}; +{"key"="1758";"subkey"="351";"value"="13135"}; +{"key"="1759";"subkey"="351";"value"="13144"}; +{"key"="1760";"subkey"="352";"value"="13152"}; +{"key"="1761";"subkey"="352";"value"="13161"}; +{"key"="1762";"subkey"="352";"value"="13169"}; +{"key"="1763";"subkey"="352";"value"="13178"}; +{"key"="1764";"subkey"="352";"value"="13186"}; +{"key"="1765";"subkey"="353";"value"="13194"}; +{"key"="1766";"subkey"="353";"value"="13203"}; +{"key"="1767";"subkey"="353";"value"="13211"}; 
+{"key"="1768";"subkey"="353";"value"="13220"}; +{"key"="1769";"subkey"="353";"value"="13228"}; +{"key"="1770";"subkey"="354";"value"="13237"}; +{"key"="1771";"subkey"="354";"value"="13245"}; +{"key"="1772";"subkey"="354";"value"="13254"}; +{"key"="1773";"subkey"="354";"value"="13262"}; +{"key"="1774";"subkey"="354";"value"="13271"}; +{"key"="1775";"subkey"="355";"value"="13279"}; +{"key"="1776";"subkey"="355";"value"="13288"}; +{"key"="1777";"subkey"="355";"value"="13296"}; +{"key"="1778";"subkey"="355";"value"="13305"}; +{"key"="1779";"subkey"="355";"value"="13313"}; +{"key"="1780";"subkey"="356";"value"="13322"}; +{"key"="1781";"subkey"="356";"value"="13330"}; +{"key"="1782";"subkey"="356";"value"="13339"}; +{"key"="1783";"subkey"="356";"value"="13347"}; +{"key"="1784";"subkey"="356";"value"="13356"}; +{"key"="1785";"subkey"="357";"value"="13364"}; +{"key"="1786";"subkey"="357";"value"="13373"}; +{"key"="1787";"subkey"="357";"value"="13381"}; +{"key"="1788";"subkey"="357";"value"="13390"}; +{"key"="1789";"subkey"="357";"value"="13398"}; +{"key"="1790";"subkey"="358";"value"="13407"}; +{"key"="1791";"subkey"="358";"value"="13415"}; +{"key"="1792";"subkey"="358";"value"="13424"}; +{"key"="1793";"subkey"="358";"value"="13432"}; +{"key"="1794";"subkey"="358";"value"="13441"}; +{"key"="1795";"subkey"="359";"value"="13449"}; +{"key"="1796";"subkey"="359";"value"="13457"}; +{"key"="1797";"subkey"="359";"value"="13466"}; +{"key"="1798";"subkey"="359";"value"="13474"}; +{"key"="1799";"subkey"="359";"value"="13483"}; +{"key"="1800";"subkey"="360";"value"="13491"}; +{"key"="1801";"subkey"="360";"value"="13500"}; +{"key"="1802";"subkey"="360";"value"="13508"}; +{"key"="1803";"subkey"="360";"value"="13517"}; +{"key"="1804";"subkey"="360";"value"="13525"}; +{"key"="1805";"subkey"="361";"value"="13534"}; +{"key"="1806";"subkey"="361";"value"="13542"}; +{"key"="1807";"subkey"="361";"value"="13551"}; +{"key"="1808";"subkey"="361";"value"="13559"}; 
+{"key"="1809";"subkey"="361";"value"="13568"}; +{"key"="1810";"subkey"="362";"value"="13576"}; +{"key"="1811";"subkey"="362";"value"="13585"}; +{"key"="1812";"subkey"="362";"value"="13593"}; +{"key"="1813";"subkey"="362";"value"="13602"}; +{"key"="1814";"subkey"="362";"value"="13610"}; +{"key"="1815";"subkey"="363";"value"="13619"}; +{"key"="1816";"subkey"="363";"value"="13627"}; +{"key"="1817";"subkey"="363";"value"="13636"}; +{"key"="1818";"subkey"="363";"value"="13644"}; +{"key"="1819";"subkey"="363";"value"="13653"}; +{"key"="1820";"subkey"="364";"value"="13661"}; +{"key"="1821";"subkey"="364";"value"="13670"}; +{"key"="1822";"subkey"="364";"value"="13679"}; +{"key"="1823";"subkey"="364";"value"="13687"}; +{"key"="1824";"subkey"="364";"value"="13696"}; +{"key"="1825";"subkey"="365";"value"="13704"}; +{"key"="1826";"subkey"="365";"value"="13713"}; +{"key"="1827";"subkey"="365";"value"="13721"}; +{"key"="1828";"subkey"="365";"value"="13730"}; +{"key"="1829";"subkey"="365";"value"="13738"}; +{"key"="1830";"subkey"="366";"value"="13747"}; +{"key"="1831";"subkey"="366";"value"="13755"}; +{"key"="1832";"subkey"="366";"value"="13764"}; +{"key"="1833";"subkey"="366";"value"="13772"}; +{"key"="1834";"subkey"="366";"value"="13781"}; +{"key"="1835";"subkey"="367";"value"="13789"}; +{"key"="1836";"subkey"="367";"value"="13798"}; +{"key"="1837";"subkey"="367";"value"="13806"}; +{"key"="1838";"subkey"="367";"value"="13815"}; +{"key"="1839";"subkey"="367";"value"="13823"}; +{"key"="1840";"subkey"="368";"value"="13832"}; +{"key"="1841";"subkey"="368";"value"="13840"}; +{"key"="1842";"subkey"="368";"value"="13849"}; +{"key"="1843";"subkey"="368";"value"="13857"}; +{"key"="1844";"subkey"="368";"value"="13866"}; +{"key"="1845";"subkey"="369";"value"="13874"}; +{"key"="1846";"subkey"="369";"value"="13883"}; +{"key"="1847";"subkey"="369";"value"="13891"}; +{"key"="1848";"subkey"="369";"value"="13900"}; +{"key"="1849";"subkey"="369";"value"="13908"}; 
+{"key"="1850";"subkey"="370";"value"="13917"}; +{"key"="1851";"subkey"="370";"value"="13925"}; +{"key"="1852";"subkey"="370";"value"="13934"}; +{"key"="1853";"subkey"="370";"value"="13943"}; +{"key"="1854";"subkey"="370";"value"="13951"}; +{"key"="1855";"subkey"="371";"value"="13960"}; +{"key"="1856";"subkey"="371";"value"="13968"}; +{"key"="1857";"subkey"="371";"value"="13977"}; +{"key"="1858";"subkey"="371";"value"="13985"}; +{"key"="1859";"subkey"="371";"value"="13994"}; +{"key"="1860";"subkey"="372";"value"="14002"}; +{"key"="1861";"subkey"="372";"value"="14011"}; +{"key"="1862";"subkey"="372";"value"="14019"}; +{"key"="1863";"subkey"="372";"value"="14028"}; +{"key"="1864";"subkey"="372";"value"="14036"}; +{"key"="1865";"subkey"="373";"value"="14045"}; +{"key"="1866";"subkey"="373";"value"="14053"}; +{"key"="1867";"subkey"="373";"value"="14062"}; +{"key"="1868";"subkey"="373";"value"="14070"}; +{"key"="1869";"subkey"="373";"value"="14079"}; +{"key"="1870";"subkey"="374";"value"="14088"}; +{"key"="1871";"subkey"="374";"value"="14096"}; +{"key"="1872";"subkey"="374";"value"="14105"}; +{"key"="1873";"subkey"="374";"value"="14113"}; +{"key"="1874";"subkey"="374";"value"="14122"}; +{"key"="1875";"subkey"="375";"value"="14130"}; +{"key"="1876";"subkey"="375";"value"="14139"}; +{"key"="1877";"subkey"="375";"value"="14147"}; +{"key"="1878";"subkey"="375";"value"="14156"}; +{"key"="1879";"subkey"="375";"value"="14164"}; +{"key"="1880";"subkey"="376";"value"="14173"}; +{"key"="1881";"subkey"="376";"value"="14181"}; +{"key"="1882";"subkey"="376";"value"="14190"}; +{"key"="1883";"subkey"="376";"value"="14198"}; +{"key"="1884";"subkey"="376";"value"="14207"}; +{"key"="1885";"subkey"="377";"value"="14216"}; +{"key"="1886";"subkey"="377";"value"="14224"}; +{"key"="1887";"subkey"="377";"value"="14233"}; +{"key"="1888";"subkey"="377";"value"="14241"}; +{"key"="1889";"subkey"="377";"value"="14250"}; +{"key"="1890";"subkey"="378";"value"="14258"}; 
+{"key"="1891";"subkey"="378";"value"="14267"}; +{"key"="1892";"subkey"="378";"value"="14275"}; +{"key"="1893";"subkey"="378";"value"="14284"}; +{"key"="1894";"subkey"="378";"value"="14292"}; +{"key"="1895";"subkey"="379";"value"="14301"}; +{"key"="1896";"subkey"="379";"value"="14310"}; +{"key"="1897";"subkey"="379";"value"="14318"}; +{"key"="1898";"subkey"="379";"value"="14327"}; +{"key"="1899";"subkey"="379";"value"="14335"}; +{"key"="1900";"subkey"="380";"value"="14344"}; +{"key"="1901";"subkey"="380";"value"="14352"}; +{"key"="1902";"subkey"="380";"value"="14361"}; +{"key"="1903";"subkey"="380";"value"="14369"}; +{"key"="1904";"subkey"="380";"value"="14378"}; +{"key"="1905";"subkey"="381";"value"="14387"}; +{"key"="1906";"subkey"="381";"value"="14395"}; +{"key"="1907";"subkey"="381";"value"="14404"}; +{"key"="1908";"subkey"="381";"value"="14412"}; +{"key"="1909";"subkey"="381";"value"="14421"}; +{"key"="1910";"subkey"="382";"value"="14429"}; +{"key"="1911";"subkey"="382";"value"="14438"}; +{"key"="1912";"subkey"="382";"value"="14446"}; +{"key"="1913";"subkey"="382";"value"="14455"}; +{"key"="1914";"subkey"="382";"value"="14464"}; +{"key"="1915";"subkey"="383";"value"="14472"}; +{"key"="1916";"subkey"="383";"value"="14481"}; +{"key"="1917";"subkey"="383";"value"="14489"}; +{"key"="1918";"subkey"="383";"value"="14498"}; +{"key"="1919";"subkey"="383";"value"="14506"}; +{"key"="1920";"subkey"="384";"value"="14515"}; +{"key"="1921";"subkey"="384";"value"="14523"}; +{"key"="1922";"subkey"="384";"value"="14532"}; +{"key"="1923";"subkey"="384";"value"="14541"}; +{"key"="1924";"subkey"="384";"value"="14549"}; +{"key"="1925";"subkey"="385";"value"="14558"}; +{"key"="1926";"subkey"="385";"value"="14566"}; +{"key"="1927";"subkey"="385";"value"="14575"}; +{"key"="1928";"subkey"="385";"value"="14583"}; +{"key"="1929";"subkey"="385";"value"="14592"}; +{"key"="1930";"subkey"="386";"value"="14600"}; +{"key"="1931";"subkey"="386";"value"="14609"}; 
+{"key"="1932";"subkey"="386";"value"="14618"}; +{"key"="1933";"subkey"="386";"value"="14626"}; +{"key"="1934";"subkey"="386";"value"="14635"}; +{"key"="1935";"subkey"="387";"value"="14643"}; +{"key"="1936";"subkey"="387";"value"="14652"}; +{"key"="1937";"subkey"="387";"value"="14660"}; +{"key"="1938";"subkey"="387";"value"="14669"}; +{"key"="1939";"subkey"="387";"value"="14678"}; +{"key"="1940";"subkey"="388";"value"="14686"}; +{"key"="1941";"subkey"="388";"value"="14695"}; +{"key"="1942";"subkey"="388";"value"="14703"}; +{"key"="1943";"subkey"="388";"value"="14712"}; +{"key"="1944";"subkey"="388";"value"="14720"}; +{"key"="1945";"subkey"="389";"value"="14729"}; +{"key"="1946";"subkey"="389";"value"="14738"}; +{"key"="1947";"subkey"="389";"value"="14746"}; +{"key"="1948";"subkey"="389";"value"="14755"}; +{"key"="1949";"subkey"="389";"value"="14763"}; +{"key"="1950";"subkey"="390";"value"="14772"}; +{"key"="1951";"subkey"="390";"value"="14780"}; +{"key"="1952";"subkey"="390";"value"="14789"}; +{"key"="1953";"subkey"="390";"value"="14798"}; +{"key"="1954";"subkey"="390";"value"="14806"}; +{"key"="1955";"subkey"="391";"value"="14815"}; +{"key"="1956";"subkey"="391";"value"="14823"}; +{"key"="1957";"subkey"="391";"value"="14832"}; +{"key"="1958";"subkey"="391";"value"="14841"}; +{"key"="1959";"subkey"="391";"value"="14849"}; +{"key"="1960";"subkey"="392";"value"="14858"}; +{"key"="1961";"subkey"="392";"value"="14866"}; +{"key"="1962";"subkey"="392";"value"="14875"}; +{"key"="1963";"subkey"="392";"value"="14883"}; +{"key"="1964";"subkey"="392";"value"="14892"}; +{"key"="1965";"subkey"="393";"value"="14901"}; +{"key"="1966";"subkey"="393";"value"="14909"}; +{"key"="1967";"subkey"="393";"value"="14918"}; +{"key"="1968";"subkey"="393";"value"="14926"}; +{"key"="1969";"subkey"="393";"value"="14935"}; +{"key"="1970";"subkey"="394";"value"="14944"}; +{"key"="1971";"subkey"="394";"value"="14952"}; +{"key"="1972";"subkey"="394";"value"="14961"}; 
+{"key"="1973";"subkey"="394";"value"="14969"}; +{"key"="1974";"subkey"="394";"value"="14978"}; +{"key"="1975";"subkey"="395";"value"="14986"}; +{"key"="1976";"subkey"="395";"value"="14995"}; +{"key"="1977";"subkey"="395";"value"="15004"}; +{"key"="1978";"subkey"="395";"value"="15012"}; +{"key"="1979";"subkey"="395";"value"="15021"}; +{"key"="1980";"subkey"="396";"value"="15029"}; +{"key"="1981";"subkey"="396";"value"="15038"}; +{"key"="1982";"subkey"="396";"value"="15047"}; +{"key"="1983";"subkey"="396";"value"="15055"}; +{"key"="1984";"subkey"="396";"value"="15064"}; +{"key"="1985";"subkey"="397";"value"="15072"}; +{"key"="1986";"subkey"="397";"value"="15081"}; +{"key"="1987";"subkey"="397";"value"="15090"}; +{"key"="1988";"subkey"="397";"value"="15098"}; +{"key"="1989";"subkey"="397";"value"="15107"}; +{"key"="1990";"subkey"="398";"value"="15115"}; +{"key"="1991";"subkey"="398";"value"="15124"}; +{"key"="1992";"subkey"="398";"value"="15133"}; +{"key"="1993";"subkey"="398";"value"="15141"}; +{"key"="1994";"subkey"="398";"value"="15150"}; +{"key"="1995";"subkey"="399";"value"="15158"}; +{"key"="1996";"subkey"="399";"value"="15167"}; +{"key"="1997";"subkey"="399";"value"="15176"}; +{"key"="1998";"subkey"="399";"value"="15184"}; +{"key"="1999";"subkey"="399";"value"="15193"}; +{"key"="2000";"subkey"="400";"value"="15201"}; +{"key"="2001";"subkey"="400";"value"="15210"}; +{"key"="2002";"subkey"="400";"value"="15219"}; +{"key"="2003";"subkey"="400";"value"="15227"}; +{"key"="2004";"subkey"="400";"value"="15236"}; +{"key"="2005";"subkey"="401";"value"="15244"}; +{"key"="2006";"subkey"="401";"value"="15253"}; +{"key"="2007";"subkey"="401";"value"="15262"}; +{"key"="2008";"subkey"="401";"value"="15270"}; +{"key"="2009";"subkey"="401";"value"="15279"}; +{"key"="2010";"subkey"="402";"value"="15287"}; +{"key"="2011";"subkey"="402";"value"="15296"}; +{"key"="2012";"subkey"="402";"value"="15305"}; +{"key"="2013";"subkey"="402";"value"="15313"}; 
+{"key"="2014";"subkey"="402";"value"="15322"}; +{"key"="2015";"subkey"="403";"value"="15330"}; +{"key"="2016";"subkey"="403";"value"="15339"}; +{"key"="2017";"subkey"="403";"value"="15348"}; +{"key"="2018";"subkey"="403";"value"="15356"}; +{"key"="2019";"subkey"="403";"value"="15365"}; +{"key"="2020";"subkey"="404";"value"="15373"}; +{"key"="2021";"subkey"="404";"value"="15382"}; +{"key"="2022";"subkey"="404";"value"="15391"}; +{"key"="2023";"subkey"="404";"value"="15399"}; +{"key"="2024";"subkey"="404";"value"="15408"}; +{"key"="2025";"subkey"="405";"value"="15416"}; +{"key"="2026";"subkey"="405";"value"="15425"}; +{"key"="2027";"subkey"="405";"value"="15434"}; +{"key"="2028";"subkey"="405";"value"="15442"}; +{"key"="2029";"subkey"="405";"value"="15451"}; +{"key"="2030";"subkey"="406";"value"="15460"}; +{"key"="2031";"subkey"="406";"value"="15468"}; +{"key"="2032";"subkey"="406";"value"="15477"}; +{"key"="2033";"subkey"="406";"value"="15485"}; +{"key"="2034";"subkey"="406";"value"="15494"}; +{"key"="2035";"subkey"="407";"value"="15503"}; +{"key"="2036";"subkey"="407";"value"="15511"}; +{"key"="2037";"subkey"="407";"value"="15520"}; +{"key"="2038";"subkey"="407";"value"="15528"}; +{"key"="2039";"subkey"="407";"value"="15537"}; +{"key"="2040";"subkey"="408";"value"="15546"}; +{"key"="2041";"subkey"="408";"value"="15554"}; +{"key"="2042";"subkey"="408";"value"="15563"}; +{"key"="2043";"subkey"="408";"value"="15572"}; +{"key"="2044";"subkey"="408";"value"="15580"}; +{"key"="2045";"subkey"="409";"value"="15589"}; +{"key"="2046";"subkey"="409";"value"="15597"}; +{"key"="2047";"subkey"="409";"value"="15606"}; +{"key"="2048";"subkey"="409";"value"="15615"}; +{"key"="2049";"subkey"="409";"value"="15623"}; +{"key"="2050";"subkey"="410";"value"="15632"}; +{"key"="2051";"subkey"="410";"value"="15641"}; +{"key"="2052";"subkey"="410";"value"="15649"}; +{"key"="2053";"subkey"="410";"value"="15658"}; +{"key"="2054";"subkey"="410";"value"="15666"}; 
+{"key"="2055";"subkey"="411";"value"="15675"}; +{"key"="2056";"subkey"="411";"value"="15684"}; +{"key"="2057";"subkey"="411";"value"="15692"}; +{"key"="2058";"subkey"="411";"value"="15701"}; +{"key"="2059";"subkey"="411";"value"="15710"}; +{"key"="2060";"subkey"="412";"value"="15718"}; +{"key"="2061";"subkey"="412";"value"="15727"}; +{"key"="2062";"subkey"="412";"value"="15736"}; +{"key"="2063";"subkey"="412";"value"="15744"}; +{"key"="2064";"subkey"="412";"value"="15753"}; +{"key"="2065";"subkey"="413";"value"="15761"}; +{"key"="2066";"subkey"="413";"value"="15770"}; +{"key"="2067";"subkey"="413";"value"="15779"}; +{"key"="2068";"subkey"="413";"value"="15787"}; +{"key"="2069";"subkey"="413";"value"="15796"}; +{"key"="2070";"subkey"="414";"value"="15805"}; +{"key"="2071";"subkey"="414";"value"="15813"}; +{"key"="2072";"subkey"="414";"value"="15822"}; +{"key"="2073";"subkey"="414";"value"="15830"}; +{"key"="2074";"subkey"="414";"value"="15839"}; +{"key"="2075";"subkey"="415";"value"="15848"}; +{"key"="2076";"subkey"="415";"value"="15856"}; +{"key"="2077";"subkey"="415";"value"="15865"}; +{"key"="2078";"subkey"="415";"value"="15874"}; +{"key"="2079";"subkey"="415";"value"="15882"}; +{"key"="2080";"subkey"="416";"value"="15891"}; +{"key"="2081";"subkey"="416";"value"="15900"}; +{"key"="2082";"subkey"="416";"value"="15908"}; +{"key"="2083";"subkey"="416";"value"="15917"}; +{"key"="2084";"subkey"="416";"value"="15926"}; +{"key"="2085";"subkey"="417";"value"="15934"}; +{"key"="2086";"subkey"="417";"value"="15943"}; +{"key"="2087";"subkey"="417";"value"="15951"}; +{"key"="2088";"subkey"="417";"value"="15960"}; +{"key"="2089";"subkey"="417";"value"="15969"}; +{"key"="2090";"subkey"="418";"value"="15977"}; +{"key"="2091";"subkey"="418";"value"="15986"}; +{"key"="2092";"subkey"="418";"value"="15995"}; +{"key"="2093";"subkey"="418";"value"="16003"}; +{"key"="2094";"subkey"="418";"value"="16012"}; +{"key"="2095";"subkey"="419";"value"="16021"}; 
+{"key"="2096";"subkey"="419";"value"="16029"}; +{"key"="2097";"subkey"="419";"value"="16038"}; +{"key"="2098";"subkey"="419";"value"="16047"}; +{"key"="2099";"subkey"="419";"value"="16055"}; +{"key"="2100";"subkey"="420";"value"="16064"}; +{"key"="2101";"subkey"="420";"value"="16073"}; +{"key"="2102";"subkey"="420";"value"="16081"}; +{"key"="2103";"subkey"="420";"value"="16090"}; +{"key"="2104";"subkey"="420";"value"="16098"}; +{"key"="2105";"subkey"="421";"value"="16107"}; +{"key"="2106";"subkey"="421";"value"="16116"}; +{"key"="2107";"subkey"="421";"value"="16124"}; +{"key"="2108";"subkey"="421";"value"="16133"}; +{"key"="2109";"subkey"="421";"value"="16142"}; +{"key"="2110";"subkey"="422";"value"="16150"}; +{"key"="2111";"subkey"="422";"value"="16159"}; +{"key"="2112";"subkey"="422";"value"="16168"}; +{"key"="2113";"subkey"="422";"value"="16176"}; +{"key"="2114";"subkey"="422";"value"="16185"}; +{"key"="2115";"subkey"="423";"value"="16194"}; +{"key"="2116";"subkey"="423";"value"="16202"}; +{"key"="2117";"subkey"="423";"value"="16211"}; +{"key"="2118";"subkey"="423";"value"="16220"}; +{"key"="2119";"subkey"="423";"value"="16228"}; +{"key"="2120";"subkey"="424";"value"="16237"}; +{"key"="2121";"subkey"="424";"value"="16246"}; +{"key"="2122";"subkey"="424";"value"="16254"}; +{"key"="2123";"subkey"="424";"value"="16263"}; +{"key"="2124";"subkey"="424";"value"="16272"}; +{"key"="2125";"subkey"="425";"value"="16280"}; +{"key"="2126";"subkey"="425";"value"="16289"}; +{"key"="2127";"subkey"="425";"value"="16298"}; +{"key"="2128";"subkey"="425";"value"="16306"}; +{"key"="2129";"subkey"="425";"value"="16315"}; +{"key"="2130";"subkey"="426";"value"="16324"}; +{"key"="2131";"subkey"="426";"value"="16332"}; +{"key"="2132";"subkey"="426";"value"="16341"}; +{"key"="2133";"subkey"="426";"value"="16350"}; +{"key"="2134";"subkey"="426";"value"="16358"}; +{"key"="2135";"subkey"="427";"value"="16367"}; +{"key"="2136";"subkey"="427";"value"="16376"}; 
+{"key"="2137";"subkey"="427";"value"="16384"}; +{"key"="2138";"subkey"="427";"value"="16393"}; +{"key"="2139";"subkey"="427";"value"="16402"}; +{"key"="2140";"subkey"="428";"value"="16410"}; +{"key"="2141";"subkey"="428";"value"="16419"}; +{"key"="2142";"subkey"="428";"value"="16428"}; +{"key"="2143";"subkey"="428";"value"="16436"}; +{"key"="2144";"subkey"="428";"value"="16445"}; +{"key"="2145";"subkey"="429";"value"="16454"}; +{"key"="2146";"subkey"="429";"value"="16462"}; +{"key"="2147";"subkey"="429";"value"="16471"}; +{"key"="2148";"subkey"="429";"value"="16480"}; +{"key"="2149";"subkey"="429";"value"="16488"}; +{"key"="2150";"subkey"="430";"value"="16497"}; +{"key"="2151";"subkey"="430";"value"="16506"}; +{"key"="2152";"subkey"="430";"value"="16514"}; +{"key"="2153";"subkey"="430";"value"="16523"}; +{"key"="2154";"subkey"="430";"value"="16532"}; +{"key"="2155";"subkey"="431";"value"="16540"}; +{"key"="2156";"subkey"="431";"value"="16549"}; +{"key"="2157";"subkey"="431";"value"="16558"}; +{"key"="2158";"subkey"="431";"value"="16566"}; +{"key"="2159";"subkey"="431";"value"="16575"}; +{"key"="2160";"subkey"="432";"value"="16584"}; +{"key"="2161";"subkey"="432";"value"="16592"}; +{"key"="2162";"subkey"="432";"value"="16601"}; +{"key"="2163";"subkey"="432";"value"="16610"}; +{"key"="2164";"subkey"="432";"value"="16618"}; +{"key"="2165";"subkey"="433";"value"="16627"}; +{"key"="2166";"subkey"="433";"value"="16636"}; +{"key"="2167";"subkey"="433";"value"="16644"}; +{"key"="2168";"subkey"="433";"value"="16653"}; +{"key"="2169";"subkey"="433";"value"="16662"}; +{"key"="2170";"subkey"="434";"value"="16670"}; +{"key"="2171";"subkey"="434";"value"="16679"}; +{"key"="2172";"subkey"="434";"value"="16688"}; +{"key"="2173";"subkey"="434";"value"="16697"}; +{"key"="2174";"subkey"="434";"value"="16705"}; +{"key"="2175";"subkey"="435";"value"="16714"}; +{"key"="2176";"subkey"="435";"value"="16723"}; +{"key"="2177";"subkey"="435";"value"="16731"}; 
+{"key"="2178";"subkey"="435";"value"="16740"}; +{"key"="2179";"subkey"="435";"value"="16749"}; +{"key"="2180";"subkey"="436";"value"="16757"}; +{"key"="2181";"subkey"="436";"value"="16766"}; +{"key"="2182";"subkey"="436";"value"="16775"}; +{"key"="2183";"subkey"="436";"value"="16783"}; +{"key"="2184";"subkey"="436";"value"="16792"}; +{"key"="2185";"subkey"="437";"value"="16801"}; +{"key"="2186";"subkey"="437";"value"="16809"}; +{"key"="2187";"subkey"="437";"value"="16818"}; +{"key"="2188";"subkey"="437";"value"="16827"}; +{"key"="2189";"subkey"="437";"value"="16836"}; +{"key"="2190";"subkey"="438";"value"="16844"}; +{"key"="2191";"subkey"="438";"value"="16853"}; +{"key"="2192";"subkey"="438";"value"="16862"}; +{"key"="2193";"subkey"="438";"value"="16870"}; +{"key"="2194";"subkey"="438";"value"="16879"}; +{"key"="2195";"subkey"="439";"value"="16888"}; +{"key"="2196";"subkey"="439";"value"="16896"}; +{"key"="2197";"subkey"="439";"value"="16905"}; +{"key"="2198";"subkey"="439";"value"="16914"}; +{"key"="2199";"subkey"="439";"value"="16922"}; +{"key"="2200";"subkey"="440";"value"="16931"}; +{"key"="2201";"subkey"="440";"value"="16940"}; +{"key"="2202";"subkey"="440";"value"="16949"}; +{"key"="2203";"subkey"="440";"value"="16957"}; +{"key"="2204";"subkey"="440";"value"="16966"}; +{"key"="2205";"subkey"="441";"value"="16975"}; +{"key"="2206";"subkey"="441";"value"="16983"}; +{"key"="2207";"subkey"="441";"value"="16992"}; +{"key"="2208";"subkey"="441";"value"="17001"}; +{"key"="2209";"subkey"="441";"value"="17009"}; +{"key"="2210";"subkey"="442";"value"="17018"}; +{"key"="2211";"subkey"="442";"value"="17027"}; +{"key"="2212";"subkey"="442";"value"="17036"}; +{"key"="2213";"subkey"="442";"value"="17044"}; +{"key"="2214";"subkey"="442";"value"="17053"}; +{"key"="2215";"subkey"="443";"value"="17062"}; +{"key"="2216";"subkey"="443";"value"="17070"}; +{"key"="2217";"subkey"="443";"value"="17079"}; +{"key"="2218";"subkey"="443";"value"="17088"}; 
+{"key"="2219";"subkey"="443";"value"="17096"}; +{"key"="2220";"subkey"="444";"value"="17105"}; +{"key"="2221";"subkey"="444";"value"="17114"}; +{"key"="2222";"subkey"="444";"value"="17123"}; +{"key"="2223";"subkey"="444";"value"="17131"}; +{"key"="2224";"subkey"="444";"value"="17140"}; +{"key"="2225";"subkey"="445";"value"="17149"}; +{"key"="2226";"subkey"="445";"value"="17157"}; +{"key"="2227";"subkey"="445";"value"="17166"}; +{"key"="2228";"subkey"="445";"value"="17175"}; +{"key"="2229";"subkey"="445";"value"="17184"}; +{"key"="2230";"subkey"="446";"value"="17192"}; +{"key"="2231";"subkey"="446";"value"="17201"}; +{"key"="2232";"subkey"="446";"value"="17210"}; +{"key"="2233";"subkey"="446";"value"="17218"}; +{"key"="2234";"subkey"="446";"value"="17227"}; +{"key"="2235";"subkey"="447";"value"="17236"}; +{"key"="2236";"subkey"="447";"value"="17245"}; +{"key"="2237";"subkey"="447";"value"="17253"}; +{"key"="2238";"subkey"="447";"value"="17262"}; +{"key"="2239";"subkey"="447";"value"="17271"}; +{"key"="2240";"subkey"="448";"value"="17279"}; +{"key"="2241";"subkey"="448";"value"="17288"}; +{"key"="2242";"subkey"="448";"value"="17297"}; +{"key"="2243";"subkey"="448";"value"="17306"}; +{"key"="2244";"subkey"="448";"value"="17314"}; +{"key"="2245";"subkey"="449";"value"="17323"}; +{"key"="2246";"subkey"="449";"value"="17332"}; +{"key"="2247";"subkey"="449";"value"="17340"}; +{"key"="2248";"subkey"="449";"value"="17349"}; +{"key"="2249";"subkey"="449";"value"="17358"}; +{"key"="2250";"subkey"="450";"value"="17367"}; +{"key"="2251";"subkey"="450";"value"="17375"}; +{"key"="2252";"subkey"="450";"value"="17384"}; +{"key"="2253";"subkey"="450";"value"="17393"}; +{"key"="2254";"subkey"="450";"value"="17401"}; +{"key"="2255";"subkey"="451";"value"="17410"}; +{"key"="2256";"subkey"="451";"value"="17419"}; +{"key"="2257";"subkey"="451";"value"="17428"}; +{"key"="2258";"subkey"="451";"value"="17436"}; +{"key"="2259";"subkey"="451";"value"="17445"}; 
+{"key"="2260";"subkey"="452";"value"="17454"}; +{"key"="2261";"subkey"="452";"value"="17462"}; +{"key"="2262";"subkey"="452";"value"="17471"}; +{"key"="2263";"subkey"="452";"value"="17480"}; +{"key"="2264";"subkey"="452";"value"="17489"}; +{"key"="2265";"subkey"="453";"value"="17497"}; +{"key"="2266";"subkey"="453";"value"="17506"}; +{"key"="2267";"subkey"="453";"value"="17515"}; +{"key"="2268";"subkey"="453";"value"="17524"}; +{"key"="2269";"subkey"="453";"value"="17532"}; +{"key"="2270";"subkey"="454";"value"="17541"}; +{"key"="2271";"subkey"="454";"value"="17550"}; +{"key"="2272";"subkey"="454";"value"="17558"}; +{"key"="2273";"subkey"="454";"value"="17567"}; +{"key"="2274";"subkey"="454";"value"="17576"}; +{"key"="2275";"subkey"="455";"value"="17585"}; +{"key"="2276";"subkey"="455";"value"="17593"}; +{"key"="2277";"subkey"="455";"value"="17602"}; +{"key"="2278";"subkey"="455";"value"="17611"}; +{"key"="2279";"subkey"="455";"value"="17620"}; +{"key"="2280";"subkey"="456";"value"="17628"}; +{"key"="2281";"subkey"="456";"value"="17637"}; +{"key"="2282";"subkey"="456";"value"="17646"}; +{"key"="2283";"subkey"="456";"value"="17654"}; +{"key"="2284";"subkey"="456";"value"="17663"}; +{"key"="2285";"subkey"="457";"value"="17672"}; +{"key"="2286";"subkey"="457";"value"="17681"}; +{"key"="2287";"subkey"="457";"value"="17689"}; +{"key"="2288";"subkey"="457";"value"="17698"}; +{"key"="2289";"subkey"="457";"value"="17707"}; +{"key"="2290";"subkey"="458";"value"="17716"}; +{"key"="2291";"subkey"="458";"value"="17724"}; +{"key"="2292";"subkey"="458";"value"="17733"}; +{"key"="2293";"subkey"="458";"value"="17742"}; +{"key"="2294";"subkey"="458";"value"="17751"}; +{"key"="2295";"subkey"="459";"value"="17759"}; +{"key"="2296";"subkey"="459";"value"="17768"}; +{"key"="2297";"subkey"="459";"value"="17777"}; +{"key"="2298";"subkey"="459";"value"="17786"}; +{"key"="2299";"subkey"="459";"value"="17794"}; +{"key"="2300";"subkey"="460";"value"="17803"}; 
+{"key"="2301";"subkey"="460";"value"="17812"}; +{"key"="2302";"subkey"="460";"value"="17821"}; +{"key"="2303";"subkey"="460";"value"="17829"}; +{"key"="2304";"subkey"="460";"value"="17838"}; +{"key"="2305";"subkey"="461";"value"="17847"}; +{"key"="2306";"subkey"="461";"value"="17855"}; +{"key"="2307";"subkey"="461";"value"="17864"}; +{"key"="2308";"subkey"="461";"value"="17873"}; +{"key"="2309";"subkey"="461";"value"="17882"}; +{"key"="2310";"subkey"="462";"value"="17890"}; +{"key"="2311";"subkey"="462";"value"="17899"}; +{"key"="2312";"subkey"="462";"value"="17908"}; +{"key"="2313";"subkey"="462";"value"="17917"}; +{"key"="2314";"subkey"="462";"value"="17925"}; +{"key"="2315";"subkey"="463";"value"="17934"}; +{"key"="2316";"subkey"="463";"value"="17943"}; +{"key"="2317";"subkey"="463";"value"="17952"}; +{"key"="2318";"subkey"="463";"value"="17960"}; +{"key"="2319";"subkey"="463";"value"="17969"}; +{"key"="2320";"subkey"="464";"value"="17978"}; +{"key"="2321";"subkey"="464";"value"="17987"}; +{"key"="2322";"subkey"="464";"value"="17995"}; +{"key"="2323";"subkey"="464";"value"="18004"}; +{"key"="2324";"subkey"="464";"value"="18013"}; +{"key"="2325";"subkey"="465";"value"="18022"}; +{"key"="2326";"subkey"="465";"value"="18030"}; +{"key"="2327";"subkey"="465";"value"="18039"}; +{"key"="2328";"subkey"="465";"value"="18048"}; +{"key"="2329";"subkey"="465";"value"="18057"}; +{"key"="2330";"subkey"="466";"value"="18065"}; +{"key"="2331";"subkey"="466";"value"="18074"}; +{"key"="2332";"subkey"="466";"value"="18083"}; +{"key"="2333";"subkey"="466";"value"="18092"}; +{"key"="2334";"subkey"="466";"value"="18100"}; +{"key"="2335";"subkey"="467";"value"="18109"}; +{"key"="2336";"subkey"="467";"value"="18118"}; +{"key"="2337";"subkey"="467";"value"="18127"}; +{"key"="2338";"subkey"="467";"value"="18135"}; +{"key"="2339";"subkey"="467";"value"="18144"}; +{"key"="2340";"subkey"="468";"value"="18153"}; +{"key"="2341";"subkey"="468";"value"="18162"}; 
+{"key"="2342";"subkey"="468";"value"="18171"}; +{"key"="2343";"subkey"="468";"value"="18179"}; +{"key"="2344";"subkey"="468";"value"="18188"}; +{"key"="2345";"subkey"="469";"value"="18197"}; +{"key"="2346";"subkey"="469";"value"="18206"}; +{"key"="2347";"subkey"="469";"value"="18214"}; +{"key"="2348";"subkey"="469";"value"="18223"}; +{"key"="2349";"subkey"="469";"value"="18232"}; +{"key"="2350";"subkey"="470";"value"="18241"}; +{"key"="2351";"subkey"="470";"value"="18249"}; +{"key"="2352";"subkey"="470";"value"="18258"}; +{"key"="2353";"subkey"="470";"value"="18267"}; +{"key"="2354";"subkey"="470";"value"="18276"}; +{"key"="2355";"subkey"="471";"value"="18284"}; +{"key"="2356";"subkey"="471";"value"="18293"}; +{"key"="2357";"subkey"="471";"value"="18302"}; +{"key"="2358";"subkey"="471";"value"="18311"}; +{"key"="2359";"subkey"="471";"value"="18319"}; +{"key"="2360";"subkey"="472";"value"="18328"}; +{"key"="2361";"subkey"="472";"value"="18337"}; +{"key"="2362";"subkey"="472";"value"="18346"}; +{"key"="2363";"subkey"="472";"value"="18355"}; +{"key"="2364";"subkey"="472";"value"="18363"}; +{"key"="2365";"subkey"="473";"value"="18372"}; +{"key"="2366";"subkey"="473";"value"="18381"}; +{"key"="2367";"subkey"="473";"value"="18390"}; +{"key"="2368";"subkey"="473";"value"="18398"}; +{"key"="2369";"subkey"="473";"value"="18407"}; +{"key"="2370";"subkey"="474";"value"="18416"}; +{"key"="2371";"subkey"="474";"value"="18425"}; +{"key"="2372";"subkey"="474";"value"="18433"}; +{"key"="2373";"subkey"="474";"value"="18442"}; +{"key"="2374";"subkey"="474";"value"="18451"}; +{"key"="2375";"subkey"="475";"value"="18460"}; +{"key"="2376";"subkey"="475";"value"="18469"}; +{"key"="2377";"subkey"="475";"value"="18477"}; +{"key"="2378";"subkey"="475";"value"="18486"}; +{"key"="2379";"subkey"="475";"value"="18495"}; +{"key"="2380";"subkey"="476";"value"="18504"}; +{"key"="2381";"subkey"="476";"value"="18512"}; +{"key"="2382";"subkey"="476";"value"="18521"}; 
+{"key"="2383";"subkey"="476";"value"="18530"}; +{"key"="2384";"subkey"="476";"value"="18539"}; +{"key"="2385";"subkey"="477";"value"="18548"}; +{"key"="2386";"subkey"="477";"value"="18556"}; +{"key"="2387";"subkey"="477";"value"="18565"}; +{"key"="2388";"subkey"="477";"value"="18574"}; +{"key"="2389";"subkey"="477";"value"="18583"}; +{"key"="2390";"subkey"="478";"value"="18591"}; +{"key"="2391";"subkey"="478";"value"="18600"}; +{"key"="2392";"subkey"="478";"value"="18609"}; +{"key"="2393";"subkey"="478";"value"="18618"}; +{"key"="2394";"subkey"="478";"value"="18627"}; +{"key"="2395";"subkey"="479";"value"="18635"}; +{"key"="2396";"subkey"="479";"value"="18644"}; +{"key"="2397";"subkey"="479";"value"="18653"}; +{"key"="2398";"subkey"="479";"value"="18662"}; +{"key"="2399";"subkey"="479";"value"="18670"}; +{"key"="2400";"subkey"="480";"value"="18679"}; +{"key"="2401";"subkey"="480";"value"="18688"}; +{"key"="2402";"subkey"="480";"value"="18697"}; +{"key"="2403";"subkey"="480";"value"="18706"}; +{"key"="2404";"subkey"="480";"value"="18714"}; +{"key"="2405";"subkey"="481";"value"="18723"}; +{"key"="2406";"subkey"="481";"value"="18732"}; +{"key"="2407";"subkey"="481";"value"="18741"}; +{"key"="2408";"subkey"="481";"value"="18750"}; +{"key"="2409";"subkey"="481";"value"="18758"}; +{"key"="2410";"subkey"="482";"value"="18767"}; +{"key"="2411";"subkey"="482";"value"="18776"}; +{"key"="2412";"subkey"="482";"value"="18785"}; +{"key"="2413";"subkey"="482";"value"="18793"}; +{"key"="2414";"subkey"="482";"value"="18802"}; +{"key"="2415";"subkey"="483";"value"="18811"}; +{"key"="2416";"subkey"="483";"value"="18820"}; +{"key"="2417";"subkey"="483";"value"="18829"}; +{"key"="2418";"subkey"="483";"value"="18837"}; +{"key"="2419";"subkey"="483";"value"="18846"}; +{"key"="2420";"subkey"="484";"value"="18855"}; +{"key"="2421";"subkey"="484";"value"="18864"}; +{"key"="2422";"subkey"="484";"value"="18873"}; +{"key"="2423";"subkey"="484";"value"="18881"}; 
+{"key"="2424";"subkey"="484";"value"="18890"}; +{"key"="2425";"subkey"="485";"value"="18899"}; +{"key"="2426";"subkey"="485";"value"="18908"}; +{"key"="2427";"subkey"="485";"value"="18917"}; +{"key"="2428";"subkey"="485";"value"="18925"}; +{"key"="2429";"subkey"="485";"value"="18934"}; +{"key"="2430";"subkey"="486";"value"="18943"}; +{"key"="2431";"subkey"="486";"value"="18952"}; +{"key"="2432";"subkey"="486";"value"="18961"}; +{"key"="2433";"subkey"="486";"value"="18969"}; +{"key"="2434";"subkey"="486";"value"="18978"}; +{"key"="2435";"subkey"="487";"value"="18987"}; +{"key"="2436";"subkey"="487";"value"="18996"}; +{"key"="2437";"subkey"="487";"value"="19005"}; +{"key"="2438";"subkey"="487";"value"="19013"}; +{"key"="2439";"subkey"="487";"value"="19022"}; +{"key"="2440";"subkey"="488";"value"="19031"}; +{"key"="2441";"subkey"="488";"value"="19040"}; +{"key"="2442";"subkey"="488";"value"="19048"}; +{"key"="2443";"subkey"="488";"value"="19057"}; +{"key"="2444";"subkey"="488";"value"="19066"}; +{"key"="2445";"subkey"="489";"value"="19075"}; +{"key"="2446";"subkey"="489";"value"="19084"}; +{"key"="2447";"subkey"="489";"value"="19093"}; +{"key"="2448";"subkey"="489";"value"="19101"}; +{"key"="2449";"subkey"="489";"value"="19110"}; +{"key"="2450";"subkey"="490";"value"="19119"}; +{"key"="2451";"subkey"="490";"value"="19128"}; +{"key"="2452";"subkey"="490";"value"="19137"}; +{"key"="2453";"subkey"="490";"value"="19145"}; +{"key"="2454";"subkey"="490";"value"="19154"}; +{"key"="2455";"subkey"="491";"value"="19163"}; +{"key"="2456";"subkey"="491";"value"="19172"}; +{"key"="2457";"subkey"="491";"value"="19181"}; +{"key"="2458";"subkey"="491";"value"="19189"}; +{"key"="2459";"subkey"="491";"value"="19198"}; +{"key"="2460";"subkey"="492";"value"="19207"}; +{"key"="2461";"subkey"="492";"value"="19216"}; +{"key"="2462";"subkey"="492";"value"="19225"}; +{"key"="2463";"subkey"="492";"value"="19233"}; +{"key"="2464";"subkey"="492";"value"="19242"}; 
+{"key"="2465";"subkey"="493";"value"="19251"}; +{"key"="2466";"subkey"="493";"value"="19260"}; +{"key"="2467";"subkey"="493";"value"="19269"}; +{"key"="2468";"subkey"="493";"value"="19277"}; +{"key"="2469";"subkey"="493";"value"="19286"}; +{"key"="2470";"subkey"="494";"value"="19295"}; +{"key"="2471";"subkey"="494";"value"="19304"}; +{"key"="2472";"subkey"="494";"value"="19313"}; +{"key"="2473";"subkey"="494";"value"="19322"}; +{"key"="2474";"subkey"="494";"value"="19330"}; +{"key"="2475";"subkey"="495";"value"="19339"}; +{"key"="2476";"subkey"="495";"value"="19348"}; +{"key"="2477";"subkey"="495";"value"="19357"}; +{"key"="2478";"subkey"="495";"value"="19366"}; +{"key"="2479";"subkey"="495";"value"="19374"}; +{"key"="2480";"subkey"="496";"value"="19383"}; +{"key"="2481";"subkey"="496";"value"="19392"}; +{"key"="2482";"subkey"="496";"value"="19401"}; +{"key"="2483";"subkey"="496";"value"="19410"}; +{"key"="2484";"subkey"="496";"value"="19418"}; +{"key"="2485";"subkey"="497";"value"="19427"}; +{"key"="2486";"subkey"="497";"value"="19436"}; +{"key"="2487";"subkey"="497";"value"="19445"}; +{"key"="2488";"subkey"="497";"value"="19454"}; +{"key"="2489";"subkey"="497";"value"="19463"}; +{"key"="2490";"subkey"="498";"value"="19471"}; +{"key"="2491";"subkey"="498";"value"="19480"}; +{"key"="2492";"subkey"="498";"value"="19489"}; +{"key"="2493";"subkey"="498";"value"="19498"}; +{"key"="2494";"subkey"="498";"value"="19507"}; +{"key"="2495";"subkey"="499";"value"="19515"}; +{"key"="2496";"subkey"="499";"value"="19524"}; +{"key"="2497";"subkey"="499";"value"="19533"}; +{"key"="2498";"subkey"="499";"value"="19542"}; +{"key"="2499";"subkey"="499";"value"="19551"}; +{"key"="2500";"subkey"="500";"value"="19560"}; +{"key"="2501";"subkey"="500";"value"="19568"}; +{"key"="2502";"subkey"="500";"value"="19577"}; +{"key"="2503";"subkey"="500";"value"="19586"}; +{"key"="2504";"subkey"="500";"value"="19595"}; +{"key"="2505";"subkey"="501";"value"="19604"}; 
+{"key"="2506";"subkey"="501";"value"="19613"}; +{"key"="2507";"subkey"="501";"value"="19621"}; +{"key"="2508";"subkey"="501";"value"="19630"}; +{"key"="2509";"subkey"="501";"value"="19639"}; +{"key"="2510";"subkey"="502";"value"="19648"}; +{"key"="2511";"subkey"="502";"value"="19657"}; +{"key"="2512";"subkey"="502";"value"="19666"}; +{"key"="2513";"subkey"="502";"value"="19674"}; +{"key"="2514";"subkey"="502";"value"="19683"}; +{"key"="2515";"subkey"="503";"value"="19692"}; +{"key"="2516";"subkey"="503";"value"="19701"}; +{"key"="2517";"subkey"="503";"value"="19710"}; +{"key"="2518";"subkey"="503";"value"="19719"}; +{"key"="2519";"subkey"="503";"value"="19727"}; +{"key"="2520";"subkey"="504";"value"="19736"}; +{"key"="2521";"subkey"="504";"value"="19745"}; +{"key"="2522";"subkey"="504";"value"="19754"}; +{"key"="2523";"subkey"="504";"value"="19763"}; +{"key"="2524";"subkey"="504";"value"="19772"}; +{"key"="2525";"subkey"="505";"value"="19780"}; +{"key"="2526";"subkey"="505";"value"="19789"}; +{"key"="2527";"subkey"="505";"value"="19798"}; +{"key"="2528";"subkey"="505";"value"="19807"}; +{"key"="2529";"subkey"="505";"value"="19816"}; +{"key"="2530";"subkey"="506";"value"="19825"}; +{"key"="2531";"subkey"="506";"value"="19833"}; +{"key"="2532";"subkey"="506";"value"="19842"}; +{"key"="2533";"subkey"="506";"value"="19851"}; +{"key"="2534";"subkey"="506";"value"="19860"}; +{"key"="2535";"subkey"="507";"value"="19869"}; +{"key"="2536";"subkey"="507";"value"="19878"}; +{"key"="2537";"subkey"="507";"value"="19886"}; +{"key"="2538";"subkey"="507";"value"="19895"}; +{"key"="2539";"subkey"="507";"value"="19904"}; +{"key"="2540";"subkey"="508";"value"="19913"}; +{"key"="2541";"subkey"="508";"value"="19922"}; +{"key"="2542";"subkey"="508";"value"="19931"}; +{"key"="2543";"subkey"="508";"value"="19939"}; +{"key"="2544";"subkey"="508";"value"="19948"}; +{"key"="2545";"subkey"="509";"value"="19957"}; +{"key"="2546";"subkey"="509";"value"="19966"}; 
+{"key"="2547";"subkey"="509";"value"="19975"}; +{"key"="2548";"subkey"="509";"value"="19984"}; +{"key"="2549";"subkey"="509";"value"="19992"}; +{"key"="2550";"subkey"="510";"value"="20001"}; +{"key"="2551";"subkey"="510";"value"="20010"}; +{"key"="2552";"subkey"="510";"value"="20019"}; +{"key"="2553";"subkey"="510";"value"="20028"}; +{"key"="2554";"subkey"="510";"value"="20037"}; +{"key"="2555";"subkey"="511";"value"="20046"}; +{"key"="2556";"subkey"="511";"value"="20054"}; +{"key"="2557";"subkey"="511";"value"="20063"}; +{"key"="2558";"subkey"="511";"value"="20072"}; +{"key"="2559";"subkey"="511";"value"="20081"}; +{"key"="2560";"subkey"="512";"value"="20090"}; +{"key"="2561";"subkey"="512";"value"="20099"}; +{"key"="2562";"subkey"="512";"value"="20107"}; +{"key"="2563";"subkey"="512";"value"="20116"}; +{"key"="2564";"subkey"="512";"value"="20125"}; +{"key"="2565";"subkey"="513";"value"="20134"}; +{"key"="2566";"subkey"="513";"value"="20143"}; +{"key"="2567";"subkey"="513";"value"="20152"}; +{"key"="2568";"subkey"="513";"value"="20161"}; +{"key"="2569";"subkey"="513";"value"="20169"}; +{"key"="2570";"subkey"="514";"value"="20178"}; +{"key"="2571";"subkey"="514";"value"="20187"}; +{"key"="2572";"subkey"="514";"value"="20196"}; +{"key"="2573";"subkey"="514";"value"="20205"}; +{"key"="2574";"subkey"="514";"value"="20214"}; +{"key"="2575";"subkey"="515";"value"="20223"}; +{"key"="2576";"subkey"="515";"value"="20231"}; +{"key"="2577";"subkey"="515";"value"="20240"}; +{"key"="2578";"subkey"="515";"value"="20249"}; +{"key"="2579";"subkey"="515";"value"="20258"}; +{"key"="2580";"subkey"="516";"value"="20267"}; +{"key"="2581";"subkey"="516";"value"="20276"}; +{"key"="2582";"subkey"="516";"value"="20285"}; +{"key"="2583";"subkey"="516";"value"="20293"}; +{"key"="2584";"subkey"="516";"value"="20302"}; +{"key"="2585";"subkey"="517";"value"="20311"}; +{"key"="2586";"subkey"="517";"value"="20320"}; +{"key"="2587";"subkey"="517";"value"="20329"}; 
+{"key"="2588";"subkey"="517";"value"="20338"}; +{"key"="2589";"subkey"="517";"value"="20347"}; +{"key"="2590";"subkey"="518";"value"="20355"}; +{"key"="2591";"subkey"="518";"value"="20364"}; +{"key"="2592";"subkey"="518";"value"="20373"}; +{"key"="2593";"subkey"="518";"value"="20382"}; +{"key"="2594";"subkey"="518";"value"="20391"}; +{"key"="2595";"subkey"="519";"value"="20400"}; +{"key"="2596";"subkey"="519";"value"="20409"}; +{"key"="2597";"subkey"="519";"value"="20417"}; +{"key"="2598";"subkey"="519";"value"="20426"}; +{"key"="2599";"subkey"="519";"value"="20435"}; +{"key"="2600";"subkey"="520";"value"="20444"}; +{"key"="2601";"subkey"="520";"value"="20453"}; +{"key"="2602";"subkey"="520";"value"="20462"}; +{"key"="2603";"subkey"="520";"value"="20471"}; +{"key"="2604";"subkey"="520";"value"="20479"}; +{"key"="2605";"subkey"="521";"value"="20488"}; +{"key"="2606";"subkey"="521";"value"="20497"}; +{"key"="2607";"subkey"="521";"value"="20506"}; +{"key"="2608";"subkey"="521";"value"="20515"}; +{"key"="2609";"subkey"="521";"value"="20524"}; +{"key"="2610";"subkey"="522";"value"="20533"}; +{"key"="2611";"subkey"="522";"value"="20542"}; +{"key"="2612";"subkey"="522";"value"="20550"}; +{"key"="2613";"subkey"="522";"value"="20559"}; +{"key"="2614";"subkey"="522";"value"="20568"}; +{"key"="2615";"subkey"="523";"value"="20577"}; +{"key"="2616";"subkey"="523";"value"="20586"}; +{"key"="2617";"subkey"="523";"value"="20595"}; +{"key"="2618";"subkey"="523";"value"="20604"}; +{"key"="2619";"subkey"="523";"value"="20612"}; +{"key"="2620";"subkey"="524";"value"="20621"}; +{"key"="2621";"subkey"="524";"value"="20630"}; +{"key"="2622";"subkey"="524";"value"="20639"}; +{"key"="2623";"subkey"="524";"value"="20648"}; +{"key"="2624";"subkey"="524";"value"="20657"}; +{"key"="2625";"subkey"="525";"value"="20666"}; +{"key"="2626";"subkey"="525";"value"="20675"}; +{"key"="2627";"subkey"="525";"value"="20683"}; +{"key"="2628";"subkey"="525";"value"="20692"}; 
+{"key"="2629";"subkey"="525";"value"="20701"}; +{"key"="2630";"subkey"="526";"value"="20710"}; +{"key"="2631";"subkey"="526";"value"="20719"}; +{"key"="2632";"subkey"="526";"value"="20728"}; +{"key"="2633";"subkey"="526";"value"="20737"}; +{"key"="2634";"subkey"="526";"value"="20746"}; +{"key"="2635";"subkey"="527";"value"="20754"}; +{"key"="2636";"subkey"="527";"value"="20763"}; +{"key"="2637";"subkey"="527";"value"="20772"}; +{"key"="2638";"subkey"="527";"value"="20781"}; +{"key"="2639";"subkey"="527";"value"="20790"}; +{"key"="2640";"subkey"="528";"value"="20799"}; +{"key"="2641";"subkey"="528";"value"="20808"}; +{"key"="2642";"subkey"="528";"value"="20817"}; +{"key"="2643";"subkey"="528";"value"="20825"}; +{"key"="2644";"subkey"="528";"value"="20834"}; +{"key"="2645";"subkey"="529";"value"="20843"}; +{"key"="2646";"subkey"="529";"value"="20852"}; +{"key"="2647";"subkey"="529";"value"="20861"}; +{"key"="2648";"subkey"="529";"value"="20870"}; +{"key"="2649";"subkey"="529";"value"="20879"}; +{"key"="2650";"subkey"="530";"value"="20888"}; +{"key"="2651";"subkey"="530";"value"="20897"}; +{"key"="2652";"subkey"="530";"value"="20905"}; +{"key"="2653";"subkey"="530";"value"="20914"}; +{"key"="2654";"subkey"="530";"value"="20923"}; +{"key"="2655";"subkey"="531";"value"="20932"}; +{"key"="2656";"subkey"="531";"value"="20941"}; +{"key"="2657";"subkey"="531";"value"="20950"}; +{"key"="2658";"subkey"="531";"value"="20959"}; +{"key"="2659";"subkey"="531";"value"="20968"}; +{"key"="2660";"subkey"="532";"value"="20976"}; +{"key"="2661";"subkey"="532";"value"="20985"}; +{"key"="2662";"subkey"="532";"value"="20994"}; +{"key"="2663";"subkey"="532";"value"="21003"}; +{"key"="2664";"subkey"="532";"value"="21012"}; +{"key"="2665";"subkey"="533";"value"="21021"}; +{"key"="2666";"subkey"="533";"value"="21030"}; +{"key"="2667";"subkey"="533";"value"="21039"}; +{"key"="2668";"subkey"="533";"value"="21048"}; +{"key"="2669";"subkey"="533";"value"="21056"}; 
+{"key"="2670";"subkey"="534";"value"="21065"}; +{"key"="2671";"subkey"="534";"value"="21074"}; +{"key"="2672";"subkey"="534";"value"="21083"}; +{"key"="2673";"subkey"="534";"value"="21092"}; +{"key"="2674";"subkey"="534";"value"="21101"}; +{"key"="2675";"subkey"="535";"value"="21110"}; +{"key"="2676";"subkey"="535";"value"="21119"}; +{"key"="2677";"subkey"="535";"value"="21128"}; +{"key"="2678";"subkey"="535";"value"="21136"}; +{"key"="2679";"subkey"="535";"value"="21145"}; +{"key"="2680";"subkey"="536";"value"="21154"}; +{"key"="2681";"subkey"="536";"value"="21163"}; +{"key"="2682";"subkey"="536";"value"="21172"}; +{"key"="2683";"subkey"="536";"value"="21181"}; +{"key"="2684";"subkey"="536";"value"="21190"}; +{"key"="2685";"subkey"="537";"value"="21199"}; +{"key"="2686";"subkey"="537";"value"="21208"}; +{"key"="2687";"subkey"="537";"value"="21217"}; +{"key"="2688";"subkey"="537";"value"="21225"}; +{"key"="2689";"subkey"="537";"value"="21234"}; +{"key"="2690";"subkey"="538";"value"="21243"}; +{"key"="2691";"subkey"="538";"value"="21252"}; +{"key"="2692";"subkey"="538";"value"="21261"}; +{"key"="2693";"subkey"="538";"value"="21270"}; +{"key"="2694";"subkey"="538";"value"="21279"}; +{"key"="2695";"subkey"="539";"value"="21288"}; +{"key"="2696";"subkey"="539";"value"="21297"}; +{"key"="2697";"subkey"="539";"value"="21306"}; +{"key"="2698";"subkey"="539";"value"="21314"}; +{"key"="2699";"subkey"="539";"value"="21323"}; +{"key"="2700";"subkey"="540";"value"="21332"}; +{"key"="2701";"subkey"="540";"value"="21341"}; +{"key"="2702";"subkey"="540";"value"="21350"}; +{"key"="2703";"subkey"="540";"value"="21359"}; +{"key"="2704";"subkey"="540";"value"="21368"}; +{"key"="2705";"subkey"="541";"value"="21377"}; +{"key"="2706";"subkey"="541";"value"="21386"}; +{"key"="2707";"subkey"="541";"value"="21395"}; +{"key"="2708";"subkey"="541";"value"="21403"}; +{"key"="2709";"subkey"="541";"value"="21412"}; +{"key"="2710";"subkey"="542";"value"="21421"}; 
+{"key"="2711";"subkey"="542";"value"="21430"}; +{"key"="2712";"subkey"="542";"value"="21439"}; +{"key"="2713";"subkey"="542";"value"="21448"}; +{"key"="2714";"subkey"="542";"value"="21457"}; +{"key"="2715";"subkey"="543";"value"="21466"}; +{"key"="2716";"subkey"="543";"value"="21475"}; +{"key"="2717";"subkey"="543";"value"="21484"}; +{"key"="2718";"subkey"="543";"value"="21492"}; +{"key"="2719";"subkey"="543";"value"="21501"}; +{"key"="2720";"subkey"="544";"value"="21510"}; +{"key"="2721";"subkey"="544";"value"="21519"}; +{"key"="2722";"subkey"="544";"value"="21528"}; +{"key"="2723";"subkey"="544";"value"="21537"}; +{"key"="2724";"subkey"="544";"value"="21546"}; +{"key"="2725";"subkey"="545";"value"="21555"}; +{"key"="2726";"subkey"="545";"value"="21564"}; +{"key"="2727";"subkey"="545";"value"="21573"}; +{"key"="2728";"subkey"="545";"value"="21582"}; +{"key"="2729";"subkey"="545";"value"="21591"}; +{"key"="2730";"subkey"="546";"value"="21599"}; +{"key"="2731";"subkey"="546";"value"="21608"}; +{"key"="2732";"subkey"="546";"value"="21617"}; +{"key"="2733";"subkey"="546";"value"="21626"}; +{"key"="2734";"subkey"="546";"value"="21635"}; +{"key"="2735";"subkey"="547";"value"="21644"}; +{"key"="2736";"subkey"="547";"value"="21653"}; +{"key"="2737";"subkey"="547";"value"="21662"}; +{"key"="2738";"subkey"="547";"value"="21671"}; +{"key"="2739";"subkey"="547";"value"="21680"}; +{"key"="2740";"subkey"="548";"value"="21689"}; +{"key"="2741";"subkey"="548";"value"="21697"}; +{"key"="2742";"subkey"="548";"value"="21706"}; +{"key"="2743";"subkey"="548";"value"="21715"}; +{"key"="2744";"subkey"="548";"value"="21724"}; +{"key"="2745";"subkey"="549";"value"="21733"}; +{"key"="2746";"subkey"="549";"value"="21742"}; +{"key"="2747";"subkey"="549";"value"="21751"}; +{"key"="2748";"subkey"="549";"value"="21760"}; +{"key"="2749";"subkey"="549";"value"="21769"}; +{"key"="2750";"subkey"="550";"value"="21778"}; +{"key"="2751";"subkey"="550";"value"="21787"}; 
+{"key"="2752";"subkey"="550";"value"="21796"}; +{"key"="2753";"subkey"="550";"value"="21804"}; +{"key"="2754";"subkey"="550";"value"="21813"}; +{"key"="2755";"subkey"="551";"value"="21822"}; +{"key"="2756";"subkey"="551";"value"="21831"}; +{"key"="2757";"subkey"="551";"value"="21840"}; +{"key"="2758";"subkey"="551";"value"="21849"}; +{"key"="2759";"subkey"="551";"value"="21858"}; +{"key"="2760";"subkey"="552";"value"="21867"}; +{"key"="2761";"subkey"="552";"value"="21876"}; +{"key"="2762";"subkey"="552";"value"="21885"}; +{"key"="2763";"subkey"="552";"value"="21894"}; +{"key"="2764";"subkey"="552";"value"="21903"}; +{"key"="2765";"subkey"="553";"value"="21912"}; +{"key"="2766";"subkey"="553";"value"="21920"}; +{"key"="2767";"subkey"="553";"value"="21929"}; +{"key"="2768";"subkey"="553";"value"="21938"}; +{"key"="2769";"subkey"="553";"value"="21947"}; +{"key"="2770";"subkey"="554";"value"="21956"}; +{"key"="2771";"subkey"="554";"value"="21965"}; +{"key"="2772";"subkey"="554";"value"="21974"}; +{"key"="2773";"subkey"="554";"value"="21983"}; +{"key"="2774";"subkey"="554";"value"="21992"}; +{"key"="2775";"subkey"="555";"value"="22001"}; +{"key"="2776";"subkey"="555";"value"="22010"}; +{"key"="2777";"subkey"="555";"value"="22019"}; +{"key"="2778";"subkey"="555";"value"="22028"}; +{"key"="2779";"subkey"="555";"value"="22037"}; +{"key"="2780";"subkey"="556";"value"="22045"}; +{"key"="2781";"subkey"="556";"value"="22054"}; +{"key"="2782";"subkey"="556";"value"="22063"}; +{"key"="2783";"subkey"="556";"value"="22072"}; +{"key"="2784";"subkey"="556";"value"="22081"}; +{"key"="2785";"subkey"="557";"value"="22090"}; +{"key"="2786";"subkey"="557";"value"="22099"}; +{"key"="2787";"subkey"="557";"value"="22108"}; +{"key"="2788";"subkey"="557";"value"="22117"}; +{"key"="2789";"subkey"="557";"value"="22126"}; +{"key"="2790";"subkey"="558";"value"="22135"}; +{"key"="2791";"subkey"="558";"value"="22144"}; +{"key"="2792";"subkey"="558";"value"="22153"}; 
+{"key"="2793";"subkey"="558";"value"="22162"}; +{"key"="2794";"subkey"="558";"value"="22171"}; +{"key"="2795";"subkey"="559";"value"="22179"}; +{"key"="2796";"subkey"="559";"value"="22188"}; +{"key"="2797";"subkey"="559";"value"="22197"}; +{"key"="2798";"subkey"="559";"value"="22206"}; +{"key"="2799";"subkey"="559";"value"="22215"}; +{"key"="2800";"subkey"="560";"value"="22224"}; +{"key"="2801";"subkey"="560";"value"="22233"}; +{"key"="2802";"subkey"="560";"value"="22242"}; +{"key"="2803";"subkey"="560";"value"="22251"}; +{"key"="2804";"subkey"="560";"value"="22260"}; +{"key"="2805";"subkey"="561";"value"="22269"}; +{"key"="2806";"subkey"="561";"value"="22278"}; +{"key"="2807";"subkey"="561";"value"="22287"}; +{"key"="2808";"subkey"="561";"value"="22296"}; +{"key"="2809";"subkey"="561";"value"="22305"}; +{"key"="2810";"subkey"="562";"value"="22314"}; +{"key"="2811";"subkey"="562";"value"="22322"}; +{"key"="2812";"subkey"="562";"value"="22331"}; +{"key"="2813";"subkey"="562";"value"="22340"}; +{"key"="2814";"subkey"="562";"value"="22349"}; +{"key"="2815";"subkey"="563";"value"="22358"}; +{"key"="2816";"subkey"="563";"value"="22367"}; +{"key"="2817";"subkey"="563";"value"="22376"}; +{"key"="2818";"subkey"="563";"value"="22385"}; +{"key"="2819";"subkey"="563";"value"="22394"}; +{"key"="2820";"subkey"="564";"value"="22403"}; +{"key"="2821";"subkey"="564";"value"="22412"}; +{"key"="2822";"subkey"="564";"value"="22421"}; +{"key"="2823";"subkey"="564";"value"="22430"}; +{"key"="2824";"subkey"="564";"value"="22439"}; +{"key"="2825";"subkey"="565";"value"="22448"}; +{"key"="2826";"subkey"="565";"value"="22457"}; +{"key"="2827";"subkey"="565";"value"="22466"}; +{"key"="2828";"subkey"="565";"value"="22475"}; +{"key"="2829";"subkey"="565";"value"="22483"}; +{"key"="2830";"subkey"="566";"value"="22492"}; +{"key"="2831";"subkey"="566";"value"="22501"}; +{"key"="2832";"subkey"="566";"value"="22510"}; +{"key"="2833";"subkey"="566";"value"="22519"}; 
+{"key"="2834";"subkey"="566";"value"="22528"}; +{"key"="2835";"subkey"="567";"value"="22537"}; +{"key"="2836";"subkey"="567";"value"="22546"}; +{"key"="2837";"subkey"="567";"value"="22555"}; +{"key"="2838";"subkey"="567";"value"="22564"}; +{"key"="2839";"subkey"="567";"value"="22573"}; +{"key"="2840";"subkey"="568";"value"="22582"}; +{"key"="2841";"subkey"="568";"value"="22591"}; +{"key"="2842";"subkey"="568";"value"="22600"}; +{"key"="2843";"subkey"="568";"value"="22609"}; +{"key"="2844";"subkey"="568";"value"="22618"}; +{"key"="2845";"subkey"="569";"value"="22627"}; +{"key"="2846";"subkey"="569";"value"="22636"}; +{"key"="2847";"subkey"="569";"value"="22645"}; +{"key"="2848";"subkey"="569";"value"="22654"}; +{"key"="2849";"subkey"="569";"value"="22663"}; +{"key"="2850";"subkey"="570";"value"="22671"}; +{"key"="2851";"subkey"="570";"value"="22680"}; +{"key"="2852";"subkey"="570";"value"="22689"}; +{"key"="2853";"subkey"="570";"value"="22698"}; +{"key"="2854";"subkey"="570";"value"="22707"}; +{"key"="2855";"subkey"="571";"value"="22716"}; +{"key"="2856";"subkey"="571";"value"="22725"}; +{"key"="2857";"subkey"="571";"value"="22734"}; +{"key"="2858";"subkey"="571";"value"="22743"}; +{"key"="2859";"subkey"="571";"value"="22752"}; +{"key"="2860";"subkey"="572";"value"="22761"}; +{"key"="2861";"subkey"="572";"value"="22770"}; +{"key"="2862";"subkey"="572";"value"="22779"}; +{"key"="2863";"subkey"="572";"value"="22788"}; +{"key"="2864";"subkey"="572";"value"="22797"}; +{"key"="2865";"subkey"="573";"value"="22806"}; +{"key"="2866";"subkey"="573";"value"="22815"}; +{"key"="2867";"subkey"="573";"value"="22824"}; +{"key"="2868";"subkey"="573";"value"="22833"}; +{"key"="2869";"subkey"="573";"value"="22842"}; +{"key"="2870";"subkey"="574";"value"="22851"}; +{"key"="2871";"subkey"="574";"value"="22860"}; +{"key"="2872";"subkey"="574";"value"="22869"}; +{"key"="2873";"subkey"="574";"value"="22878"}; +{"key"="2874";"subkey"="574";"value"="22886"}; 
+{"key"="2875";"subkey"="575";"value"="22895"}; +{"key"="2876";"subkey"="575";"value"="22904"}; +{"key"="2877";"subkey"="575";"value"="22913"}; +{"key"="2878";"subkey"="575";"value"="22922"}; +{"key"="2879";"subkey"="575";"value"="22931"}; +{"key"="2880";"subkey"="576";"value"="22940"}; +{"key"="2881";"subkey"="576";"value"="22949"}; +{"key"="2882";"subkey"="576";"value"="22958"}; +{"key"="2883";"subkey"="576";"value"="22967"}; +{"key"="2884";"subkey"="576";"value"="22976"}; +{"key"="2885";"subkey"="577";"value"="22985"}; +{"key"="2886";"subkey"="577";"value"="22994"}; +{"key"="2887";"subkey"="577";"value"="23003"}; +{"key"="2888";"subkey"="577";"value"="23012"}; +{"key"="2889";"subkey"="577";"value"="23021"}; +{"key"="2890";"subkey"="578";"value"="23030"}; +{"key"="2891";"subkey"="578";"value"="23039"}; +{"key"="2892";"subkey"="578";"value"="23048"}; +{"key"="2893";"subkey"="578";"value"="23057"}; +{"key"="2894";"subkey"="578";"value"="23066"}; +{"key"="2895";"subkey"="579";"value"="23075"}; +{"key"="2896";"subkey"="579";"value"="23084"}; +{"key"="2897";"subkey"="579";"value"="23093"}; +{"key"="2898";"subkey"="579";"value"="23102"}; +{"key"="2899";"subkey"="579";"value"="23111"}; +{"key"="2900";"subkey"="580";"value"="23120"}; +{"key"="2901";"subkey"="580";"value"="23129"}; +{"key"="2902";"subkey"="580";"value"="23138"}; +{"key"="2903";"subkey"="580";"value"="23147"}; +{"key"="2904";"subkey"="580";"value"="23156"}; +{"key"="2905";"subkey"="581";"value"="23165"}; +{"key"="2906";"subkey"="581";"value"="23173"}; +{"key"="2907";"subkey"="581";"value"="23182"}; +{"key"="2908";"subkey"="581";"value"="23191"}; +{"key"="2909";"subkey"="581";"value"="23200"}; +{"key"="2910";"subkey"="582";"value"="23209"}; +{"key"="2911";"subkey"="582";"value"="23218"}; +{"key"="2912";"subkey"="582";"value"="23227"}; +{"key"="2913";"subkey"="582";"value"="23236"}; +{"key"="2914";"subkey"="582";"value"="23245"}; +{"key"="2915";"subkey"="583";"value"="23254"}; 
+{"key"="2916";"subkey"="583";"value"="23263"}; +{"key"="2917";"subkey"="583";"value"="23272"}; +{"key"="2918";"subkey"="583";"value"="23281"}; +{"key"="2919";"subkey"="583";"value"="23290"}; +{"key"="2920";"subkey"="584";"value"="23299"}; +{"key"="2921";"subkey"="584";"value"="23308"}; +{"key"="2922";"subkey"="584";"value"="23317"}; +{"key"="2923";"subkey"="584";"value"="23326"}; +{"key"="2924";"subkey"="584";"value"="23335"}; +{"key"="2925";"subkey"="585";"value"="23344"}; +{"key"="2926";"subkey"="585";"value"="23353"}; +{"key"="2927";"subkey"="585";"value"="23362"}; +{"key"="2928";"subkey"="585";"value"="23371"}; +{"key"="2929";"subkey"="585";"value"="23380"}; +{"key"="2930";"subkey"="586";"value"="23389"}; +{"key"="2931";"subkey"="586";"value"="23398"}; +{"key"="2932";"subkey"="586";"value"="23407"}; +{"key"="2933";"subkey"="586";"value"="23416"}; +{"key"="2934";"subkey"="586";"value"="23425"}; +{"key"="2935";"subkey"="587";"value"="23434"}; +{"key"="2936";"subkey"="587";"value"="23443"}; +{"key"="2937";"subkey"="587";"value"="23452"}; +{"key"="2938";"subkey"="587";"value"="23461"}; +{"key"="2939";"subkey"="587";"value"="23470"}; +{"key"="2940";"subkey"="588";"value"="23479"}; +{"key"="2941";"subkey"="588";"value"="23488"}; +{"key"="2942";"subkey"="588";"value"="23497"}; +{"key"="2943";"subkey"="588";"value"="23506"}; +{"key"="2944";"subkey"="588";"value"="23515"}; +{"key"="2945";"subkey"="589";"value"="23524"}; +{"key"="2946";"subkey"="589";"value"="23533"}; +{"key"="2947";"subkey"="589";"value"="23542"}; +{"key"="2948";"subkey"="589";"value"="23551"}; +{"key"="2949";"subkey"="589";"value"="23560"}; +{"key"="2950";"subkey"="590";"value"="23569"}; +{"key"="2951";"subkey"="590";"value"="23578"}; +{"key"="2952";"subkey"="590";"value"="23587"}; +{"key"="2953";"subkey"="590";"value"="23596"}; +{"key"="2954";"subkey"="590";"value"="23605"}; +{"key"="2955";"subkey"="591";"value"="23614"}; +{"key"="2956";"subkey"="591";"value"="23623"}; 
+{"key"="2957";"subkey"="591";"value"="23632"}; +{"key"="2958";"subkey"="591";"value"="23641"}; +{"key"="2959";"subkey"="591";"value"="23650"}; +{"key"="2960";"subkey"="592";"value"="23659"}; +{"key"="2961";"subkey"="592";"value"="23668"}; +{"key"="2962";"subkey"="592";"value"="23677"}; +{"key"="2963";"subkey"="592";"value"="23686"}; +{"key"="2964";"subkey"="592";"value"="23695"}; +{"key"="2965";"subkey"="593";"value"="23704"}; +{"key"="2966";"subkey"="593";"value"="23713"}; +{"key"="2967";"subkey"="593";"value"="23722"}; +{"key"="2968";"subkey"="593";"value"="23731"}; +{"key"="2969";"subkey"="593";"value"="23740"}; +{"key"="2970";"subkey"="594";"value"="23749"}; +{"key"="2971";"subkey"="594";"value"="23758"}; +{"key"="2972";"subkey"="594";"value"="23767"}; +{"key"="2973";"subkey"="594";"value"="23776"}; +{"key"="2974";"subkey"="594";"value"="23785"}; +{"key"="2975";"subkey"="595";"value"="23794"}; +{"key"="2976";"subkey"="595";"value"="23803"}; +{"key"="2977";"subkey"="595";"value"="23812"}; +{"key"="2978";"subkey"="595";"value"="23821"}; +{"key"="2979";"subkey"="595";"value"="23830"}; +{"key"="2980";"subkey"="596";"value"="23839"}; +{"key"="2981";"subkey"="596";"value"="23848"}; +{"key"="2982";"subkey"="596";"value"="23857"}; +{"key"="2983";"subkey"="596";"value"="23866"}; +{"key"="2984";"subkey"="596";"value"="23875"}; +{"key"="2985";"subkey"="597";"value"="23884"}; +{"key"="2986";"subkey"="597";"value"="23893"}; +{"key"="2987";"subkey"="597";"value"="23902"}; +{"key"="2988";"subkey"="597";"value"="23911"}; +{"key"="2989";"subkey"="597";"value"="23920"}; +{"key"="2990";"subkey"="598";"value"="23929"}; +{"key"="2991";"subkey"="598";"value"="23938"}; +{"key"="2992";"subkey"="598";"value"="23947"}; +{"key"="2993";"subkey"="598";"value"="23956"}; +{"key"="2994";"subkey"="598";"value"="23965"}; +{"key"="2995";"subkey"="599";"value"="23974"}; +{"key"="2996";"subkey"="599";"value"="23983"}; +{"key"="2997";"subkey"="599";"value"="23992"}; 
+{"key"="2998";"subkey"="599";"value"="24001"}; +{"key"="2999";"subkey"="599";"value"="24010"}; +{"key"="3000";"subkey"="600";"value"="24019"}; +{"key"="3001";"subkey"="600";"value"="24028"}; +{"key"="3002";"subkey"="600";"value"="24037"}; +{"key"="3003";"subkey"="600";"value"="24046"}; +{"key"="3004";"subkey"="600";"value"="24055"}; +{"key"="3005";"subkey"="601";"value"="24064"}; +{"key"="3006";"subkey"="601";"value"="24073"}; +{"key"="3007";"subkey"="601";"value"="24082"}; +{"key"="3008";"subkey"="601";"value"="24091"}; +{"key"="3009";"subkey"="601";"value"="24100"}; +{"key"="3010";"subkey"="602";"value"="24109"}; +{"key"="3011";"subkey"="602";"value"="24118"}; +{"key"="3012";"subkey"="602";"value"="24127"}; +{"key"="3013";"subkey"="602";"value"="24136"}; +{"key"="3014";"subkey"="602";"value"="24145"}; +{"key"="3015";"subkey"="603";"value"="24154"}; +{"key"="3016";"subkey"="603";"value"="24163"}; +{"key"="3017";"subkey"="603";"value"="24172"}; +{"key"="3018";"subkey"="603";"value"="24181"}; +{"key"="3019";"subkey"="603";"value"="24190"}; +{"key"="3020";"subkey"="604";"value"="24199"}; +{"key"="3021";"subkey"="604";"value"="24208"}; +{"key"="3022";"subkey"="604";"value"="24217"}; +{"key"="3023";"subkey"="604";"value"="24226"}; +{"key"="3024";"subkey"="604";"value"="24235"}; +{"key"="3025";"subkey"="605";"value"="24244"}; +{"key"="3026";"subkey"="605";"value"="24253"}; +{"key"="3027";"subkey"="605";"value"="24262"}; +{"key"="3028";"subkey"="605";"value"="24271"}; +{"key"="3029";"subkey"="605";"value"="24280"}; +{"key"="3030";"subkey"="606";"value"="24289"}; +{"key"="3031";"subkey"="606";"value"="24298"}; +{"key"="3032";"subkey"="606";"value"="24307"}; +{"key"="3033";"subkey"="606";"value"="24316"}; +{"key"="3034";"subkey"="606";"value"="24325"}; +{"key"="3035";"subkey"="607";"value"="24334"}; +{"key"="3036";"subkey"="607";"value"="24343"}; +{"key"="3037";"subkey"="607";"value"="24352"}; +{"key"="3038";"subkey"="607";"value"="24361"}; 
+{"key"="3039";"subkey"="607";"value"="24370"}; +{"key"="3040";"subkey"="608";"value"="24379"}; +{"key"="3041";"subkey"="608";"value"="24388"}; +{"key"="3042";"subkey"="608";"value"="24397"}; +{"key"="3043";"subkey"="608";"value"="24406"}; +{"key"="3044";"subkey"="608";"value"="24415"}; +{"key"="3045";"subkey"="609";"value"="24424"}; +{"key"="3046";"subkey"="609";"value"="24433"}; +{"key"="3047";"subkey"="609";"value"="24442"}; +{"key"="3048";"subkey"="609";"value"="24451"}; +{"key"="3049";"subkey"="609";"value"="24460"}; +{"key"="3050";"subkey"="610";"value"="24469"}; +{"key"="3051";"subkey"="610";"value"="24478"}; +{"key"="3052";"subkey"="610";"value"="24487"}; +{"key"="3053";"subkey"="610";"value"="24496"}; +{"key"="3054";"subkey"="610";"value"="24505"}; +{"key"="3055";"subkey"="611";"value"="24514"}; +{"key"="3056";"subkey"="611";"value"="24523"}; +{"key"="3057";"subkey"="611";"value"="24533"}; +{"key"="3058";"subkey"="611";"value"="24542"}; +{"key"="3059";"subkey"="611";"value"="24551"}; +{"key"="3060";"subkey"="612";"value"="24560"}; +{"key"="3061";"subkey"="612";"value"="24569"}; +{"key"="3062";"subkey"="612";"value"="24578"}; +{"key"="3063";"subkey"="612";"value"="24587"}; +{"key"="3064";"subkey"="612";"value"="24596"}; +{"key"="3065";"subkey"="613";"value"="24605"}; +{"key"="3066";"subkey"="613";"value"="24614"}; +{"key"="3067";"subkey"="613";"value"="24623"}; +{"key"="3068";"subkey"="613";"value"="24632"}; +{"key"="3069";"subkey"="613";"value"="24641"}; +{"key"="3070";"subkey"="614";"value"="24650"}; +{"key"="3071";"subkey"="614";"value"="24659"}; +{"key"="3072";"subkey"="614";"value"="24668"}; +{"key"="3073";"subkey"="614";"value"="24677"}; +{"key"="3074";"subkey"="614";"value"="24686"}; +{"key"="3075";"subkey"="615";"value"="24695"}; +{"key"="3076";"subkey"="615";"value"="24704"}; +{"key"="3077";"subkey"="615";"value"="24713"}; +{"key"="3078";"subkey"="615";"value"="24722"}; +{"key"="3079";"subkey"="615";"value"="24731"}; 
+{"key"="3080";"subkey"="616";"value"="24740"}; +{"key"="3081";"subkey"="616";"value"="24749"}; +{"key"="3082";"subkey"="616";"value"="24758"}; +{"key"="3083";"subkey"="616";"value"="24767"}; +{"key"="3084";"subkey"="616";"value"="24776"}; +{"key"="3085";"subkey"="617";"value"="24785"}; +{"key"="3086";"subkey"="617";"value"="24794"}; +{"key"="3087";"subkey"="617";"value"="24803"}; +{"key"="3088";"subkey"="617";"value"="24812"}; +{"key"="3089";"subkey"="617";"value"="24821"}; +{"key"="3090";"subkey"="618";"value"="24831"}; +{"key"="3091";"subkey"="618";"value"="24840"}; +{"key"="3092";"subkey"="618";"value"="24849"}; +{"key"="3093";"subkey"="618";"value"="24858"}; +{"key"="3094";"subkey"="618";"value"="24867"}; +{"key"="3095";"subkey"="619";"value"="24876"}; +{"key"="3096";"subkey"="619";"value"="24885"}; +{"key"="3097";"subkey"="619";"value"="24894"}; +{"key"="3098";"subkey"="619";"value"="24903"}; +{"key"="3099";"subkey"="619";"value"="24912"}; +{"key"="3100";"subkey"="620";"value"="24921"}; +{"key"="3101";"subkey"="620";"value"="24930"}; +{"key"="3102";"subkey"="620";"value"="24939"}; +{"key"="3103";"subkey"="620";"value"="24948"}; +{"key"="3104";"subkey"="620";"value"="24957"}; +{"key"="3105";"subkey"="621";"value"="24966"}; +{"key"="3106";"subkey"="621";"value"="24975"}; +{"key"="3107";"subkey"="621";"value"="24984"}; +{"key"="3108";"subkey"="621";"value"="24993"}; +{"key"="3109";"subkey"="621";"value"="25002"}; +{"key"="3110";"subkey"="622";"value"="25011"}; +{"key"="3111";"subkey"="622";"value"="25020"}; +{"key"="3112";"subkey"="622";"value"="25029"}; +{"key"="3113";"subkey"="622";"value"="25038"}; +{"key"="3114";"subkey"="622";"value"="25047"}; +{"key"="3115";"subkey"="623";"value"="25057"}; +{"key"="3116";"subkey"="623";"value"="25066"}; +{"key"="3117";"subkey"="623";"value"="25075"}; +{"key"="3118";"subkey"="623";"value"="25084"}; +{"key"="3119";"subkey"="623";"value"="25093"}; +{"key"="3120";"subkey"="624";"value"="25102"}; 
+{"key"="3121";"subkey"="624";"value"="25111"}; +{"key"="3122";"subkey"="624";"value"="25120"}; +{"key"="3123";"subkey"="624";"value"="25129"}; +{"key"="3124";"subkey"="624";"value"="25138"}; +{"key"="3125";"subkey"="625";"value"="25147"}; +{"key"="3126";"subkey"="625";"value"="25156"}; +{"key"="3127";"subkey"="625";"value"="25165"}; +{"key"="3128";"subkey"="625";"value"="25174"}; +{"key"="3129";"subkey"="625";"value"="25183"}; +{"key"="3130";"subkey"="626";"value"="25192"}; +{"key"="3131";"subkey"="626";"value"="25201"}; +{"key"="3132";"subkey"="626";"value"="25210"}; +{"key"="3133";"subkey"="626";"value"="25219"}; +{"key"="3134";"subkey"="626";"value"="25228"}; +{"key"="3135";"subkey"="627";"value"="25237"}; +{"key"="3136";"subkey"="627";"value"="25247"}; +{"key"="3137";"subkey"="627";"value"="25256"}; +{"key"="3138";"subkey"="627";"value"="25265"}; +{"key"="3139";"subkey"="627";"value"="25274"}; +{"key"="3140";"subkey"="628";"value"="25283"}; +{"key"="3141";"subkey"="628";"value"="25292"}; +{"key"="3142";"subkey"="628";"value"="25301"}; +{"key"="3143";"subkey"="628";"value"="25310"}; +{"key"="3144";"subkey"="628";"value"="25319"}; +{"key"="3145";"subkey"="629";"value"="25328"}; +{"key"="3146";"subkey"="629";"value"="25337"}; +{"key"="3147";"subkey"="629";"value"="25346"}; +{"key"="3148";"subkey"="629";"value"="25355"}; +{"key"="3149";"subkey"="629";"value"="25364"}; +{"key"="3150";"subkey"="630";"value"="25373"}; +{"key"="3151";"subkey"="630";"value"="25382"}; +{"key"="3152";"subkey"="630";"value"="25391"}; +{"key"="3153";"subkey"="630";"value"="25400"}; +{"key"="3154";"subkey"="630";"value"="25409"}; +{"key"="3155";"subkey"="631";"value"="25419"}; +{"key"="3156";"subkey"="631";"value"="25428"}; +{"key"="3157";"subkey"="631";"value"="25437"}; +{"key"="3158";"subkey"="631";"value"="25446"}; +{"key"="3159";"subkey"="631";"value"="25455"}; +{"key"="3160";"subkey"="632";"value"="25464"}; +{"key"="3161";"subkey"="632";"value"="25473"}; 
+{"key"="3162";"subkey"="632";"value"="25482"}; +{"key"="3163";"subkey"="632";"value"="25491"}; +{"key"="3164";"subkey"="632";"value"="25500"}; +{"key"="3165";"subkey"="633";"value"="25509"}; +{"key"="3166";"subkey"="633";"value"="25518"}; +{"key"="3167";"subkey"="633";"value"="25527"}; +{"key"="3168";"subkey"="633";"value"="25536"}; +{"key"="3169";"subkey"="633";"value"="25545"}; +{"key"="3170";"subkey"="634";"value"="25554"}; +{"key"="3171";"subkey"="634";"value"="25563"}; +{"key"="3172";"subkey"="634";"value"="25573"}; +{"key"="3173";"subkey"="634";"value"="25582"}; +{"key"="3174";"subkey"="634";"value"="25591"}; +{"key"="3175";"subkey"="635";"value"="25600"}; +{"key"="3176";"subkey"="635";"value"="25609"}; +{"key"="3177";"subkey"="635";"value"="25618"}; +{"key"="3178";"subkey"="635";"value"="25627"}; +{"key"="3179";"subkey"="635";"value"="25636"}; +{"key"="3180";"subkey"="636";"value"="25645"}; +{"key"="3181";"subkey"="636";"value"="25654"}; +{"key"="3182";"subkey"="636";"value"="25663"}; +{"key"="3183";"subkey"="636";"value"="25672"}; +{"key"="3184";"subkey"="636";"value"="25681"}; +{"key"="3185";"subkey"="637";"value"="25690"}; +{"key"="3186";"subkey"="637";"value"="25699"}; +{"key"="3187";"subkey"="637";"value"="25709"}; +{"key"="3188";"subkey"="637";"value"="25718"}; +{"key"="3189";"subkey"="637";"value"="25727"}; +{"key"="3190";"subkey"="638";"value"="25736"}; +{"key"="3191";"subkey"="638";"value"="25745"}; +{"key"="3192";"subkey"="638";"value"="25754"}; +{"key"="3193";"subkey"="638";"value"="25763"}; +{"key"="3194";"subkey"="638";"value"="25772"}; +{"key"="3195";"subkey"="639";"value"="25781"}; +{"key"="3196";"subkey"="639";"value"="25790"}; +{"key"="3197";"subkey"="639";"value"="25799"}; +{"key"="3198";"subkey"="639";"value"="25808"}; +{"key"="3199";"subkey"="639";"value"="25817"}; +{"key"="3200";"subkey"="640";"value"="25826"}; +{"key"="3201";"subkey"="640";"value"="25835"}; +{"key"="3202";"subkey"="640";"value"="25845"}; 
+{"key"="3203";"subkey"="640";"value"="25854"}; +{"key"="3204";"subkey"="640";"value"="25863"}; +{"key"="3205";"subkey"="641";"value"="25872"}; +{"key"="3206";"subkey"="641";"value"="25881"}; +{"key"="3207";"subkey"="641";"value"="25890"}; +{"key"="3208";"subkey"="641";"value"="25899"}; +{"key"="3209";"subkey"="641";"value"="25908"}; +{"key"="3210";"subkey"="642";"value"="25917"}; +{"key"="3211";"subkey"="642";"value"="25926"}; +{"key"="3212";"subkey"="642";"value"="25935"}; +{"key"="3213";"subkey"="642";"value"="25944"}; +{"key"="3214";"subkey"="642";"value"="25953"}; +{"key"="3215";"subkey"="643";"value"="25962"}; +{"key"="3216";"subkey"="643";"value"="25972"}; +{"key"="3217";"subkey"="643";"value"="25981"}; +{"key"="3218";"subkey"="643";"value"="25990"}; +{"key"="3219";"subkey"="643";"value"="25999"}; +{"key"="3220";"subkey"="644";"value"="26008"}; +{"key"="3221";"subkey"="644";"value"="26017"}; +{"key"="3222";"subkey"="644";"value"="26026"}; +{"key"="3223";"subkey"="644";"value"="26035"}; +{"key"="3224";"subkey"="644";"value"="26044"}; +{"key"="3225";"subkey"="645";"value"="26053"}; +{"key"="3226";"subkey"="645";"value"="26062"}; +{"key"="3227";"subkey"="645";"value"="26071"}; +{"key"="3228";"subkey"="645";"value"="26081"}; +{"key"="3229";"subkey"="645";"value"="26090"}; +{"key"="3230";"subkey"="646";"value"="26099"}; +{"key"="3231";"subkey"="646";"value"="26108"}; +{"key"="3232";"subkey"="646";"value"="26117"}; +{"key"="3233";"subkey"="646";"value"="26126"}; +{"key"="3234";"subkey"="646";"value"="26135"}; +{"key"="3235";"subkey"="647";"value"="26144"}; +{"key"="3236";"subkey"="647";"value"="26153"}; +{"key"="3237";"subkey"="647";"value"="26162"}; +{"key"="3238";"subkey"="647";"value"="26171"}; +{"key"="3239";"subkey"="647";"value"="26180"}; +{"key"="3240";"subkey"="648";"value"="26189"}; +{"key"="3241";"subkey"="648";"value"="26199"}; +{"key"="3242";"subkey"="648";"value"="26208"}; +{"key"="3243";"subkey"="648";"value"="26217"}; 
+{"key"="3244";"subkey"="648";"value"="26226"}; +{"key"="3245";"subkey"="649";"value"="26235"}; +{"key"="3246";"subkey"="649";"value"="26244"}; +{"key"="3247";"subkey"="649";"value"="26253"}; +{"key"="3248";"subkey"="649";"value"="26262"}; +{"key"="3249";"subkey"="649";"value"="26271"}; +{"key"="3250";"subkey"="650";"value"="26280"}; +{"key"="3251";"subkey"="650";"value"="26289"}; +{"key"="3252";"subkey"="650";"value"="26299"}; +{"key"="3253";"subkey"="650";"value"="26308"}; +{"key"="3254";"subkey"="650";"value"="26317"}; +{"key"="3255";"subkey"="651";"value"="26326"}; +{"key"="3256";"subkey"="651";"value"="26335"}; +{"key"="3257";"subkey"="651";"value"="26344"}; +{"key"="3258";"subkey"="651";"value"="26353"}; +{"key"="3259";"subkey"="651";"value"="26362"}; +{"key"="3260";"subkey"="652";"value"="26371"}; +{"key"="3261";"subkey"="652";"value"="26380"}; +{"key"="3262";"subkey"="652";"value"="26389"}; +{"key"="3263";"subkey"="652";"value"="26398"}; +{"key"="3264";"subkey"="652";"value"="26408"}; +{"key"="3265";"subkey"="653";"value"="26417"}; +{"key"="3266";"subkey"="653";"value"="26426"}; +{"key"="3267";"subkey"="653";"value"="26435"}; +{"key"="3268";"subkey"="653";"value"="26444"}; +{"key"="3269";"subkey"="653";"value"="26453"}; +{"key"="3270";"subkey"="654";"value"="26462"}; +{"key"="3271";"subkey"="654";"value"="26471"}; +{"key"="3272";"subkey"="654";"value"="26480"}; +{"key"="3273";"subkey"="654";"value"="26489"}; +{"key"="3274";"subkey"="654";"value"="26498"}; +{"key"="3275";"subkey"="655";"value"="26508"}; +{"key"="3276";"subkey"="655";"value"="26517"}; +{"key"="3277";"subkey"="655";"value"="26526"}; +{"key"="3278";"subkey"="655";"value"="26535"}; +{"key"="3279";"subkey"="655";"value"="26544"}; +{"key"="3280";"subkey"="656";"value"="26553"}; +{"key"="3281";"subkey"="656";"value"="26562"}; +{"key"="3282";"subkey"="656";"value"="26571"}; +{"key"="3283";"subkey"="656";"value"="26580"}; +{"key"="3284";"subkey"="656";"value"="26589"}; 
+{"key"="3285";"subkey"="657";"value"="26599"}; +{"key"="3286";"subkey"="657";"value"="26608"}; +{"key"="3287";"subkey"="657";"value"="26617"}; +{"key"="3288";"subkey"="657";"value"="26626"}; +{"key"="3289";"subkey"="657";"value"="26635"}; +{"key"="3290";"subkey"="658";"value"="26644"}; +{"key"="3291";"subkey"="658";"value"="26653"}; +{"key"="3292";"subkey"="658";"value"="26662"}; +{"key"="3293";"subkey"="658";"value"="26671"}; +{"key"="3294";"subkey"="658";"value"="26680"}; +{"key"="3295";"subkey"="659";"value"="26690"}; +{"key"="3296";"subkey"="659";"value"="26699"}; +{"key"="3297";"subkey"="659";"value"="26708"}; +{"key"="3298";"subkey"="659";"value"="26717"}; +{"key"="3299";"subkey"="659";"value"="26726"}; +{"key"="3300";"subkey"="660";"value"="26735"}; +{"key"="3301";"subkey"="660";"value"="26744"}; +{"key"="3302";"subkey"="660";"value"="26753"}; +{"key"="3303";"subkey"="660";"value"="26762"}; +{"key"="3304";"subkey"="660";"value"="26771"}; +{"key"="3305";"subkey"="661";"value"="26781"}; +{"key"="3306";"subkey"="661";"value"="26790"}; +{"key"="3307";"subkey"="661";"value"="26799"}; +{"key"="3308";"subkey"="661";"value"="26808"}; +{"key"="3309";"subkey"="661";"value"="26817"}; +{"key"="3310";"subkey"="662";"value"="26826"}; +{"key"="3311";"subkey"="662";"value"="26835"}; +{"key"="3312";"subkey"="662";"value"="26844"}; +{"key"="3313";"subkey"="662";"value"="26853"}; +{"key"="3314";"subkey"="662";"value"="26862"}; +{"key"="3315";"subkey"="663";"value"="26872"}; +{"key"="3316";"subkey"="663";"value"="26881"}; +{"key"="3317";"subkey"="663";"value"="26890"}; +{"key"="3318";"subkey"="663";"value"="26899"}; +{"key"="3319";"subkey"="663";"value"="26908"}; +{"key"="3320";"subkey"="664";"value"="26917"}; +{"key"="3321";"subkey"="664";"value"="26926"}; +{"key"="3322";"subkey"="664";"value"="26935"}; +{"key"="3323";"subkey"="664";"value"="26944"}; +{"key"="3324";"subkey"="664";"value"="26954"}; +{"key"="3325";"subkey"="665";"value"="26963"}; 
+{"key"="3326";"subkey"="665";"value"="26972"}; +{"key"="3327";"subkey"="665";"value"="26981"}; +{"key"="3328";"subkey"="665";"value"="26990"}; +{"key"="3329";"subkey"="665";"value"="26999"}; +{"key"="3330";"subkey"="666";"value"="27008"}; +{"key"="3331";"subkey"="666";"value"="27017"}; +{"key"="3332";"subkey"="666";"value"="27026"}; +{"key"="3333";"subkey"="666";"value"="27036"}; +{"key"="3334";"subkey"="666";"value"="27045"}; +{"key"="3335";"subkey"="667";"value"="27054"}; +{"key"="3336";"subkey"="667";"value"="27063"}; +{"key"="3337";"subkey"="667";"value"="27072"}; +{"key"="3338";"subkey"="667";"value"="27081"}; +{"key"="3339";"subkey"="667";"value"="27090"}; +{"key"="3340";"subkey"="668";"value"="27099"}; +{"key"="3341";"subkey"="668";"value"="27108"}; +{"key"="3342";"subkey"="668";"value"="27118"}; +{"key"="3343";"subkey"="668";"value"="27127"}; +{"key"="3344";"subkey"="668";"value"="27136"}; +{"key"="3345";"subkey"="669";"value"="27145"}; +{"key"="3346";"subkey"="669";"value"="27154"}; +{"key"="3347";"subkey"="669";"value"="27163"}; +{"key"="3348";"subkey"="669";"value"="27172"}; +{"key"="3349";"subkey"="669";"value"="27181"}; +{"key"="3350";"subkey"="670";"value"="27190"}; +{"key"="3351";"subkey"="670";"value"="27200"}; +{"key"="3352";"subkey"="670";"value"="27209"}; +{"key"="3353";"subkey"="670";"value"="27218"}; +{"key"="3354";"subkey"="670";"value"="27227"}; +{"key"="3355";"subkey"="671";"value"="27236"}; +{"key"="3356";"subkey"="671";"value"="27245"}; +{"key"="3357";"subkey"="671";"value"="27254"}; +{"key"="3358";"subkey"="671";"value"="27263"}; +{"key"="3359";"subkey"="671";"value"="27273"}; +{"key"="3360";"subkey"="672";"value"="27282"}; +{"key"="3361";"subkey"="672";"value"="27291"}; +{"key"="3362";"subkey"="672";"value"="27300"}; +{"key"="3363";"subkey"="672";"value"="27309"}; +{"key"="3364";"subkey"="672";"value"="27318"}; +{"key"="3365";"subkey"="673";"value"="27327"}; +{"key"="3366";"subkey"="673";"value"="27336"}; 
+{"key"="3367";"subkey"="673";"value"="27346"}; +{"key"="3368";"subkey"="673";"value"="27355"}; +{"key"="3369";"subkey"="673";"value"="27364"}; +{"key"="3370";"subkey"="674";"value"="27373"}; +{"key"="3371";"subkey"="674";"value"="27382"}; +{"key"="3372";"subkey"="674";"value"="27391"}; +{"key"="3373";"subkey"="674";"value"="27400"}; +{"key"="3374";"subkey"="674";"value"="27409"}; +{"key"="3375";"subkey"="675";"value"="27419"}; +{"key"="3376";"subkey"="675";"value"="27428"}; +{"key"="3377";"subkey"="675";"value"="27437"}; +{"key"="3378";"subkey"="675";"value"="27446"}; +{"key"="3379";"subkey"="675";"value"="27455"}; +{"key"="3380";"subkey"="676";"value"="27464"}; +{"key"="3381";"subkey"="676";"value"="27473"}; +{"key"="3382";"subkey"="676";"value"="27482"}; +{"key"="3383";"subkey"="676";"value"="27492"}; +{"key"="3384";"subkey"="676";"value"="27501"}; +{"key"="3385";"subkey"="677";"value"="27510"}; +{"key"="3386";"subkey"="677";"value"="27519"}; +{"key"="3387";"subkey"="677";"value"="27528"}; +{"key"="3388";"subkey"="677";"value"="27537"}; +{"key"="3389";"subkey"="677";"value"="27546"}; +{"key"="3390";"subkey"="678";"value"="27555"}; +{"key"="3391";"subkey"="678";"value"="27565"}; +{"key"="3392";"subkey"="678";"value"="27574"}; +{"key"="3393";"subkey"="678";"value"="27583"}; +{"key"="3394";"subkey"="678";"value"="27592"}; +{"key"="3395";"subkey"="679";"value"="27601"}; +{"key"="3396";"subkey"="679";"value"="27610"}; +{"key"="3397";"subkey"="679";"value"="27619"}; +{"key"="3398";"subkey"="679";"value"="27628"}; +{"key"="3399";"subkey"="679";"value"="27638"}; +{"key"="3400";"subkey"="680";"value"="27647"}; +{"key"="3401";"subkey"="680";"value"="27656"}; +{"key"="3402";"subkey"="680";"value"="27665"}; +{"key"="3403";"subkey"="680";"value"="27674"}; +{"key"="3404";"subkey"="680";"value"="27683"}; +{"key"="3405";"subkey"="681";"value"="27692"}; +{"key"="3406";"subkey"="681";"value"="27701"}; +{"key"="3407";"subkey"="681";"value"="27711"}; 
+{"key"="3408";"subkey"="681";"value"="27720"}; +{"key"="3409";"subkey"="681";"value"="27729"}; +{"key"="3410";"subkey"="682";"value"="27738"}; +{"key"="3411";"subkey"="682";"value"="27747"}; +{"key"="3412";"subkey"="682";"value"="27756"}; +{"key"="3413";"subkey"="682";"value"="27765"}; +{"key"="3414";"subkey"="682";"value"="27775"}; +{"key"="3415";"subkey"="683";"value"="27784"}; +{"key"="3416";"subkey"="683";"value"="27793"}; +{"key"="3417";"subkey"="683";"value"="27802"}; +{"key"="3418";"subkey"="683";"value"="27811"}; +{"key"="3419";"subkey"="683";"value"="27820"}; +{"key"="3420";"subkey"="684";"value"="27829"}; +{"key"="3421";"subkey"="684";"value"="27839"}; +{"key"="3422";"subkey"="684";"value"="27848"}; +{"key"="3423";"subkey"="684";"value"="27857"}; +{"key"="3424";"subkey"="684";"value"="27866"}; +{"key"="3425";"subkey"="685";"value"="27875"}; +{"key"="3426";"subkey"="685";"value"="27884"}; +{"key"="3427";"subkey"="685";"value"="27893"}; +{"key"="3428";"subkey"="685";"value"="27903"}; +{"key"="3429";"subkey"="685";"value"="27912"}; +{"key"="3430";"subkey"="686";"value"="27921"}; +{"key"="3431";"subkey"="686";"value"="27930"}; +{"key"="3432";"subkey"="686";"value"="27939"}; +{"key"="3433";"subkey"="686";"value"="27948"}; +{"key"="3434";"subkey"="686";"value"="27957"}; +{"key"="3435";"subkey"="687";"value"="27966"}; +{"key"="3436";"subkey"="687";"value"="27976"}; +{"key"="3437";"subkey"="687";"value"="27985"}; +{"key"="3438";"subkey"="687";"value"="27994"}; +{"key"="3439";"subkey"="687";"value"="28003"}; +{"key"="3440";"subkey"="688";"value"="28012"}; +{"key"="3441";"subkey"="688";"value"="28021"}; +{"key"="3442";"subkey"="688";"value"="28030"}; +{"key"="3443";"subkey"="688";"value"="28040"}; +{"key"="3444";"subkey"="688";"value"="28049"}; +{"key"="3445";"subkey"="689";"value"="28058"}; +{"key"="3446";"subkey"="689";"value"="28067"}; +{"key"="3447";"subkey"="689";"value"="28076"}; +{"key"="3448";"subkey"="689";"value"="28085"}; 
+{"key"="3449";"subkey"="689";"value"="28095"}; +{"key"="3450";"subkey"="690";"value"="28104"}; +{"key"="3451";"subkey"="690";"value"="28113"}; +{"key"="3452";"subkey"="690";"value"="28122"}; +{"key"="3453";"subkey"="690";"value"="28131"}; +{"key"="3454";"subkey"="690";"value"="28140"}; +{"key"="3455";"subkey"="691";"value"="28149"}; +{"key"="3456";"subkey"="691";"value"="28159"}; +{"key"="3457";"subkey"="691";"value"="28168"}; +{"key"="3458";"subkey"="691";"value"="28177"}; +{"key"="3459";"subkey"="691";"value"="28186"}; +{"key"="3460";"subkey"="692";"value"="28195"}; +{"key"="3461";"subkey"="692";"value"="28204"}; +{"key"="3462";"subkey"="692";"value"="28213"}; +{"key"="3463";"subkey"="692";"value"="28223"}; +{"key"="3464";"subkey"="692";"value"="28232"}; +{"key"="3465";"subkey"="693";"value"="28241"}; +{"key"="3466";"subkey"="693";"value"="28250"}; +{"key"="3467";"subkey"="693";"value"="28259"}; +{"key"="3468";"subkey"="693";"value"="28268"}; +{"key"="3469";"subkey"="693";"value"="28277"}; +{"key"="3470";"subkey"="694";"value"="28287"}; +{"key"="3471";"subkey"="694";"value"="28296"}; +{"key"="3472";"subkey"="694";"value"="28305"}; +{"key"="3473";"subkey"="694";"value"="28314"}; +{"key"="3474";"subkey"="694";"value"="28323"}; +{"key"="3475";"subkey"="695";"value"="28332"}; +{"key"="3476";"subkey"="695";"value"="28342"}; +{"key"="3477";"subkey"="695";"value"="28351"}; +{"key"="3478";"subkey"="695";"value"="28360"}; +{"key"="3479";"subkey"="695";"value"="28369"}; +{"key"="3480";"subkey"="696";"value"="28378"}; +{"key"="3481";"subkey"="696";"value"="28387"}; +{"key"="3482";"subkey"="696";"value"="28396"}; +{"key"="3483";"subkey"="696";"value"="28406"}; +{"key"="3484";"subkey"="696";"value"="28415"}; +{"key"="3485";"subkey"="697";"value"="28424"}; +{"key"="3486";"subkey"="697";"value"="28433"}; +{"key"="3487";"subkey"="697";"value"="28442"}; +{"key"="3488";"subkey"="697";"value"="28451"}; +{"key"="3489";"subkey"="697";"value"="28461"}; 
+{"key"="3490";"subkey"="698";"value"="28470"}; +{"key"="3491";"subkey"="698";"value"="28479"}; +{"key"="3492";"subkey"="698";"value"="28488"}; +{"key"="3493";"subkey"="698";"value"="28497"}; +{"key"="3494";"subkey"="698";"value"="28506"}; +{"key"="3495";"subkey"="699";"value"="28516"}; +{"key"="3496";"subkey"="699";"value"="28525"}; +{"key"="3497";"subkey"="699";"value"="28534"}; +{"key"="3498";"subkey"="699";"value"="28543"}; +{"key"="3499";"subkey"="699";"value"="28552"}; +{"key"="3500";"subkey"="700";"value"="28561"}; +{"key"="3501";"subkey"="700";"value"="28570"}; +{"key"="3502";"subkey"="700";"value"="28580"}; +{"key"="3503";"subkey"="700";"value"="28589"}; +{"key"="3504";"subkey"="700";"value"="28598"}; +{"key"="3505";"subkey"="701";"value"="28607"}; +{"key"="3506";"subkey"="701";"value"="28616"}; +{"key"="3507";"subkey"="701";"value"="28625"}; +{"key"="3508";"subkey"="701";"value"="28635"}; +{"key"="3509";"subkey"="701";"value"="28644"}; +{"key"="3510";"subkey"="702";"value"="28653"}; +{"key"="3511";"subkey"="702";"value"="28662"}; +{"key"="3512";"subkey"="702";"value"="28671"}; +{"key"="3513";"subkey"="702";"value"="28680"}; +{"key"="3514";"subkey"="702";"value"="28690"}; +{"key"="3515";"subkey"="703";"value"="28699"}; +{"key"="3516";"subkey"="703";"value"="28708"}; +{"key"="3517";"subkey"="703";"value"="28717"}; +{"key"="3518";"subkey"="703";"value"="28726"}; +{"key"="3519";"subkey"="703";"value"="28735"}; +{"key"="3520";"subkey"="704";"value"="28745"}; +{"key"="3521";"subkey"="704";"value"="28754"}; +{"key"="3522";"subkey"="704";"value"="28763"}; +{"key"="3523";"subkey"="704";"value"="28772"}; +{"key"="3524";"subkey"="704";"value"="28781"}; +{"key"="3525";"subkey"="705";"value"="28790"}; +{"key"="3526";"subkey"="705";"value"="28800"}; +{"key"="3527";"subkey"="705";"value"="28809"}; +{"key"="3528";"subkey"="705";"value"="28818"}; +{"key"="3529";"subkey"="705";"value"="28827"}; +{"key"="3530";"subkey"="706";"value"="28836"}; 
+{"key"="3531";"subkey"="706";"value"="28845"}; +{"key"="3532";"subkey"="706";"value"="28855"}; +{"key"="3533";"subkey"="706";"value"="28864"}; +{"key"="3534";"subkey"="706";"value"="28873"}; +{"key"="3535";"subkey"="707";"value"="28882"}; +{"key"="3536";"subkey"="707";"value"="28891"}; +{"key"="3537";"subkey"="707";"value"="28900"}; +{"key"="3538";"subkey"="707";"value"="28910"}; +{"key"="3539";"subkey"="707";"value"="28919"}; +{"key"="3540";"subkey"="708";"value"="28928"}; +{"key"="3541";"subkey"="708";"value"="28937"}; +{"key"="3542";"subkey"="708";"value"="28946"}; +{"key"="3543";"subkey"="708";"value"="28955"}; +{"key"="3544";"subkey"="708";"value"="28965"}; +{"key"="3545";"subkey"="709";"value"="28974"}; +{"key"="3546";"subkey"="709";"value"="28983"}; +{"key"="3547";"subkey"="709";"value"="28992"}; +{"key"="3548";"subkey"="709";"value"="29001"}; +{"key"="3549";"subkey"="709";"value"="29011"}; +{"key"="3550";"subkey"="710";"value"="29020"}; +{"key"="3551";"subkey"="710";"value"="29029"}; +{"key"="3552";"subkey"="710";"value"="29038"}; +{"key"="3553";"subkey"="710";"value"="29047"}; +{"key"="3554";"subkey"="710";"value"="29056"}; +{"key"="3555";"subkey"="711";"value"="29066"}; +{"key"="3556";"subkey"="711";"value"="29075"}; +{"key"="3557";"subkey"="711";"value"="29084"}; +{"key"="3558";"subkey"="711";"value"="29093"}; +{"key"="3559";"subkey"="711";"value"="29102"}; +{"key"="3560";"subkey"="712";"value"="29111"}; +{"key"="3561";"subkey"="712";"value"="29121"}; +{"key"="3562";"subkey"="712";"value"="29130"}; +{"key"="3563";"subkey"="712";"value"="29139"}; +{"key"="3564";"subkey"="712";"value"="29148"}; +{"key"="3565";"subkey"="713";"value"="29157"}; +{"key"="3566";"subkey"="713";"value"="29167"}; +{"key"="3567";"subkey"="713";"value"="29176"}; +{"key"="3568";"subkey"="713";"value"="29185"}; +{"key"="3569";"subkey"="713";"value"="29194"}; +{"key"="3570";"subkey"="714";"value"="29203"}; +{"key"="3571";"subkey"="714";"value"="29212"}; 
+{"key"="3572";"subkey"="714";"value"="29222"}; +{"key"="3573";"subkey"="714";"value"="29231"}; +{"key"="3574";"subkey"="714";"value"="29240"}; +{"key"="3575";"subkey"="715";"value"="29249"}; +{"key"="3576";"subkey"="715";"value"="29258"}; +{"key"="3577";"subkey"="715";"value"="29268"}; +{"key"="3578";"subkey"="715";"value"="29277"}; +{"key"="3579";"subkey"="715";"value"="29286"}; +{"key"="3580";"subkey"="716";"value"="29295"}; +{"key"="3581";"subkey"="716";"value"="29304"}; +{"key"="3582";"subkey"="716";"value"="29313"}; +{"key"="3583";"subkey"="716";"value"="29323"}; +{"key"="3584";"subkey"="716";"value"="29332"}; +{"key"="3585";"subkey"="717";"value"="29341"}; +{"key"="3586";"subkey"="717";"value"="29350"}; +{"key"="3587";"subkey"="717";"value"="29359"}; +{"key"="3588";"subkey"="717";"value"="29369"}; +{"key"="3589";"subkey"="717";"value"="29378"}; +{"key"="3590";"subkey"="718";"value"="29387"}; +{"key"="3591";"subkey"="718";"value"="29396"}; +{"key"="3592";"subkey"="718";"value"="29405"}; +{"key"="3593";"subkey"="718";"value"="29414"}; +{"key"="3594";"subkey"="718";"value"="29424"}; +{"key"="3595";"subkey"="719";"value"="29433"}; +{"key"="3596";"subkey"="719";"value"="29442"}; +{"key"="3597";"subkey"="719";"value"="29451"}; +{"key"="3598";"subkey"="719";"value"="29460"}; +{"key"="3599";"subkey"="719";"value"="29470"}; +{"key"="3600";"subkey"="720";"value"="29479"}; +{"key"="3601";"subkey"="720";"value"="29488"}; +{"key"="3602";"subkey"="720";"value"="29497"}; +{"key"="3603";"subkey"="720";"value"="29506"}; +{"key"="3604";"subkey"="720";"value"="29516"}; +{"key"="3605";"subkey"="721";"value"="29525"}; +{"key"="3606";"subkey"="721";"value"="29534"}; +{"key"="3607";"subkey"="721";"value"="29543"}; +{"key"="3608";"subkey"="721";"value"="29552"}; +{"key"="3609";"subkey"="721";"value"="29561"}; +{"key"="3610";"subkey"="722";"value"="29571"}; +{"key"="3611";"subkey"="722";"value"="29580"}; +{"key"="3612";"subkey"="722";"value"="29589"}; 
+{"key"="3613";"subkey"="722";"value"="29598"}; +{"key"="3614";"subkey"="722";"value"="29607"}; +{"key"="3615";"subkey"="723";"value"="29617"}; +{"key"="3616";"subkey"="723";"value"="29626"}; +{"key"="3617";"subkey"="723";"value"="29635"}; +{"key"="3618";"subkey"="723";"value"="29644"}; +{"key"="3619";"subkey"="723";"value"="29653"}; +{"key"="3620";"subkey"="724";"value"="29663"}; +{"key"="3621";"subkey"="724";"value"="29672"}; +{"key"="3622";"subkey"="724";"value"="29681"}; +{"key"="3623";"subkey"="724";"value"="29690"}; +{"key"="3624";"subkey"="724";"value"="29699"}; +{"key"="3625";"subkey"="725";"value"="29709"}; +{"key"="3626";"subkey"="725";"value"="29718"}; +{"key"="3627";"subkey"="725";"value"="29727"}; +{"key"="3628";"subkey"="725";"value"="29736"}; +{"key"="3629";"subkey"="725";"value"="29745"}; +{"key"="3630";"subkey"="726";"value"="29755"}; +{"key"="3631";"subkey"="726";"value"="29764"}; +{"key"="3632";"subkey"="726";"value"="29773"}; +{"key"="3633";"subkey"="726";"value"="29782"}; +{"key"="3634";"subkey"="726";"value"="29791"}; +{"key"="3635";"subkey"="727";"value"="29801"}; +{"key"="3636";"subkey"="727";"value"="29810"}; +{"key"="3637";"subkey"="727";"value"="29819"}; +{"key"="3638";"subkey"="727";"value"="29828"}; +{"key"="3639";"subkey"="727";"value"="29837"}; +{"key"="3640";"subkey"="728";"value"="29847"}; +{"key"="3641";"subkey"="728";"value"="29856"}; +{"key"="3642";"subkey"="728";"value"="29865"}; +{"key"="3643";"subkey"="728";"value"="29874"}; +{"key"="3644";"subkey"="728";"value"="29883"}; +{"key"="3645";"subkey"="729";"value"="29893"}; +{"key"="3646";"subkey"="729";"value"="29902"}; +{"key"="3647";"subkey"="729";"value"="29911"}; +{"key"="3648";"subkey"="729";"value"="29920"}; +{"key"="3649";"subkey"="729";"value"="29929"}; +{"key"="3650";"subkey"="730";"value"="29939"}; +{"key"="3651";"subkey"="730";"value"="29948"}; +{"key"="3652";"subkey"="730";"value"="29957"}; +{"key"="3653";"subkey"="730";"value"="29966"}; 
+{"key"="3654";"subkey"="730";"value"="29975"}; +{"key"="3655";"subkey"="731";"value"="29985"}; +{"key"="3656";"subkey"="731";"value"="29994"}; +{"key"="3657";"subkey"="731";"value"="30003"}; +{"key"="3658";"subkey"="731";"value"="30012"}; +{"key"="3659";"subkey"="731";"value"="30021"}; +{"key"="3660";"subkey"="732";"value"="30031"}; +{"key"="3661";"subkey"="732";"value"="30040"}; +{"key"="3662";"subkey"="732";"value"="30049"}; +{"key"="3663";"subkey"="732";"value"="30058"}; +{"key"="3664";"subkey"="732";"value"="30067"}; +{"key"="3665";"subkey"="733";"value"="30077"}; +{"key"="3666";"subkey"="733";"value"="30086"}; +{"key"="3667";"subkey"="733";"value"="30095"}; +{"key"="3668";"subkey"="733";"value"="30104"}; +{"key"="3669";"subkey"="733";"value"="30113"}; +{"key"="3670";"subkey"="734";"value"="30123"}; +{"key"="3671";"subkey"="734";"value"="30132"}; +{"key"="3672";"subkey"="734";"value"="30141"}; +{"key"="3673";"subkey"="734";"value"="30150"}; +{"key"="3674";"subkey"="734";"value"="30159"}; +{"key"="3675";"subkey"="735";"value"="30169"}; +{"key"="3676";"subkey"="735";"value"="30178"}; +{"key"="3677";"subkey"="735";"value"="30187"}; +{"key"="3678";"subkey"="735";"value"="30196"}; +{"key"="3679";"subkey"="735";"value"="30206"}; +{"key"="3680";"subkey"="736";"value"="30215"}; +{"key"="3681";"subkey"="736";"value"="30224"}; +{"key"="3682";"subkey"="736";"value"="30233"}; +{"key"="3683";"subkey"="736";"value"="30242"}; +{"key"="3684";"subkey"="736";"value"="30252"}; +{"key"="3685";"subkey"="737";"value"="30261"}; +{"key"="3686";"subkey"="737";"value"="30270"}; +{"key"="3687";"subkey"="737";"value"="30279"}; +{"key"="3688";"subkey"="737";"value"="30288"}; +{"key"="3689";"subkey"="737";"value"="30298"}; +{"key"="3690";"subkey"="738";"value"="30307"}; +{"key"="3691";"subkey"="738";"value"="30316"}; +{"key"="3692";"subkey"="738";"value"="30325"}; +{"key"="3693";"subkey"="738";"value"="30335"}; +{"key"="3694";"subkey"="738";"value"="30344"}; 
+{"key"="3695";"subkey"="739";"value"="30353"}; +{"key"="3696";"subkey"="739";"value"="30362"}; +{"key"="3697";"subkey"="739";"value"="30371"}; +{"key"="3698";"subkey"="739";"value"="30381"}; +{"key"="3699";"subkey"="739";"value"="30390"}; +{"key"="3700";"subkey"="740";"value"="30399"}; +{"key"="3701";"subkey"="740";"value"="30408"}; +{"key"="3702";"subkey"="740";"value"="30417"}; +{"key"="3703";"subkey"="740";"value"="30427"}; +{"key"="3704";"subkey"="740";"value"="30436"}; +{"key"="3705";"subkey"="741";"value"="30445"}; +{"key"="3706";"subkey"="741";"value"="30454"}; +{"key"="3707";"subkey"="741";"value"="30464"}; +{"key"="3708";"subkey"="741";"value"="30473"}; +{"key"="3709";"subkey"="741";"value"="30482"}; +{"key"="3710";"subkey"="742";"value"="30491"}; +{"key"="3711";"subkey"="742";"value"="30500"}; +{"key"="3712";"subkey"="742";"value"="30510"}; +{"key"="3713";"subkey"="742";"value"="30519"}; +{"key"="3714";"subkey"="742";"value"="30528"}; +{"key"="3715";"subkey"="743";"value"="30537"}; +{"key"="3716";"subkey"="743";"value"="30547"}; +{"key"="3717";"subkey"="743";"value"="30556"}; +{"key"="3718";"subkey"="743";"value"="30565"}; +{"key"="3719";"subkey"="743";"value"="30574"}; +{"key"="3720";"subkey"="744";"value"="30583"}; +{"key"="3721";"subkey"="744";"value"="30593"}; +{"key"="3722";"subkey"="744";"value"="30602"}; +{"key"="3723";"subkey"="744";"value"="30611"}; +{"key"="3724";"subkey"="744";"value"="30620"}; +{"key"="3725";"subkey"="745";"value"="30630"}; +{"key"="3726";"subkey"="745";"value"="30639"}; +{"key"="3727";"subkey"="745";"value"="30648"}; +{"key"="3728";"subkey"="745";"value"="30657"}; +{"key"="3729";"subkey"="745";"value"="30666"}; +{"key"="3730";"subkey"="746";"value"="30676"}; +{"key"="3731";"subkey"="746";"value"="30685"}; +{"key"="3732";"subkey"="746";"value"="30694"}; +{"key"="3733";"subkey"="746";"value"="30703"}; +{"key"="3734";"subkey"="746";"value"="30713"}; +{"key"="3735";"subkey"="747";"value"="30722"}; 
+{"key"="3736";"subkey"="747";"value"="30731"}; +{"key"="3737";"subkey"="747";"value"="30740"}; +{"key"="3738";"subkey"="747";"value"="30749"}; +{"key"="3739";"subkey"="747";"value"="30759"}; +{"key"="3740";"subkey"="748";"value"="30768"}; +{"key"="3741";"subkey"="748";"value"="30777"}; +{"key"="3742";"subkey"="748";"value"="30786"}; +{"key"="3743";"subkey"="748";"value"="30796"}; +{"key"="3744";"subkey"="748";"value"="30805"}; +{"key"="3745";"subkey"="749";"value"="30814"}; +{"key"="3746";"subkey"="749";"value"="30823"}; +{"key"="3747";"subkey"="749";"value"="30832"}; +{"key"="3748";"subkey"="749";"value"="30842"}; +{"key"="3749";"subkey"="749";"value"="30851"}; +{"key"="3750";"subkey"="750";"value"="30860"}; +{"key"="3751";"subkey"="750";"value"="30869"}; +{"key"="3752";"subkey"="750";"value"="30879"}; +{"key"="3753";"subkey"="750";"value"="30888"}; +{"key"="3754";"subkey"="750";"value"="30897"}; +{"key"="3755";"subkey"="751";"value"="30906"}; +{"key"="3756";"subkey"="751";"value"="30916"}; +{"key"="3757";"subkey"="751";"value"="30925"}; +{"key"="3758";"subkey"="751";"value"="30934"}; +{"key"="3759";"subkey"="751";"value"="30943"}; +{"key"="3760";"subkey"="752";"value"="30952"}; +{"key"="3761";"subkey"="752";"value"="30962"}; +{"key"="3762";"subkey"="752";"value"="30971"}; +{"key"="3763";"subkey"="752";"value"="30980"}; +{"key"="3764";"subkey"="752";"value"="30989"}; +{"key"="3765";"subkey"="753";"value"="30999"}; +{"key"="3766";"subkey"="753";"value"="31008"}; +{"key"="3767";"subkey"="753";"value"="31017"}; +{"key"="3768";"subkey"="753";"value"="31026"}; +{"key"="3769";"subkey"="753";"value"="31036"}; +{"key"="3770";"subkey"="754";"value"="31045"}; +{"key"="3771";"subkey"="754";"value"="31054"}; +{"key"="3772";"subkey"="754";"value"="31063"}; +{"key"="3773";"subkey"="754";"value"="31073"}; +{"key"="3774";"subkey"="754";"value"="31082"}; +{"key"="3775";"subkey"="755";"value"="31091"}; +{"key"="3776";"subkey"="755";"value"="31100"}; 
+{"key"="3777";"subkey"="755";"value"="31109"}; +{"key"="3778";"subkey"="755";"value"="31119"}; +{"key"="3779";"subkey"="755";"value"="31128"}; +{"key"="3780";"subkey"="756";"value"="31137"}; +{"key"="3781";"subkey"="756";"value"="31146"}; +{"key"="3782";"subkey"="756";"value"="31156"}; +{"key"="3783";"subkey"="756";"value"="31165"}; +{"key"="3784";"subkey"="756";"value"="31174"}; +{"key"="3785";"subkey"="757";"value"="31183"}; +{"key"="3786";"subkey"="757";"value"="31193"}; +{"key"="3787";"subkey"="757";"value"="31202"}; +{"key"="3788";"subkey"="757";"value"="31211"}; +{"key"="3789";"subkey"="757";"value"="31220"}; +{"key"="3790";"subkey"="758";"value"="31230"}; +{"key"="3791";"subkey"="758";"value"="31239"}; +{"key"="3792";"subkey"="758";"value"="31248"}; +{"key"="3793";"subkey"="758";"value"="31257"}; +{"key"="3794";"subkey"="758";"value"="31267"}; +{"key"="3795";"subkey"="759";"value"="31276"}; +{"key"="3796";"subkey"="759";"value"="31285"}; +{"key"="3797";"subkey"="759";"value"="31294"}; +{"key"="3798";"subkey"="759";"value"="31303"}; +{"key"="3799";"subkey"="759";"value"="31313"}; +{"key"="3800";"subkey"="760";"value"="31322"}; +{"key"="3801";"subkey"="760";"value"="31331"}; +{"key"="3802";"subkey"="760";"value"="31340"}; +{"key"="3803";"subkey"="760";"value"="31350"}; +{"key"="3804";"subkey"="760";"value"="31359"}; +{"key"="3805";"subkey"="761";"value"="31368"}; +{"key"="3806";"subkey"="761";"value"="31377"}; +{"key"="3807";"subkey"="761";"value"="31387"}; +{"key"="3808";"subkey"="761";"value"="31396"}; +{"key"="3809";"subkey"="761";"value"="31405"}; +{"key"="3810";"subkey"="762";"value"="31414"}; +{"key"="3811";"subkey"="762";"value"="31424"}; +{"key"="3812";"subkey"="762";"value"="31433"}; +{"key"="3813";"subkey"="762";"value"="31442"}; +{"key"="3814";"subkey"="762";"value"="31451"}; +{"key"="3815";"subkey"="763";"value"="31461"}; +{"key"="3816";"subkey"="763";"value"="31470"}; +{"key"="3817";"subkey"="763";"value"="31479"}; 
+{"key"="3818";"subkey"="763";"value"="31488"}; +{"key"="3819";"subkey"="763";"value"="31498"}; +{"key"="3820";"subkey"="764";"value"="31507"}; +{"key"="3821";"subkey"="764";"value"="31516"}; +{"key"="3822";"subkey"="764";"value"="31525"}; +{"key"="3823";"subkey"="764";"value"="31535"}; +{"key"="3824";"subkey"="764";"value"="31544"}; +{"key"="3825";"subkey"="765";"value"="31553"}; +{"key"="3826";"subkey"="765";"value"="31562"}; +{"key"="3827";"subkey"="765";"value"="31572"}; +{"key"="3828";"subkey"="765";"value"="31581"}; +{"key"="3829";"subkey"="765";"value"="31590"}; +{"key"="3830";"subkey"="766";"value"="31599"}; +{"key"="3831";"subkey"="766";"value"="31609"}; +{"key"="3832";"subkey"="766";"value"="31618"}; +{"key"="3833";"subkey"="766";"value"="31627"}; +{"key"="3834";"subkey"="766";"value"="31636"}; +{"key"="3835";"subkey"="767";"value"="31646"}; +{"key"="3836";"subkey"="767";"value"="31655"}; +{"key"="3837";"subkey"="767";"value"="31664"}; +{"key"="3838";"subkey"="767";"value"="31673"}; +{"key"="3839";"subkey"="767";"value"="31683"}; +{"key"="3840";"subkey"="768";"value"="31692"}; +{"key"="3841";"subkey"="768";"value"="31701"}; +{"key"="3842";"subkey"="768";"value"="31710"}; +{"key"="3843";"subkey"="768";"value"="31720"}; +{"key"="3844";"subkey"="768";"value"="31729"}; +{"key"="3845";"subkey"="769";"value"="31738"}; +{"key"="3846";"subkey"="769";"value"="31747"}; +{"key"="3847";"subkey"="769";"value"="31757"}; +{"key"="3848";"subkey"="769";"value"="31766"}; +{"key"="3849";"subkey"="769";"value"="31775"}; +{"key"="3850";"subkey"="770";"value"="31784"}; +{"key"="3851";"subkey"="770";"value"="31794"}; +{"key"="3852";"subkey"="770";"value"="31803"}; +{"key"="3853";"subkey"="770";"value"="31812"}; +{"key"="3854";"subkey"="770";"value"="31821"}; +{"key"="3855";"subkey"="771";"value"="31831"}; +{"key"="3856";"subkey"="771";"value"="31840"}; +{"key"="3857";"subkey"="771";"value"="31849"}; +{"key"="3858";"subkey"="771";"value"="31858"}; 
+{"key"="3859";"subkey"="771";"value"="31868"}; +{"key"="3860";"subkey"="772";"value"="31877"}; +{"key"="3861";"subkey"="772";"value"="31886"}; +{"key"="3862";"subkey"="772";"value"="31896"}; +{"key"="3863";"subkey"="772";"value"="31905"}; +{"key"="3864";"subkey"="772";"value"="31914"}; +{"key"="3865";"subkey"="773";"value"="31923"}; +{"key"="3866";"subkey"="773";"value"="31933"}; +{"key"="3867";"subkey"="773";"value"="31942"}; +{"key"="3868";"subkey"="773";"value"="31951"}; +{"key"="3869";"subkey"="773";"value"="31960"}; +{"key"="3870";"subkey"="774";"value"="31970"}; +{"key"="3871";"subkey"="774";"value"="31979"}; +{"key"="3872";"subkey"="774";"value"="31988"}; +{"key"="3873";"subkey"="774";"value"="31997"}; +{"key"="3874";"subkey"="774";"value"="32007"}; +{"key"="3875";"subkey"="775";"value"="32016"}; +{"key"="3876";"subkey"="775";"value"="32025"}; +{"key"="3877";"subkey"="775";"value"="32034"}; +{"key"="3878";"subkey"="775";"value"="32044"}; +{"key"="3879";"subkey"="775";"value"="32053"}; +{"key"="3880";"subkey"="776";"value"="32062"}; +{"key"="3881";"subkey"="776";"value"="32071"}; +{"key"="3882";"subkey"="776";"value"="32081"}; +{"key"="3883";"subkey"="776";"value"="32090"}; +{"key"="3884";"subkey"="776";"value"="32099"}; +{"key"="3885";"subkey"="777";"value"="32109"}; +{"key"="3886";"subkey"="777";"value"="32118"}; +{"key"="3887";"subkey"="777";"value"="32127"}; +{"key"="3888";"subkey"="777";"value"="32136"}; +{"key"="3889";"subkey"="777";"value"="32146"}; +{"key"="3890";"subkey"="778";"value"="32155"}; +{"key"="3891";"subkey"="778";"value"="32164"}; +{"key"="3892";"subkey"="778";"value"="32173"}; +{"key"="3893";"subkey"="778";"value"="32183"}; +{"key"="3894";"subkey"="778";"value"="32192"}; +{"key"="3895";"subkey"="779";"value"="32201"}; +{"key"="3896";"subkey"="779";"value"="32210"}; +{"key"="3897";"subkey"="779";"value"="32220"}; +{"key"="3898";"subkey"="779";"value"="32229"}; +{"key"="3899";"subkey"="779";"value"="32238"}; 
+{"key"="3900";"subkey"="780";"value"="32248"}; +{"key"="3901";"subkey"="780";"value"="32257"}; +{"key"="3902";"subkey"="780";"value"="32266"}; +{"key"="3903";"subkey"="780";"value"="32275"}; +{"key"="3904";"subkey"="780";"value"="32285"}; +{"key"="3905";"subkey"="781";"value"="32294"}; +{"key"="3906";"subkey"="781";"value"="32303"}; +{"key"="3907";"subkey"="781";"value"="32312"}; +{"key"="3908";"subkey"="781";"value"="32322"}; +{"key"="3909";"subkey"="781";"value"="32331"}; +{"key"="3910";"subkey"="782";"value"="32340"}; +{"key"="3911";"subkey"="782";"value"="32350"}; +{"key"="3912";"subkey"="782";"value"="32359"}; +{"key"="3913";"subkey"="782";"value"="32368"}; +{"key"="3914";"subkey"="782";"value"="32377"}; +{"key"="3915";"subkey"="783";"value"="32387"}; +{"key"="3916";"subkey"="783";"value"="32396"}; +{"key"="3917";"subkey"="783";"value"="32405"}; +{"key"="3918";"subkey"="783";"value"="32414"}; +{"key"="3919";"subkey"="783";"value"="32424"}; +{"key"="3920";"subkey"="784";"value"="32433"}; +{"key"="3921";"subkey"="784";"value"="32442"}; +{"key"="3922";"subkey"="784";"value"="32452"}; +{"key"="3923";"subkey"="784";"value"="32461"}; +{"key"="3924";"subkey"="784";"value"="32470"}; +{"key"="3925";"subkey"="785";"value"="32479"}; +{"key"="3926";"subkey"="785";"value"="32489"}; +{"key"="3927";"subkey"="785";"value"="32498"}; +{"key"="3928";"subkey"="785";"value"="32507"}; +{"key"="3929";"subkey"="785";"value"="32516"}; +{"key"="3930";"subkey"="786";"value"="32526"}; +{"key"="3931";"subkey"="786";"value"="32535"}; +{"key"="3932";"subkey"="786";"value"="32544"}; +{"key"="3933";"subkey"="786";"value"="32554"}; +{"key"="3934";"subkey"="786";"value"="32563"}; +{"key"="3935";"subkey"="787";"value"="32572"}; +{"key"="3936";"subkey"="787";"value"="32581"}; +{"key"="3937";"subkey"="787";"value"="32591"}; +{"key"="3938";"subkey"="787";"value"="32600"}; +{"key"="3939";"subkey"="787";"value"="32609"}; +{"key"="3940";"subkey"="788";"value"="32619"}; 
+{"key"="3941";"subkey"="788";"value"="32628"}; +{"key"="3942";"subkey"="788";"value"="32637"}; +{"key"="3943";"subkey"="788";"value"="32646"}; +{"key"="3944";"subkey"="788";"value"="32656"}; +{"key"="3945";"subkey"="789";"value"="32665"}; +{"key"="3946";"subkey"="789";"value"="32674"}; +{"key"="3947";"subkey"="789";"value"="32683"}; +{"key"="3948";"subkey"="789";"value"="32693"}; +{"key"="3949";"subkey"="789";"value"="32702"}; +{"key"="3950";"subkey"="790";"value"="32711"}; +{"key"="3951";"subkey"="790";"value"="32721"}; +{"key"="3952";"subkey"="790";"value"="32730"}; +{"key"="3953";"subkey"="790";"value"="32739"}; +{"key"="3954";"subkey"="790";"value"="32748"}; +{"key"="3955";"subkey"="791";"value"="32758"}; +{"key"="3956";"subkey"="791";"value"="32767"}; +{"key"="3957";"subkey"="791";"value"="32776"}; +{"key"="3958";"subkey"="791";"value"="32786"}; +{"key"="3959";"subkey"="791";"value"="32795"}; +{"key"="3960";"subkey"="792";"value"="32804"}; +{"key"="3961";"subkey"="792";"value"="32813"}; +{"key"="3962";"subkey"="792";"value"="32823"}; +{"key"="3963";"subkey"="792";"value"="32832"}; +{"key"="3964";"subkey"="792";"value"="32841"}; +{"key"="3965";"subkey"="793";"value"="32851"}; +{"key"="3966";"subkey"="793";"value"="32860"}; +{"key"="3967";"subkey"="793";"value"="32869"}; +{"key"="3968";"subkey"="793";"value"="32878"}; +{"key"="3969";"subkey"="793";"value"="32888"}; +{"key"="3970";"subkey"="794";"value"="32897"}; +{"key"="3971";"subkey"="794";"value"="32906"}; +{"key"="3972";"subkey"="794";"value"="32916"}; +{"key"="3973";"subkey"="794";"value"="32925"}; +{"key"="3974";"subkey"="794";"value"="32934"}; +{"key"="3975";"subkey"="795";"value"="32943"}; +{"key"="3976";"subkey"="795";"value"="32953"}; +{"key"="3977";"subkey"="795";"value"="32962"}; +{"key"="3978";"subkey"="795";"value"="32971"}; +{"key"="3979";"subkey"="795";"value"="32981"}; +{"key"="3980";"subkey"="796";"value"="32990"}; +{"key"="3981";"subkey"="796";"value"="32999"}; 
+{"key"="3982";"subkey"="796";"value"="33008"}; +{"key"="3983";"subkey"="796";"value"="33018"}; +{"key"="3984";"subkey"="796";"value"="33027"}; +{"key"="3985";"subkey"="797";"value"="33036"}; +{"key"="3986";"subkey"="797";"value"="33046"}; +{"key"="3987";"subkey"="797";"value"="33055"}; +{"key"="3988";"subkey"="797";"value"="33064"}; +{"key"="3989";"subkey"="797";"value"="33073"}; +{"key"="3990";"subkey"="798";"value"="33083"}; +{"key"="3991";"subkey"="798";"value"="33092"}; +{"key"="3992";"subkey"="798";"value"="33101"}; +{"key"="3993";"subkey"="798";"value"="33111"}; +{"key"="3994";"subkey"="798";"value"="33120"}; +{"key"="3995";"subkey"="799";"value"="33129"}; +{"key"="3996";"subkey"="799";"value"="33139"}; +{"key"="3997";"subkey"="799";"value"="33148"}; +{"key"="3998";"subkey"="799";"value"="33157"}; +{"key"="3999";"subkey"="799";"value"="33166"}; +{"key"="4000";"subkey"="800";"value"="33176"}; +{"key"="4001";"subkey"="800";"value"="33185"}; +{"key"="4002";"subkey"="800";"value"="33194"}; +{"key"="4003";"subkey"="800";"value"="33204"}; +{"key"="4004";"subkey"="800";"value"="33213"}; +{"key"="4005";"subkey"="801";"value"="33222"}; +{"key"="4006";"subkey"="801";"value"="33231"}; +{"key"="4007";"subkey"="801";"value"="33241"}; +{"key"="4008";"subkey"="801";"value"="33250"}; +{"key"="4009";"subkey"="801";"value"="33259"}; +{"key"="4010";"subkey"="802";"value"="33269"}; +{"key"="4011";"subkey"="802";"value"="33278"}; +{"key"="4012";"subkey"="802";"value"="33287"}; +{"key"="4013";"subkey"="802";"value"="33297"}; +{"key"="4014";"subkey"="802";"value"="33306"}; +{"key"="4015";"subkey"="803";"value"="33315"}; +{"key"="4016";"subkey"="803";"value"="33324"}; +{"key"="4017";"subkey"="803";"value"="33334"}; +{"key"="4018";"subkey"="803";"value"="33343"}; +{"key"="4019";"subkey"="803";"value"="33352"}; +{"key"="4020";"subkey"="804";"value"="33362"}; +{"key"="4021";"subkey"="804";"value"="33371"}; +{"key"="4022";"subkey"="804";"value"="33380"}; 
+{"key"="4023";"subkey"="804";"value"="33390"}; +{"key"="4024";"subkey"="804";"value"="33399"}; +{"key"="4025";"subkey"="805";"value"="33408"}; +{"key"="4026";"subkey"="805";"value"="33417"}; +{"key"="4027";"subkey"="805";"value"="33427"}; +{"key"="4028";"subkey"="805";"value"="33436"}; +{"key"="4029";"subkey"="805";"value"="33445"}; +{"key"="4030";"subkey"="806";"value"="33455"}; +{"key"="4031";"subkey"="806";"value"="33464"}; +{"key"="4032";"subkey"="806";"value"="33473"}; +{"key"="4033";"subkey"="806";"value"="33483"}; +{"key"="4034";"subkey"="806";"value"="33492"}; +{"key"="4035";"subkey"="807";"value"="33501"}; +{"key"="4036";"subkey"="807";"value"="33510"}; +{"key"="4037";"subkey"="807";"value"="33520"}; +{"key"="4038";"subkey"="807";"value"="33529"}; +{"key"="4039";"subkey"="807";"value"="33538"}; +{"key"="4040";"subkey"="808";"value"="33548"}; +{"key"="4041";"subkey"="808";"value"="33557"}; +{"key"="4042";"subkey"="808";"value"="33566"}; +{"key"="4043";"subkey"="808";"value"="33576"}; +{"key"="4044";"subkey"="808";"value"="33585"}; +{"key"="4045";"subkey"="809";"value"="33594"}; +{"key"="4046";"subkey"="809";"value"="33603"}; +{"key"="4047";"subkey"="809";"value"="33613"}; +{"key"="4048";"subkey"="809";"value"="33622"}; +{"key"="4049";"subkey"="809";"value"="33631"}; +{"key"="4050";"subkey"="810";"value"="33641"}; +{"key"="4051";"subkey"="810";"value"="33650"}; +{"key"="4052";"subkey"="810";"value"="33659"}; +{"key"="4053";"subkey"="810";"value"="33669"}; +{"key"="4054";"subkey"="810";"value"="33678"}; +{"key"="4055";"subkey"="811";"value"="33687"}; +{"key"="4056";"subkey"="811";"value"="33697"}; +{"key"="4057";"subkey"="811";"value"="33706"}; +{"key"="4058";"subkey"="811";"value"="33715"}; +{"key"="4059";"subkey"="811";"value"="33724"}; +{"key"="4060";"subkey"="812";"value"="33734"}; +{"key"="4061";"subkey"="812";"value"="33743"}; +{"key"="4062";"subkey"="812";"value"="33752"}; +{"key"="4063";"subkey"="812";"value"="33762"}; 
+{"key"="4064";"subkey"="812";"value"="33771"}; +{"key"="4065";"subkey"="813";"value"="33780"}; +{"key"="4066";"subkey"="813";"value"="33790"}; +{"key"="4067";"subkey"="813";"value"="33799"}; +{"key"="4068";"subkey"="813";"value"="33808"}; +{"key"="4069";"subkey"="813";"value"="33818"}; +{"key"="4070";"subkey"="814";"value"="33827"}; +{"key"="4071";"subkey"="814";"value"="33836"}; +{"key"="4072";"subkey"="814";"value"="33846"}; +{"key"="4073";"subkey"="814";"value"="33855"}; +{"key"="4074";"subkey"="814";"value"="33864"}; +{"key"="4075";"subkey"="815";"value"="33873"}; +{"key"="4076";"subkey"="815";"value"="33883"}; +{"key"="4077";"subkey"="815";"value"="33892"}; +{"key"="4078";"subkey"="815";"value"="33901"}; +{"key"="4079";"subkey"="815";"value"="33911"}; +{"key"="4080";"subkey"="816";"value"="33920"}; +{"key"="4081";"subkey"="816";"value"="33929"}; +{"key"="4082";"subkey"="816";"value"="33939"}; +{"key"="4083";"subkey"="816";"value"="33948"}; +{"key"="4084";"subkey"="816";"value"="33957"}; +{"key"="4085";"subkey"="817";"value"="33967"}; +{"key"="4086";"subkey"="817";"value"="33976"}; +{"key"="4087";"subkey"="817";"value"="33985"}; +{"key"="4088";"subkey"="817";"value"="33995"}; +{"key"="4089";"subkey"="817";"value"="34004"}; +{"key"="4090";"subkey"="818";"value"="34013"}; +{"key"="4091";"subkey"="818";"value"="34022"}; +{"key"="4092";"subkey"="818";"value"="34032"}; +{"key"="4093";"subkey"="818";"value"="34041"}; +{"key"="4094";"subkey"="818";"value"="34050"}; +{"key"="4095";"subkey"="819";"value"="34060"}; +{"key"="4096";"subkey"="819";"value"="34069"}; +{"key"="4097";"subkey"="819";"value"="34078"}; +{"key"="4098";"subkey"="819";"value"="34088"}; +{"key"="4099";"subkey"="819";"value"="34097"}; +{"key"="4100";"subkey"="820";"value"="34106"}; +{"key"="4101";"subkey"="820";"value"="34116"}; +{"key"="4102";"subkey"="820";"value"="34125"}; +{"key"="4103";"subkey"="820";"value"="34134"}; +{"key"="4104";"subkey"="820";"value"="34144"}; 
+{"key"="4105";"subkey"="821";"value"="34153"}; +{"key"="4106";"subkey"="821";"value"="34162"}; +{"key"="4107";"subkey"="821";"value"="34172"}; +{"key"="4108";"subkey"="821";"value"="34181"}; +{"key"="4109";"subkey"="821";"value"="34190"}; +{"key"="4110";"subkey"="822";"value"="34200"}; +{"key"="4111";"subkey"="822";"value"="34209"}; +{"key"="4112";"subkey"="822";"value"="34218"}; +{"key"="4113";"subkey"="822";"value"="34228"}; +{"key"="4114";"subkey"="822";"value"="34237"}; +{"key"="4115";"subkey"="823";"value"="34246"}; +{"key"="4116";"subkey"="823";"value"="34255"}; +{"key"="4117";"subkey"="823";"value"="34265"}; +{"key"="4118";"subkey"="823";"value"="34274"}; +{"key"="4119";"subkey"="823";"value"="34283"}; +{"key"="4120";"subkey"="824";"value"="34293"}; +{"key"="4121";"subkey"="824";"value"="34302"}; +{"key"="4122";"subkey"="824";"value"="34311"}; +{"key"="4123";"subkey"="824";"value"="34321"}; +{"key"="4124";"subkey"="824";"value"="34330"}; +{"key"="4125";"subkey"="825";"value"="34339"}; +{"key"="4126";"subkey"="825";"value"="34349"}; +{"key"="4127";"subkey"="825";"value"="34358"}; +{"key"="4128";"subkey"="825";"value"="34367"}; +{"key"="4129";"subkey"="825";"value"="34377"}; +{"key"="4130";"subkey"="826";"value"="34386"}; +{"key"="4131";"subkey"="826";"value"="34395"}; +{"key"="4132";"subkey"="826";"value"="34405"}; +{"key"="4133";"subkey"="826";"value"="34414"}; +{"key"="4134";"subkey"="826";"value"="34423"}; +{"key"="4135";"subkey"="827";"value"="34433"}; +{"key"="4136";"subkey"="827";"value"="34442"}; +{"key"="4137";"subkey"="827";"value"="34451"}; +{"key"="4138";"subkey"="827";"value"="34461"}; +{"key"="4139";"subkey"="827";"value"="34470"}; +{"key"="4140";"subkey"="828";"value"="34479"}; +{"key"="4141";"subkey"="828";"value"="34489"}; +{"key"="4142";"subkey"="828";"value"="34498"}; +{"key"="4143";"subkey"="828";"value"="34507"}; +{"key"="4144";"subkey"="828";"value"="34517"}; +{"key"="4145";"subkey"="829";"value"="34526"}; 
+{"key"="4146";"subkey"="829";"value"="34535"}; +{"key"="4147";"subkey"="829";"value"="34545"}; +{"key"="4148";"subkey"="829";"value"="34554"}; +{"key"="4149";"subkey"="829";"value"="34563"}; +{"key"="4150";"subkey"="830";"value"="34573"}; +{"key"="4151";"subkey"="830";"value"="34582"}; +{"key"="4152";"subkey"="830";"value"="34591"}; +{"key"="4153";"subkey"="830";"value"="34601"}; +{"key"="4154";"subkey"="830";"value"="34610"}; +{"key"="4155";"subkey"="831";"value"="34619"}; +{"key"="4156";"subkey"="831";"value"="34629"}; +{"key"="4157";"subkey"="831";"value"="34638"}; +{"key"="4158";"subkey"="831";"value"="34647"}; +{"key"="4159";"subkey"="831";"value"="34657"}; +{"key"="4160";"subkey"="832";"value"="34666"}; +{"key"="4161";"subkey"="832";"value"="34675"}; +{"key"="4162";"subkey"="832";"value"="34685"}; +{"key"="4163";"subkey"="832";"value"="34694"}; +{"key"="4164";"subkey"="832";"value"="34703"}; +{"key"="4165";"subkey"="833";"value"="34713"}; +{"key"="4166";"subkey"="833";"value"="34722"}; +{"key"="4167";"subkey"="833";"value"="34731"}; +{"key"="4168";"subkey"="833";"value"="34741"}; +{"key"="4169";"subkey"="833";"value"="34750"}; +{"key"="4170";"subkey"="834";"value"="34759"}; +{"key"="4171";"subkey"="834";"value"="34769"}; +{"key"="4172";"subkey"="834";"value"="34778"}; +{"key"="4173";"subkey"="834";"value"="34787"}; +{"key"="4174";"subkey"="834";"value"="34797"}; +{"key"="4175";"subkey"="835";"value"="34806"}; +{"key"="4176";"subkey"="835";"value"="34815"}; +{"key"="4177";"subkey"="835";"value"="34825"}; +{"key"="4178";"subkey"="835";"value"="34834"}; +{"key"="4179";"subkey"="835";"value"="34843"}; +{"key"="4180";"subkey"="836";"value"="34853"}; +{"key"="4181";"subkey"="836";"value"="34862"}; +{"key"="4182";"subkey"="836";"value"="34871"}; +{"key"="4183";"subkey"="836";"value"="34881"}; +{"key"="4184";"subkey"="836";"value"="34890"}; +{"key"="4185";"subkey"="837";"value"="34899"}; +{"key"="4186";"subkey"="837";"value"="34909"}; 
+{"key"="4187";"subkey"="837";"value"="34918"}; +{"key"="4188";"subkey"="837";"value"="34927"}; +{"key"="4189";"subkey"="837";"value"="34937"}; +{"key"="4190";"subkey"="838";"value"="34946"}; +{"key"="4191";"subkey"="838";"value"="34955"}; +{"key"="4192";"subkey"="838";"value"="34965"}; +{"key"="4193";"subkey"="838";"value"="34974"}; +{"key"="4194";"subkey"="838";"value"="34983"}; +{"key"="4195";"subkey"="839";"value"="34993"}; +{"key"="4196";"subkey"="839";"value"="35002"}; +{"key"="4197";"subkey"="839";"value"="35011"}; +{"key"="4198";"subkey"="839";"value"="35021"}; +{"key"="4199";"subkey"="839";"value"="35030"}; +{"key"="4200";"subkey"="840";"value"="35039"}; +{"key"="4201";"subkey"="840";"value"="35049"}; +{"key"="4202";"subkey"="840";"value"="35058"}; +{"key"="4203";"subkey"="840";"value"="35067"}; +{"key"="4204";"subkey"="840";"value"="35077"}; +{"key"="4205";"subkey"="841";"value"="35086"}; +{"key"="4206";"subkey"="841";"value"="35095"}; +{"key"="4207";"subkey"="841";"value"="35105"}; +{"key"="4208";"subkey"="841";"value"="35114"}; +{"key"="4209";"subkey"="841";"value"="35124"}; +{"key"="4210";"subkey"="842";"value"="35133"}; +{"key"="4211";"subkey"="842";"value"="35142"}; +{"key"="4212";"subkey"="842";"value"="35152"}; +{"key"="4213";"subkey"="842";"value"="35161"}; +{"key"="4214";"subkey"="842";"value"="35170"}; +{"key"="4215";"subkey"="843";"value"="35180"}; +{"key"="4216";"subkey"="843";"value"="35189"}; +{"key"="4217";"subkey"="843";"value"="35198"}; +{"key"="4218";"subkey"="843";"value"="35208"}; +{"key"="4219";"subkey"="843";"value"="35217"}; +{"key"="4220";"subkey"="844";"value"="35226"}; +{"key"="4221";"subkey"="844";"value"="35236"}; +{"key"="4222";"subkey"="844";"value"="35245"}; +{"key"="4223";"subkey"="844";"value"="35254"}; +{"key"="4224";"subkey"="844";"value"="35264"}; +{"key"="4225";"subkey"="845";"value"="35273"}; +{"key"="4226";"subkey"="845";"value"="35282"}; +{"key"="4227";"subkey"="845";"value"="35292"}; 
+{"key"="4228";"subkey"="845";"value"="35301"}; +{"key"="4229";"subkey"="845";"value"="35310"}; +{"key"="4230";"subkey"="846";"value"="35320"}; +{"key"="4231";"subkey"="846";"value"="35329"}; +{"key"="4232";"subkey"="846";"value"="35339"}; +{"key"="4233";"subkey"="846";"value"="35348"}; +{"key"="4234";"subkey"="846";"value"="35357"}; +{"key"="4235";"subkey"="847";"value"="35367"}; +{"key"="4236";"subkey"="847";"value"="35376"}; +{"key"="4237";"subkey"="847";"value"="35385"}; +{"key"="4238";"subkey"="847";"value"="35395"}; +{"key"="4239";"subkey"="847";"value"="35404"}; +{"key"="4240";"subkey"="848";"value"="35413"}; +{"key"="4241";"subkey"="848";"value"="35423"}; +{"key"="4242";"subkey"="848";"value"="35432"}; +{"key"="4243";"subkey"="848";"value"="35441"}; +{"key"="4244";"subkey"="848";"value"="35451"}; +{"key"="4245";"subkey"="849";"value"="35460"}; +{"key"="4246";"subkey"="849";"value"="35469"}; +{"key"="4247";"subkey"="849";"value"="35479"}; +{"key"="4248";"subkey"="849";"value"="35488"}; +{"key"="4249";"subkey"="849";"value"="35498"}; +{"key"="4250";"subkey"="850";"value"="35507"}; +{"key"="4251";"subkey"="850";"value"="35516"}; +{"key"="4252";"subkey"="850";"value"="35526"}; +{"key"="4253";"subkey"="850";"value"="35535"}; +{"key"="4254";"subkey"="850";"value"="35544"}; +{"key"="4255";"subkey"="851";"value"="35554"}; +{"key"="4256";"subkey"="851";"value"="35563"}; +{"key"="4257";"subkey"="851";"value"="35572"}; +{"key"="4258";"subkey"="851";"value"="35582"}; +{"key"="4259";"subkey"="851";"value"="35591"}; +{"key"="4260";"subkey"="852";"value"="35600"}; +{"key"="4261";"subkey"="852";"value"="35610"}; +{"key"="4262";"subkey"="852";"value"="35619"}; +{"key"="4263";"subkey"="852";"value"="35628"}; +{"key"="4264";"subkey"="852";"value"="35638"}; +{"key"="4265";"subkey"="853";"value"="35647"}; +{"key"="4266";"subkey"="853";"value"="35657"}; +{"key"="4267";"subkey"="853";"value"="35666"}; +{"key"="4268";"subkey"="853";"value"="35675"}; 
+{"key"="4269";"subkey"="853";"value"="35685"}; +{"key"="4270";"subkey"="854";"value"="35694"}; +{"key"="4271";"subkey"="854";"value"="35703"}; +{"key"="4272";"subkey"="854";"value"="35713"}; +{"key"="4273";"subkey"="854";"value"="35722"}; +{"key"="4274";"subkey"="854";"value"="35731"}; +{"key"="4275";"subkey"="855";"value"="35741"}; +{"key"="4276";"subkey"="855";"value"="35750"}; +{"key"="4277";"subkey"="855";"value"="35760"}; +{"key"="4278";"subkey"="855";"value"="35769"}; +{"key"="4279";"subkey"="855";"value"="35778"}; +{"key"="4280";"subkey"="856";"value"="35788"}; +{"key"="4281";"subkey"="856";"value"="35797"}; +{"key"="4282";"subkey"="856";"value"="35806"}; +{"key"="4283";"subkey"="856";"value"="35816"}; +{"key"="4284";"subkey"="856";"value"="35825"}; +{"key"="4285";"subkey"="857";"value"="35834"}; +{"key"="4286";"subkey"="857";"value"="35844"}; +{"key"="4287";"subkey"="857";"value"="35853"}; +{"key"="4288";"subkey"="857";"value"="35863"}; +{"key"="4289";"subkey"="857";"value"="35872"}; +{"key"="4290";"subkey"="858";"value"="35881"}; +{"key"="4291";"subkey"="858";"value"="35891"}; +{"key"="4292";"subkey"="858";"value"="35900"}; +{"key"="4293";"subkey"="858";"value"="35909"}; +{"key"="4294";"subkey"="858";"value"="35919"}; +{"key"="4295";"subkey"="859";"value"="35928"}; +{"key"="4296";"subkey"="859";"value"="35937"}; +{"key"="4297";"subkey"="859";"value"="35947"}; +{"key"="4298";"subkey"="859";"value"="35956"}; +{"key"="4299";"subkey"="859";"value"="35966"}; +{"key"="4300";"subkey"="860";"value"="35975"}; +{"key"="4301";"subkey"="860";"value"="35984"}; +{"key"="4302";"subkey"="860";"value"="35994"}; +{"key"="4303";"subkey"="860";"value"="36003"}; +{"key"="4304";"subkey"="860";"value"="36012"}; +{"key"="4305";"subkey"="861";"value"="36022"}; +{"key"="4306";"subkey"="861";"value"="36031"}; +{"key"="4307";"subkey"="861";"value"="36040"}; +{"key"="4308";"subkey"="861";"value"="36050"}; +{"key"="4309";"subkey"="861";"value"="36059"}; 
+{"key"="4310";"subkey"="862";"value"="36069"}; +{"key"="4311";"subkey"="862";"value"="36078"}; +{"key"="4312";"subkey"="862";"value"="36087"}; +{"key"="4313";"subkey"="862";"value"="36097"}; +{"key"="4314";"subkey"="862";"value"="36106"}; +{"key"="4315";"subkey"="863";"value"="36115"}; +{"key"="4316";"subkey"="863";"value"="36125"}; +{"key"="4317";"subkey"="863";"value"="36134"}; +{"key"="4318";"subkey"="863";"value"="36144"}; +{"key"="4319";"subkey"="863";"value"="36153"}; +{"key"="4320";"subkey"="864";"value"="36162"}; +{"key"="4321";"subkey"="864";"value"="36172"}; +{"key"="4322";"subkey"="864";"value"="36181"}; +{"key"="4323";"subkey"="864";"value"="36190"}; +{"key"="4324";"subkey"="864";"value"="36200"}; +{"key"="4325";"subkey"="865";"value"="36209"}; +{"key"="4326";"subkey"="865";"value"="36218"}; +{"key"="4327";"subkey"="865";"value"="36228"}; +{"key"="4328";"subkey"="865";"value"="36237"}; +{"key"="4329";"subkey"="865";"value"="36247"}; +{"key"="4330";"subkey"="866";"value"="36256"}; +{"key"="4331";"subkey"="866";"value"="36265"}; +{"key"="4332";"subkey"="866";"value"="36275"}; +{"key"="4333";"subkey"="866";"value"="36284"}; +{"key"="4334";"subkey"="866";"value"="36293"}; +{"key"="4335";"subkey"="867";"value"="36303"}; +{"key"="4336";"subkey"="867";"value"="36312"}; +{"key"="4337";"subkey"="867";"value"="36322"}; +{"key"="4338";"subkey"="867";"value"="36331"}; +{"key"="4339";"subkey"="867";"value"="36340"}; +{"key"="4340";"subkey"="868";"value"="36350"}; +{"key"="4341";"subkey"="868";"value"="36359"}; +{"key"="4342";"subkey"="868";"value"="36368"}; +{"key"="4343";"subkey"="868";"value"="36378"}; +{"key"="4344";"subkey"="868";"value"="36387"}; +{"key"="4345";"subkey"="869";"value"="36397"}; +{"key"="4346";"subkey"="869";"value"="36406"}; +{"key"="4347";"subkey"="869";"value"="36415"}; +{"key"="4348";"subkey"="869";"value"="36425"}; +{"key"="4349";"subkey"="869";"value"="36434"}; +{"key"="4350";"subkey"="870";"value"="36444"}; 
+{"key"="4351";"subkey"="870";"value"="36453"}; +{"key"="4352";"subkey"="870";"value"="36462"}; +{"key"="4353";"subkey"="870";"value"="36472"}; +{"key"="4354";"subkey"="870";"value"="36481"}; +{"key"="4355";"subkey"="871";"value"="36490"}; +{"key"="4356";"subkey"="871";"value"="36500"}; +{"key"="4357";"subkey"="871";"value"="36509"}; +{"key"="4358";"subkey"="871";"value"="36519"}; +{"key"="4359";"subkey"="871";"value"="36528"}; +{"key"="4360";"subkey"="872";"value"="36537"}; +{"key"="4361";"subkey"="872";"value"="36547"}; +{"key"="4362";"subkey"="872";"value"="36556"}; +{"key"="4363";"subkey"="872";"value"="36565"}; +{"key"="4364";"subkey"="872";"value"="36575"}; +{"key"="4365";"subkey"="873";"value"="36584"}; +{"key"="4366";"subkey"="873";"value"="36594"}; +{"key"="4367";"subkey"="873";"value"="36603"}; +{"key"="4368";"subkey"="873";"value"="36612"}; +{"key"="4369";"subkey"="873";"value"="36622"}; +{"key"="4370";"subkey"="874";"value"="36631"}; +{"key"="4371";"subkey"="874";"value"="36640"}; +{"key"="4372";"subkey"="874";"value"="36650"}; +{"key"="4373";"subkey"="874";"value"="36659"}; +{"key"="4374";"subkey"="874";"value"="36669"}; +{"key"="4375";"subkey"="875";"value"="36678"}; +{"key"="4376";"subkey"="875";"value"="36687"}; +{"key"="4377";"subkey"="875";"value"="36697"}; +{"key"="4378";"subkey"="875";"value"="36706"}; +{"key"="4379";"subkey"="875";"value"="36716"}; +{"key"="4380";"subkey"="876";"value"="36725"}; +{"key"="4381";"subkey"="876";"value"="36734"}; +{"key"="4382";"subkey"="876";"value"="36744"}; +{"key"="4383";"subkey"="876";"value"="36753"}; +{"key"="4384";"subkey"="876";"value"="36762"}; +{"key"="4385";"subkey"="877";"value"="36772"}; +{"key"="4386";"subkey"="877";"value"="36781"}; +{"key"="4387";"subkey"="877";"value"="36791"}; +{"key"="4388";"subkey"="877";"value"="36800"}; +{"key"="4389";"subkey"="877";"value"="36809"}; +{"key"="4390";"subkey"="878";"value"="36819"}; +{"key"="4391";"subkey"="878";"value"="36828"}; 
+{"key"="4392";"subkey"="878";"value"="36838"}; +{"key"="4393";"subkey"="878";"value"="36847"}; +{"key"="4394";"subkey"="878";"value"="36856"}; +{"key"="4395";"subkey"="879";"value"="36866"}; +{"key"="4396";"subkey"="879";"value"="36875"}; +{"key"="4397";"subkey"="879";"value"="36885"}; +{"key"="4398";"subkey"="879";"value"="36894"}; +{"key"="4399";"subkey"="879";"value"="36903"}; +{"key"="4400";"subkey"="880";"value"="36913"}; +{"key"="4401";"subkey"="880";"value"="36922"}; +{"key"="4402";"subkey"="880";"value"="36931"}; +{"key"="4403";"subkey"="880";"value"="36941"}; +{"key"="4404";"subkey"="880";"value"="36950"}; +{"key"="4405";"subkey"="881";"value"="36960"}; +{"key"="4406";"subkey"="881";"value"="36969"}; +{"key"="4407";"subkey"="881";"value"="36978"}; +{"key"="4408";"subkey"="881";"value"="36988"}; +{"key"="4409";"subkey"="881";"value"="36997"}; +{"key"="4410";"subkey"="882";"value"="37007"}; +{"key"="4411";"subkey"="882";"value"="37016"}; +{"key"="4412";"subkey"="882";"value"="37025"}; +{"key"="4413";"subkey"="882";"value"="37035"}; +{"key"="4414";"subkey"="882";"value"="37044"}; +{"key"="4415";"subkey"="883";"value"="37054"}; +{"key"="4416";"subkey"="883";"value"="37063"}; +{"key"="4417";"subkey"="883";"value"="37072"}; +{"key"="4418";"subkey"="883";"value"="37082"}; +{"key"="4419";"subkey"="883";"value"="37091"}; +{"key"="4420";"subkey"="884";"value"="37101"}; +{"key"="4421";"subkey"="884";"value"="37110"}; +{"key"="4422";"subkey"="884";"value"="37119"}; +{"key"="4423";"subkey"="884";"value"="37129"}; +{"key"="4424";"subkey"="884";"value"="37138"}; +{"key"="4425";"subkey"="885";"value"="37147"}; +{"key"="4426";"subkey"="885";"value"="37157"}; +{"key"="4427";"subkey"="885";"value"="37166"}; +{"key"="4428";"subkey"="885";"value"="37176"}; +{"key"="4429";"subkey"="885";"value"="37185"}; +{"key"="4430";"subkey"="886";"value"="37194"}; +{"key"="4431";"subkey"="886";"value"="37204"}; +{"key"="4432";"subkey"="886";"value"="37213"}; 
+{"key"="4433";"subkey"="886";"value"="37223"}; +{"key"="4434";"subkey"="886";"value"="37232"}; +{"key"="4435";"subkey"="887";"value"="37241"}; +{"key"="4436";"subkey"="887";"value"="37251"}; +{"key"="4437";"subkey"="887";"value"="37260"}; +{"key"="4438";"subkey"="887";"value"="37270"}; +{"key"="4439";"subkey"="887";"value"="37279"}; +{"key"="4440";"subkey"="888";"value"="37288"}; +{"key"="4441";"subkey"="888";"value"="37298"}; +{"key"="4442";"subkey"="888";"value"="37307"}; +{"key"="4443";"subkey"="888";"value"="37317"}; +{"key"="4444";"subkey"="888";"value"="37326"}; +{"key"="4445";"subkey"="889";"value"="37335"}; +{"key"="4446";"subkey"="889";"value"="37345"}; +{"key"="4447";"subkey"="889";"value"="37354"}; +{"key"="4448";"subkey"="889";"value"="37364"}; +{"key"="4449";"subkey"="889";"value"="37373"}; +{"key"="4450";"subkey"="890";"value"="37382"}; +{"key"="4451";"subkey"="890";"value"="37392"}; +{"key"="4452";"subkey"="890";"value"="37401"}; +{"key"="4453";"subkey"="890";"value"="37411"}; +{"key"="4454";"subkey"="890";"value"="37420"}; +{"key"="4455";"subkey"="891";"value"="37429"}; +{"key"="4456";"subkey"="891";"value"="37439"}; +{"key"="4457";"subkey"="891";"value"="37448"}; +{"key"="4458";"subkey"="891";"value"="37458"}; +{"key"="4459";"subkey"="891";"value"="37467"}; +{"key"="4460";"subkey"="892";"value"="37476"}; +{"key"="4461";"subkey"="892";"value"="37486"}; +{"key"="4462";"subkey"="892";"value"="37495"}; +{"key"="4463";"subkey"="892";"value"="37505"}; +{"key"="4464";"subkey"="892";"value"="37514"}; +{"key"="4465";"subkey"="893";"value"="37523"}; +{"key"="4466";"subkey"="893";"value"="37533"}; +{"key"="4467";"subkey"="893";"value"="37542"}; +{"key"="4468";"subkey"="893";"value"="37552"}; +{"key"="4469";"subkey"="893";"value"="37561"}; +{"key"="4470";"subkey"="894";"value"="37570"}; +{"key"="4471";"subkey"="894";"value"="37580"}; +{"key"="4472";"subkey"="894";"value"="37589"}; +{"key"="4473";"subkey"="894";"value"="37599"}; 
+{"key"="4474";"subkey"="894";"value"="37608"}; +{"key"="4475";"subkey"="895";"value"="37618"}; +{"key"="4476";"subkey"="895";"value"="37627"}; +{"key"="4477";"subkey"="895";"value"="37636"}; +{"key"="4478";"subkey"="895";"value"="37646"}; +{"key"="4479";"subkey"="895";"value"="37655"}; +{"key"="4480";"subkey"="896";"value"="37665"}; +{"key"="4481";"subkey"="896";"value"="37674"}; +{"key"="4482";"subkey"="896";"value"="37683"}; +{"key"="4483";"subkey"="896";"value"="37693"}; +{"key"="4484";"subkey"="896";"value"="37702"}; +{"key"="4485";"subkey"="897";"value"="37712"}; +{"key"="4486";"subkey"="897";"value"="37721"}; +{"key"="4487";"subkey"="897";"value"="37730"}; +{"key"="4488";"subkey"="897";"value"="37740"}; +{"key"="4489";"subkey"="897";"value"="37749"}; +{"key"="4490";"subkey"="898";"value"="37759"}; +{"key"="4491";"subkey"="898";"value"="37768"}; +{"key"="4492";"subkey"="898";"value"="37777"}; +{"key"="4493";"subkey"="898";"value"="37787"}; +{"key"="4494";"subkey"="898";"value"="37796"}; +{"key"="4495";"subkey"="899";"value"="37806"}; +{"key"="4496";"subkey"="899";"value"="37815"}; +{"key"="4497";"subkey"="899";"value"="37825"}; +{"key"="4498";"subkey"="899";"value"="37834"}; +{"key"="4499";"subkey"="899";"value"="37843"}; +{"key"="4500";"subkey"="900";"value"="37853"}; +{"key"="4501";"subkey"="900";"value"="37862"}; +{"key"="4502";"subkey"="900";"value"="37872"}; +{"key"="4503";"subkey"="900";"value"="37881"}; +{"key"="4504";"subkey"="900";"value"="37890"}; +{"key"="4505";"subkey"="901";"value"="37900"}; +{"key"="4506";"subkey"="901";"value"="37909"}; +{"key"="4507";"subkey"="901";"value"="37919"}; +{"key"="4508";"subkey"="901";"value"="37928"}; +{"key"="4509";"subkey"="901";"value"="37937"}; +{"key"="4510";"subkey"="902";"value"="37947"}; +{"key"="4511";"subkey"="902";"value"="37956"}; +{"key"="4512";"subkey"="902";"value"="37966"}; +{"key"="4513";"subkey"="902";"value"="37975"}; +{"key"="4514";"subkey"="902";"value"="37985"}; 
+{"key"="4515";"subkey"="903";"value"="37994"}; +{"key"="4516";"subkey"="903";"value"="38003"}; +{"key"="4517";"subkey"="903";"value"="38013"}; +{"key"="4518";"subkey"="903";"value"="38022"}; +{"key"="4519";"subkey"="903";"value"="38032"}; +{"key"="4520";"subkey"="904";"value"="38041"}; +{"key"="4521";"subkey"="904";"value"="38050"}; +{"key"="4522";"subkey"="904";"value"="38060"}; +{"key"="4523";"subkey"="904";"value"="38069"}; +{"key"="4524";"subkey"="904";"value"="38079"}; +{"key"="4525";"subkey"="905";"value"="38088"}; +{"key"="4526";"subkey"="905";"value"="38098"}; +{"key"="4527";"subkey"="905";"value"="38107"}; +{"key"="4528";"subkey"="905";"value"="38116"}; +{"key"="4529";"subkey"="905";"value"="38126"}; +{"key"="4530";"subkey"="906";"value"="38135"}; +{"key"="4531";"subkey"="906";"value"="38145"}; +{"key"="4532";"subkey"="906";"value"="38154"}; +{"key"="4533";"subkey"="906";"value"="38163"}; +{"key"="4534";"subkey"="906";"value"="38173"}; +{"key"="4535";"subkey"="907";"value"="38182"}; +{"key"="4536";"subkey"="907";"value"="38192"}; +{"key"="4537";"subkey"="907";"value"="38201"}; +{"key"="4538";"subkey"="907";"value"="38211"}; +{"key"="4539";"subkey"="907";"value"="38220"}; +{"key"="4540";"subkey"="908";"value"="38229"}; +{"key"="4541";"subkey"="908";"value"="38239"}; +{"key"="4542";"subkey"="908";"value"="38248"}; +{"key"="4543";"subkey"="908";"value"="38258"}; +{"key"="4544";"subkey"="908";"value"="38267"}; +{"key"="4545";"subkey"="909";"value"="38277"}; +{"key"="4546";"subkey"="909";"value"="38286"}; +{"key"="4547";"subkey"="909";"value"="38295"}; +{"key"="4548";"subkey"="909";"value"="38305"}; +{"key"="4549";"subkey"="909";"value"="38314"}; +{"key"="4550";"subkey"="910";"value"="38324"}; +{"key"="4551";"subkey"="910";"value"="38333"}; +{"key"="4552";"subkey"="910";"value"="38342"}; +{"key"="4553";"subkey"="910";"value"="38352"}; +{"key"="4554";"subkey"="910";"value"="38361"}; +{"key"="4555";"subkey"="911";"value"="38371"}; 
+{"key"="4556";"subkey"="911";"value"="38380"}; +{"key"="4557";"subkey"="911";"value"="38390"}; +{"key"="4558";"subkey"="911";"value"="38399"}; +{"key"="4559";"subkey"="911";"value"="38408"}; +{"key"="4560";"subkey"="912";"value"="38418"}; +{"key"="4561";"subkey"="912";"value"="38427"}; +{"key"="4562";"subkey"="912";"value"="38437"}; +{"key"="4563";"subkey"="912";"value"="38446"}; +{"key"="4564";"subkey"="912";"value"="38456"}; +{"key"="4565";"subkey"="913";"value"="38465"}; +{"key"="4566";"subkey"="913";"value"="38474"}; +{"key"="4567";"subkey"="913";"value"="38484"}; +{"key"="4568";"subkey"="913";"value"="38493"}; +{"key"="4569";"subkey"="913";"value"="38503"}; +{"key"="4570";"subkey"="914";"value"="38512"}; +{"key"="4571";"subkey"="914";"value"="38522"}; +{"key"="4572";"subkey"="914";"value"="38531"}; +{"key"="4573";"subkey"="914";"value"="38540"}; +{"key"="4574";"subkey"="914";"value"="38550"}; +{"key"="4575";"subkey"="915";"value"="38559"}; +{"key"="4576";"subkey"="915";"value"="38569"}; +{"key"="4577";"subkey"="915";"value"="38578"}; +{"key"="4578";"subkey"="915";"value"="38588"}; +{"key"="4579";"subkey"="915";"value"="38597"}; +{"key"="4580";"subkey"="916";"value"="38606"}; +{"key"="4581";"subkey"="916";"value"="38616"}; +{"key"="4582";"subkey"="916";"value"="38625"}; +{"key"="4583";"subkey"="916";"value"="38635"}; +{"key"="4584";"subkey"="916";"value"="38644"}; +{"key"="4585";"subkey"="917";"value"="38654"}; +{"key"="4586";"subkey"="917";"value"="38663"}; +{"key"="4587";"subkey"="917";"value"="38672"}; +{"key"="4588";"subkey"="917";"value"="38682"}; +{"key"="4589";"subkey"="917";"value"="38691"}; +{"key"="4590";"subkey"="918";"value"="38701"}; +{"key"="4591";"subkey"="918";"value"="38710"}; +{"key"="4592";"subkey"="918";"value"="38720"}; +{"key"="4593";"subkey"="918";"value"="38729"}; +{"key"="4594";"subkey"="918";"value"="38738"}; +{"key"="4595";"subkey"="919";"value"="38748"}; +{"key"="4596";"subkey"="919";"value"="38757"}; 
+{"key"="4597";"subkey"="919";"value"="38767"}; +{"key"="4598";"subkey"="919";"value"="38776"}; +{"key"="4599";"subkey"="919";"value"="38786"}; +{"key"="4600";"subkey"="920";"value"="38795"}; +{"key"="4601";"subkey"="920";"value"="38804"}; +{"key"="4602";"subkey"="920";"value"="38814"}; +{"key"="4603";"subkey"="920";"value"="38823"}; +{"key"="4604";"subkey"="920";"value"="38833"}; +{"key"="4605";"subkey"="921";"value"="38842"}; +{"key"="4606";"subkey"="921";"value"="38852"}; +{"key"="4607";"subkey"="921";"value"="38861"}; +{"key"="4608";"subkey"="921";"value"="38871"}; +{"key"="4609";"subkey"="921";"value"="38880"}; +{"key"="4610";"subkey"="922";"value"="38889"}; +{"key"="4611";"subkey"="922";"value"="38899"}; +{"key"="4612";"subkey"="922";"value"="38908"}; +{"key"="4613";"subkey"="922";"value"="38918"}; +{"key"="4614";"subkey"="922";"value"="38927"}; +{"key"="4615";"subkey"="923";"value"="38937"}; +{"key"="4616";"subkey"="923";"value"="38946"}; +{"key"="4617";"subkey"="923";"value"="38955"}; +{"key"="4618";"subkey"="923";"value"="38965"}; +{"key"="4619";"subkey"="923";"value"="38974"}; +{"key"="4620";"subkey"="924";"value"="38984"}; +{"key"="4621";"subkey"="924";"value"="38993"}; +{"key"="4622";"subkey"="924";"value"="39003"}; +{"key"="4623";"subkey"="924";"value"="39012"}; +{"key"="4624";"subkey"="924";"value"="39022"}; +{"key"="4625";"subkey"="925";"value"="39031"}; +{"key"="4626";"subkey"="925";"value"="39040"}; +{"key"="4627";"subkey"="925";"value"="39050"}; +{"key"="4628";"subkey"="925";"value"="39059"}; +{"key"="4629";"subkey"="925";"value"="39069"}; +{"key"="4630";"subkey"="926";"value"="39078"}; +{"key"="4631";"subkey"="926";"value"="39088"}; +{"key"="4632";"subkey"="926";"value"="39097"}; +{"key"="4633";"subkey"="926";"value"="39106"}; +{"key"="4634";"subkey"="926";"value"="39116"}; +{"key"="4635";"subkey"="927";"value"="39125"}; +{"key"="4636";"subkey"="927";"value"="39135"}; +{"key"="4637";"subkey"="927";"value"="39144"}; 
+{"key"="4638";"subkey"="927";"value"="39154"}; +{"key"="4639";"subkey"="927";"value"="39163"}; +{"key"="4640";"subkey"="928";"value"="39173"}; +{"key"="4641";"subkey"="928";"value"="39182"}; +{"key"="4642";"subkey"="928";"value"="39191"}; +{"key"="4643";"subkey"="928";"value"="39201"}; +{"key"="4644";"subkey"="928";"value"="39210"}; +{"key"="4645";"subkey"="929";"value"="39220"}; +{"key"="4646";"subkey"="929";"value"="39229"}; +{"key"="4647";"subkey"="929";"value"="39239"}; +{"key"="4648";"subkey"="929";"value"="39248"}; +{"key"="4649";"subkey"="929";"value"="39258"}; +{"key"="4650";"subkey"="930";"value"="39267"}; +{"key"="4651";"subkey"="930";"value"="39276"}; +{"key"="4652";"subkey"="930";"value"="39286"}; +{"key"="4653";"subkey"="930";"value"="39295"}; +{"key"="4654";"subkey"="930";"value"="39305"}; +{"key"="4655";"subkey"="931";"value"="39314"}; +{"key"="4656";"subkey"="931";"value"="39324"}; +{"key"="4657";"subkey"="931";"value"="39333"}; +{"key"="4658";"subkey"="931";"value"="39343"}; +{"key"="4659";"subkey"="931";"value"="39352"}; +{"key"="4660";"subkey"="932";"value"="39361"}; +{"key"="4661";"subkey"="932";"value"="39371"}; +{"key"="4662";"subkey"="932";"value"="39380"}; +{"key"="4663";"subkey"="932";"value"="39390"}; +{"key"="4664";"subkey"="932";"value"="39399"}; +{"key"="4665";"subkey"="933";"value"="39409"}; +{"key"="4666";"subkey"="933";"value"="39418"}; +{"key"="4667";"subkey"="933";"value"="39428"}; +{"key"="4668";"subkey"="933";"value"="39437"}; +{"key"="4669";"subkey"="933";"value"="39446"}; +{"key"="4670";"subkey"="934";"value"="39456"}; +{"key"="4671";"subkey"="934";"value"="39465"}; +{"key"="4672";"subkey"="934";"value"="39475"}; +{"key"="4673";"subkey"="934";"value"="39484"}; +{"key"="4674";"subkey"="934";"value"="39494"}; +{"key"="4675";"subkey"="935";"value"="39503"}; +{"key"="4676";"subkey"="935";"value"="39513"}; +{"key"="4677";"subkey"="935";"value"="39522"}; +{"key"="4678";"subkey"="935";"value"="39532"}; 
+{"key"="4679";"subkey"="935";"value"="39541"}; +{"key"="4680";"subkey"="936";"value"="39550"}; +{"key"="4681";"subkey"="936";"value"="39560"}; +{"key"="4682";"subkey"="936";"value"="39569"}; +{"key"="4683";"subkey"="936";"value"="39579"}; +{"key"="4684";"subkey"="936";"value"="39588"}; +{"key"="4685";"subkey"="937";"value"="39598"}; +{"key"="4686";"subkey"="937";"value"="39607"}; +{"key"="4687";"subkey"="937";"value"="39617"}; +{"key"="4688";"subkey"="937";"value"="39626"}; +{"key"="4689";"subkey"="937";"value"="39635"}; +{"key"="4690";"subkey"="938";"value"="39645"}; +{"key"="4691";"subkey"="938";"value"="39654"}; +{"key"="4692";"subkey"="938";"value"="39664"}; +{"key"="4693";"subkey"="938";"value"="39673"}; +{"key"="4694";"subkey"="938";"value"="39683"}; +{"key"="4695";"subkey"="939";"value"="39692"}; +{"key"="4696";"subkey"="939";"value"="39702"}; +{"key"="4697";"subkey"="939";"value"="39711"}; +{"key"="4698";"subkey"="939";"value"="39721"}; +{"key"="4699";"subkey"="939";"value"="39730"}; +{"key"="4700";"subkey"="940";"value"="39739"}; +{"key"="4701";"subkey"="940";"value"="39749"}; +{"key"="4702";"subkey"="940";"value"="39758"}; +{"key"="4703";"subkey"="940";"value"="39768"}; +{"key"="4704";"subkey"="940";"value"="39777"}; +{"key"="4705";"subkey"="941";"value"="39787"}; +{"key"="4706";"subkey"="941";"value"="39796"}; +{"key"="4707";"subkey"="941";"value"="39806"}; +{"key"="4708";"subkey"="941";"value"="39815"}; +{"key"="4709";"subkey"="941";"value"="39825"}; +{"key"="4710";"subkey"="942";"value"="39834"}; +{"key"="4711";"subkey"="942";"value"="39844"}; +{"key"="4712";"subkey"="942";"value"="39853"}; +{"key"="4713";"subkey"="942";"value"="39862"}; +{"key"="4714";"subkey"="942";"value"="39872"}; +{"key"="4715";"subkey"="943";"value"="39881"}; +{"key"="4716";"subkey"="943";"value"="39891"}; +{"key"="4717";"subkey"="943";"value"="39900"}; +{"key"="4718";"subkey"="943";"value"="39910"}; +{"key"="4719";"subkey"="943";"value"="39919"}; 
+{"key"="4720";"subkey"="944";"value"="39929"}; +{"key"="4721";"subkey"="944";"value"="39938"}; +{"key"="4722";"subkey"="944";"value"="39948"}; +{"key"="4723";"subkey"="944";"value"="39957"}; +{"key"="4724";"subkey"="944";"value"="39966"}; +{"key"="4725";"subkey"="945";"value"="39976"}; +{"key"="4726";"subkey"="945";"value"="39985"}; +{"key"="4727";"subkey"="945";"value"="39995"}; +{"key"="4728";"subkey"="945";"value"="40004"}; +{"key"="4729";"subkey"="945";"value"="40014"}; +{"key"="4730";"subkey"="946";"value"="40023"}; +{"key"="4731";"subkey"="946";"value"="40033"}; +{"key"="4732";"subkey"="946";"value"="40042"}; +{"key"="4733";"subkey"="946";"value"="40052"}; +{"key"="4734";"subkey"="946";"value"="40061"}; +{"key"="4735";"subkey"="947";"value"="40071"}; +{"key"="4736";"subkey"="947";"value"="40080"}; +{"key"="4737";"subkey"="947";"value"="40089"}; +{"key"="4738";"subkey"="947";"value"="40099"}; +{"key"="4739";"subkey"="947";"value"="40108"}; +{"key"="4740";"subkey"="948";"value"="40118"}; +{"key"="4741";"subkey"="948";"value"="40127"}; +{"key"="4742";"subkey"="948";"value"="40137"}; +{"key"="4743";"subkey"="948";"value"="40146"}; +{"key"="4744";"subkey"="948";"value"="40156"}; +{"key"="4745";"subkey"="949";"value"="40165"}; +{"key"="4746";"subkey"="949";"value"="40175"}; +{"key"="4747";"subkey"="949";"value"="40184"}; +{"key"="4748";"subkey"="949";"value"="40194"}; +{"key"="4749";"subkey"="949";"value"="40203"}; +{"key"="4750";"subkey"="950";"value"="40213"}; +{"key"="4751";"subkey"="950";"value"="40222"}; +{"key"="4752";"subkey"="950";"value"="40231"}; +{"key"="4753";"subkey"="950";"value"="40241"}; +{"key"="4754";"subkey"="950";"value"="40250"}; +{"key"="4755";"subkey"="951";"value"="40260"}; +{"key"="4756";"subkey"="951";"value"="40269"}; +{"key"="4757";"subkey"="951";"value"="40279"}; +{"key"="4758";"subkey"="951";"value"="40288"}; +{"key"="4759";"subkey"="951";"value"="40298"}; +{"key"="4760";"subkey"="952";"value"="40307"}; 
+{"key"="4761";"subkey"="952";"value"="40317"}; +{"key"="4762";"subkey"="952";"value"="40326"}; +{"key"="4763";"subkey"="952";"value"="40336"}; +{"key"="4764";"subkey"="952";"value"="40345"}; +{"key"="4765";"subkey"="953";"value"="40355"}; +{"key"="4766";"subkey"="953";"value"="40364"}; +{"key"="4767";"subkey"="953";"value"="40373"}; +{"key"="4768";"subkey"="953";"value"="40383"}; +{"key"="4769";"subkey"="953";"value"="40392"}; +{"key"="4770";"subkey"="954";"value"="40402"}; +{"key"="4771";"subkey"="954";"value"="40411"}; +{"key"="4772";"subkey"="954";"value"="40421"}; +{"key"="4773";"subkey"="954";"value"="40430"}; +{"key"="4774";"subkey"="954";"value"="40440"}; +{"key"="4775";"subkey"="955";"value"="40449"}; +{"key"="4776";"subkey"="955";"value"="40459"}; +{"key"="4777";"subkey"="955";"value"="40468"}; +{"key"="4778";"subkey"="955";"value"="40478"}; +{"key"="4779";"subkey"="955";"value"="40487"}; +{"key"="4780";"subkey"="956";"value"="40497"}; +{"key"="4781";"subkey"="956";"value"="40506"}; +{"key"="4782";"subkey"="956";"value"="40516"}; +{"key"="4783";"subkey"="956";"value"="40525"}; +{"key"="4784";"subkey"="956";"value"="40534"}; +{"key"="4785";"subkey"="957";"value"="40544"}; +{"key"="4786";"subkey"="957";"value"="40553"}; +{"key"="4787";"subkey"="957";"value"="40563"}; +{"key"="4788";"subkey"="957";"value"="40572"}; +{"key"="4789";"subkey"="957";"value"="40582"}; +{"key"="4790";"subkey"="958";"value"="40591"}; +{"key"="4791";"subkey"="958";"value"="40601"}; +{"key"="4792";"subkey"="958";"value"="40610"}; +{"key"="4793";"subkey"="958";"value"="40620"}; +{"key"="4794";"subkey"="958";"value"="40629"}; +{"key"="4795";"subkey"="959";"value"="40639"}; +{"key"="4796";"subkey"="959";"value"="40648"}; +{"key"="4797";"subkey"="959";"value"="40658"}; +{"key"="4798";"subkey"="959";"value"="40667"}; +{"key"="4799";"subkey"="959";"value"="40677"}; +{"key"="4800";"subkey"="960";"value"="40686"}; +{"key"="4801";"subkey"="960";"value"="40696"}; 
+{"key"="4802";"subkey"="960";"value"="40705"}; +{"key"="4803";"subkey"="960";"value"="40715"}; +{"key"="4804";"subkey"="960";"value"="40724"}; +{"key"="4805";"subkey"="961";"value"="40733"}; +{"key"="4806";"subkey"="961";"value"="40743"}; +{"key"="4807";"subkey"="961";"value"="40752"}; +{"key"="4808";"subkey"="961";"value"="40762"}; +{"key"="4809";"subkey"="961";"value"="40771"}; +{"key"="4810";"subkey"="962";"value"="40781"}; +{"key"="4811";"subkey"="962";"value"="40790"}; +{"key"="4812";"subkey"="962";"value"="40800"}; +{"key"="4813";"subkey"="962";"value"="40809"}; +{"key"="4814";"subkey"="962";"value"="40819"}; +{"key"="4815";"subkey"="963";"value"="40828"}; +{"key"="4816";"subkey"="963";"value"="40838"}; +{"key"="4817";"subkey"="963";"value"="40847"}; +{"key"="4818";"subkey"="963";"value"="40857"}; +{"key"="4819";"subkey"="963";"value"="40866"}; +{"key"="4820";"subkey"="964";"value"="40876"}; +{"key"="4821";"subkey"="964";"value"="40885"}; +{"key"="4822";"subkey"="964";"value"="40895"}; +{"key"="4823";"subkey"="964";"value"="40904"}; +{"key"="4824";"subkey"="964";"value"="40914"}; +{"key"="4825";"subkey"="965";"value"="40923"}; +{"key"="4826";"subkey"="965";"value"="40933"}; +{"key"="4827";"subkey"="965";"value"="40942"}; +{"key"="4828";"subkey"="965";"value"="40952"}; +{"key"="4829";"subkey"="965";"value"="40961"}; +{"key"="4830";"subkey"="966";"value"="40970"}; +{"key"="4831";"subkey"="966";"value"="40980"}; +{"key"="4832";"subkey"="966";"value"="40989"}; +{"key"="4833";"subkey"="966";"value"="40999"}; +{"key"="4834";"subkey"="966";"value"="41008"}; +{"key"="4835";"subkey"="967";"value"="41018"}; +{"key"="4836";"subkey"="967";"value"="41027"}; +{"key"="4837";"subkey"="967";"value"="41037"}; +{"key"="4838";"subkey"="967";"value"="41046"}; +{"key"="4839";"subkey"="967";"value"="41056"}; +{"key"="4840";"subkey"="968";"value"="41065"}; +{"key"="4841";"subkey"="968";"value"="41075"}; +{"key"="4842";"subkey"="968";"value"="41084"}; 
+{"key"="4843";"subkey"="968";"value"="41094"}; +{"key"="4844";"subkey"="968";"value"="41103"}; +{"key"="4845";"subkey"="969";"value"="41113"}; +{"key"="4846";"subkey"="969";"value"="41122"}; +{"key"="4847";"subkey"="969";"value"="41132"}; +{"key"="4848";"subkey"="969";"value"="41141"}; +{"key"="4849";"subkey"="969";"value"="41151"}; +{"key"="4850";"subkey"="970";"value"="41160"}; +{"key"="4851";"subkey"="970";"value"="41170"}; +{"key"="4852";"subkey"="970";"value"="41179"}; +{"key"="4853";"subkey"="970";"value"="41189"}; +{"key"="4854";"subkey"="970";"value"="41198"}; +{"key"="4855";"subkey"="971";"value"="41208"}; +{"key"="4856";"subkey"="971";"value"="41217"}; +{"key"="4857";"subkey"="971";"value"="41227"}; +{"key"="4858";"subkey"="971";"value"="41236"}; +{"key"="4859";"subkey"="971";"value"="41246"}; +{"key"="4860";"subkey"="972";"value"="41255"}; +{"key"="4861";"subkey"="972";"value"="41265"}; +{"key"="4862";"subkey"="972";"value"="41274"}; +{"key"="4863";"subkey"="972";"value"="41284"}; +{"key"="4864";"subkey"="972";"value"="41293"}; +{"key"="4865";"subkey"="973";"value"="41302"}; +{"key"="4866";"subkey"="973";"value"="41312"}; +{"key"="4867";"subkey"="973";"value"="41321"}; +{"key"="4868";"subkey"="973";"value"="41331"}; +{"key"="4869";"subkey"="973";"value"="41340"}; +{"key"="4870";"subkey"="974";"value"="41350"}; +{"key"="4871";"subkey"="974";"value"="41359"}; +{"key"="4872";"subkey"="974";"value"="41369"}; +{"key"="4873";"subkey"="974";"value"="41378"}; +{"key"="4874";"subkey"="974";"value"="41388"}; +{"key"="4875";"subkey"="975";"value"="41397"}; +{"key"="4876";"subkey"="975";"value"="41407"}; +{"key"="4877";"subkey"="975";"value"="41416"}; +{"key"="4878";"subkey"="975";"value"="41426"}; +{"key"="4879";"subkey"="975";"value"="41435"}; +{"key"="4880";"subkey"="976";"value"="41445"}; +{"key"="4881";"subkey"="976";"value"="41454"}; +{"key"="4882";"subkey"="976";"value"="41464"}; +{"key"="4883";"subkey"="976";"value"="41473"}; 
+{"key"="4884";"subkey"="976";"value"="41483"}; +{"key"="4885";"subkey"="977";"value"="41492"}; +{"key"="4886";"subkey"="977";"value"="41502"}; +{"key"="4887";"subkey"="977";"value"="41511"}; +{"key"="4888";"subkey"="977";"value"="41521"}; +{"key"="4889";"subkey"="977";"value"="41530"}; +{"key"="4890";"subkey"="978";"value"="41540"}; +{"key"="4891";"subkey"="978";"value"="41549"}; +{"key"="4892";"subkey"="978";"value"="41559"}; +{"key"="4893";"subkey"="978";"value"="41568"}; +{"key"="4894";"subkey"="978";"value"="41578"}; +{"key"="4895";"subkey"="979";"value"="41587"}; +{"key"="4896";"subkey"="979";"value"="41597"}; +{"key"="4897";"subkey"="979";"value"="41606"}; +{"key"="4898";"subkey"="979";"value"="41616"}; +{"key"="4899";"subkey"="979";"value"="41625"}; +{"key"="4900";"subkey"="980";"value"="41635"}; +{"key"="4901";"subkey"="980";"value"="41644"}; +{"key"="4902";"subkey"="980";"value"="41654"}; +{"key"="4903";"subkey"="980";"value"="41663"}; +{"key"="4904";"subkey"="980";"value"="41673"}; +{"key"="4905";"subkey"="981";"value"="41682"}; +{"key"="4906";"subkey"="981";"value"="41692"}; +{"key"="4907";"subkey"="981";"value"="41701"}; +{"key"="4908";"subkey"="981";"value"="41711"}; +{"key"="4909";"subkey"="981";"value"="41720"}; +{"key"="4910";"subkey"="982";"value"="41730"}; +{"key"="4911";"subkey"="982";"value"="41739"}; +{"key"="4912";"subkey"="982";"value"="41749"}; +{"key"="4913";"subkey"="982";"value"="41758"}; +{"key"="4914";"subkey"="982";"value"="41768"}; +{"key"="4915";"subkey"="983";"value"="41777"}; +{"key"="4916";"subkey"="983";"value"="41787"}; +{"key"="4917";"subkey"="983";"value"="41796"}; +{"key"="4918";"subkey"="983";"value"="41806"}; +{"key"="4919";"subkey"="983";"value"="41815"}; +{"key"="4920";"subkey"="984";"value"="41825"}; +{"key"="4921";"subkey"="984";"value"="41834"}; +{"key"="4922";"subkey"="984";"value"="41844"}; +{"key"="4923";"subkey"="984";"value"="41853"}; +{"key"="4924";"subkey"="984";"value"="41863"}; 
+{"key"="4925";"subkey"="985";"value"="41872"}; +{"key"="4926";"subkey"="985";"value"="41882"}; +{"key"="4927";"subkey"="985";"value"="41891"}; +{"key"="4928";"subkey"="985";"value"="41901"}; +{"key"="4929";"subkey"="985";"value"="41910"}; +{"key"="4930";"subkey"="986";"value"="41920"}; +{"key"="4931";"subkey"="986";"value"="41929"}; +{"key"="4932";"subkey"="986";"value"="41939"}; +{"key"="4933";"subkey"="986";"value"="41948"}; +{"key"="4934";"subkey"="986";"value"="41958"}; +{"key"="4935";"subkey"="987";"value"="41967"}; +{"key"="4936";"subkey"="987";"value"="41977"}; +{"key"="4937";"subkey"="987";"value"="41986"}; +{"key"="4938";"subkey"="987";"value"="41996"}; +{"key"="4939";"subkey"="987";"value"="42005"}; +{"key"="4940";"subkey"="988";"value"="42015"}; +{"key"="4941";"subkey"="988";"value"="42024"}; +{"key"="4942";"subkey"="988";"value"="42034"}; +{"key"="4943";"subkey"="988";"value"="42043"}; +{"key"="4944";"subkey"="988";"value"="42053"}; +{"key"="4945";"subkey"="989";"value"="42062"}; +{"key"="4946";"subkey"="989";"value"="42072"}; +{"key"="4947";"subkey"="989";"value"="42081"}; +{"key"="4948";"subkey"="989";"value"="42091"}; +{"key"="4949";"subkey"="989";"value"="42100"}; +{"key"="4950";"subkey"="990";"value"="42110"}; +{"key"="4951";"subkey"="990";"value"="42119"}; +{"key"="4952";"subkey"="990";"value"="42129"}; +{"key"="4953";"subkey"="990";"value"="42138"}; +{"key"="4954";"subkey"="990";"value"="42148"}; +{"key"="4955";"subkey"="991";"value"="42157"}; +{"key"="4956";"subkey"="991";"value"="42167"}; +{"key"="4957";"subkey"="991";"value"="42176"}; +{"key"="4958";"subkey"="991";"value"="42186"}; +{"key"="4959";"subkey"="991";"value"="42195"}; +{"key"="4960";"subkey"="992";"value"="42205"}; +{"key"="4961";"subkey"="992";"value"="42214"}; +{"key"="4962";"subkey"="992";"value"="42224"}; +{"key"="4963";"subkey"="992";"value"="42233"}; +{"key"="4964";"subkey"="992";"value"="42243"}; +{"key"="4965";"subkey"="993";"value"="42252"}; 
+{"key"="4966";"subkey"="993";"value"="42262"}; +{"key"="4967";"subkey"="993";"value"="42272"}; +{"key"="4968";"subkey"="993";"value"="42281"}; +{"key"="4969";"subkey"="993";"value"="42291"}; +{"key"="4970";"subkey"="994";"value"="42300"}; +{"key"="4971";"subkey"="994";"value"="42310"}; +{"key"="4972";"subkey"="994";"value"="42319"}; +{"key"="4973";"subkey"="994";"value"="42329"}; +{"key"="4974";"subkey"="994";"value"="42338"}; +{"key"="4975";"subkey"="995";"value"="42348"}; +{"key"="4976";"subkey"="995";"value"="42357"}; +{"key"="4977";"subkey"="995";"value"="42367"}; +{"key"="4978";"subkey"="995";"value"="42376"}; +{"key"="4979";"subkey"="995";"value"="42386"}; +{"key"="4980";"subkey"="996";"value"="42395"}; +{"key"="4981";"subkey"="996";"value"="42405"}; +{"key"="4982";"subkey"="996";"value"="42414"}; +{"key"="4983";"subkey"="996";"value"="42424"}; +{"key"="4984";"subkey"="996";"value"="42433"}; +{"key"="4985";"subkey"="997";"value"="42443"}; +{"key"="4986";"subkey"="997";"value"="42452"}; +{"key"="4987";"subkey"="997";"value"="42462"}; +{"key"="4988";"subkey"="997";"value"="42471"}; +{"key"="4989";"subkey"="997";"value"="42481"}; +{"key"="4990";"subkey"="998";"value"="42490"}; +{"key"="4991";"subkey"="998";"value"="42500"}; +{"key"="4992";"subkey"="998";"value"="42509"}; +{"key"="4993";"subkey"="998";"value"="42519"}; +{"key"="4994";"subkey"="998";"value"="42528"}; +{"key"="4995";"subkey"="999";"value"="42538"}; +{"key"="4996";"subkey"="999";"value"="42547"}; +{"key"="4997";"subkey"="999";"value"="42557"}; +{"key"="4998";"subkey"="999";"value"="42566"}; +{"key"="4999";"subkey"="999";"value"="42576"}; diff --git a/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql new file mode 100644 index 00000000000..b092c781271 --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/test/cases/Basic.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +SELECT + HyperLogLog(key) AS str, + 
CountDistinctEstimate(CAST(subkey AS Double)) AS `double`, + HLL(CAST(value AS Int64), 18) AS `int` +FROM Input; + diff --git a/yql/essentials/udfs/common/hyperloglog/test/ya.make b/yql/essentials/udfs/common/hyperloglog/test/ya.make new file mode 100644 index 00000000000..5eac077dcfa --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/test/ya.make @@ -0,0 +1,16 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS( + yql/essentials/udfs/common/hyperloglog + yql/essentials/udfs/common/digest +) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/hyperloglog/ya.make b/yql/essentials/udfs/common/hyperloglog/ya.make new file mode 100644 index 00000000000..b89a154d66b --- /dev/null +++ b/yql/essentials/udfs/common/hyperloglog/ya.make @@ -0,0 +1,32 @@ +IF (YQL_PACKAGED) + PACKAGE() + + FROM_SANDBOX( + FILE 7319897411 OUT_NOAUTO libhyperloglog_udf.so + ) + + END() +ELSE() +YQL_UDF_CONTRIB(hyperloglog_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + hyperloglog_udf.cpp + ) + + PEERDIR( + library/cpp/hyperloglog + ) + + END() + +ENDIF() + +RECURSE_FOR_TESTS( + test +)
\ No newline at end of file diff --git a/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp new file mode 100644 index 00000000000..6559e4a8425 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp @@ -0,0 +1,477 @@ +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_registrator.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <library/cpp/regex/hyperscan/hyperscan.h> +#include <library/cpp/regex/pcre/regexp.h> + +#include <util/charset/utf8.h> +#include <util/string/split.h> +#include <util/string/builder.h> +#include <util/system/cpu_id.h> + +using namespace NHyperscan; +using namespace NKikimr; +using namespace NUdf; + +namespace { + using TOptions = ui32; + class THyperscanUdfBase: public TBoxedValue { + protected: + constexpr static const char* IGNORE_CASE_PREFIX = "(?i)"; + static void SetCommonOptions(TString& regex, TOptions& options) { + options |= HS_FLAG_ALLOWEMPTY; + if (regex.StartsWith(IGNORE_CASE_PREFIX)) { + options |= HS_FLAG_CASELESS; + regex = regex.substr(4); + } + if (UTF8Detect(regex) == UTF8) { + options |= HS_FLAG_UTF8; + } + if (NX86::HaveAVX2()) { + options |= HS_CPU_FEATURES_AVX2; + } + } + }; + + class THyperscanMatch: public THyperscanUdfBase { + public: + enum class EMode { + NORMAL, + BACKTRACKING, + MULTI + }; + + class TFactory: public THyperscanUdfBase { + public: + TFactory( + TSourcePosition pos, + bool surroundMode, + THyperscanMatch::EMode mode, + size_t regexpsCount = 0) + : Pos_(pos) + , SurroundMode(surroundMode) + , Mode(mode) + , RegexpsCount(regexpsCount) + { + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + return TUnboxedValuePod( + new THyperscanMatch( + valueBuilder, + args[0], + 
SurroundMode, + Mode, + Pos_, + RegexpsCount)); + } + + TSourcePosition Pos_; + bool SurroundMode; + THyperscanMatch::EMode Mode; + size_t RegexpsCount; + }; + + static const TStringRef& Name(bool isGrep, THyperscanMatch::EMode mode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto backtrackingMatch = TStringRef::Of("BacktrackingMatch"); + static auto backtrackingGrep = TStringRef::Of("BacktrackingGrep"); + static auto multiMatch = TStringRef::Of("MultiMatch"); + static auto multiGrep = TStringRef::Of("MultiGrep"); + if (isGrep) { + switch (mode) { + case THyperscanMatch::EMode::NORMAL: + return grep; + case THyperscanMatch::EMode::BACKTRACKING: + return backtrackingGrep; + case THyperscanMatch::EMode::MULTI: + return multiGrep; + } + } else { + switch (mode) { + case THyperscanMatch::EMode::NORMAL: + return match; + case THyperscanMatch::EMode::BACKTRACKING: + return backtrackingMatch; + case THyperscanMatch::EMode::MULTI: + return multiMatch; + } + } + + Y_ABORT("Unexpected"); + } + + THyperscanMatch( + const IValueBuilder*, + const TUnboxedValuePod& runConfig, + bool surroundMode, + THyperscanMatch::EMode mode, + TSourcePosition pos, + size_t regexpsCount) + : Regex_(runConfig.AsStringRef()) + , Mode(mode) + , Pos_(pos) + , RegexpsCount(regexpsCount) + { + try { + TOptions options = 0; + int pcreOptions = REG_EXTENDED; + if (Mode == THyperscanMatch::EMode::BACKTRACKING && Regex_.StartsWith(IGNORE_CASE_PREFIX)) { + pcreOptions |= REG_ICASE; + } + auto regex = Regex_; + SetCommonOptions(regex, options); + switch (mode) { + case THyperscanMatch::EMode::NORMAL: { + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + Database_ = Compile(regex, options); + break; + } + case THyperscanMatch::EMode::BACKTRACKING: { + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + try { + Database_ = Compile(regex, options); + Mode = THyperscanMatch::EMode::NORMAL; + } 
catch (const TCompileException&) { + options |= HS_FLAG_PREFILTER; + Database_ = Compile(regex, options); + Fallback_ = TRegExMatch(regex, pcreOptions); + } + break; + } + case THyperscanMatch::EMode::MULTI: { + std::vector<TString> regexes; + TVector<const char*> cregexes; + TVector<TOptions> flags; + TVector<TOptions> ids; + + const auto func = [&regexes, &flags, surroundMode](const std::string_view& token) { + TString regex(token); + + TOptions opt = 0; + SetCommonOptions(regex, opt); + + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + + regexes.emplace_back(std::move(regex)); + flags.emplace_back(opt); + }; + StringSplitter(Regex_).Split('\n').Consume(func); + + std::transform(regexes.cbegin(), regexes.cend(), std::back_inserter(cregexes), std::bind(&TString::c_str, std::placeholders::_1)); + ids.resize(regexes.size()); + std::iota(ids.begin(), ids.end(), 0); + + Database_ = CompileMulti(cregexes, flags, ids); + break; + } + } + Scratch_ = MakeScratch(Database_); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + TUnboxedValue* items = nullptr; + TUnboxedValue tuple; + size_t i = 0; + + if (Mode == THyperscanMatch::EMode::MULTI) { + tuple = valueBuilder->NewArray(RegexpsCount, items); + for (i = 0; i < RegexpsCount; ++i) { + items[i] = TUnboxedValuePod(false); + } + } + + if (args[0]) { + // XXX: StringRef data might not be a NTBS, though the function + // <TRegExMatch::Match> expects ASCIIZ string. Explicitly copy + // the given argument string and append the NUL terminator to it. 
+ const TString input(args[0].AsStringRef()); + if (Y_UNLIKELY(Mode == THyperscanMatch::EMode::MULTI)) { + auto callback = [items] (TOptions id, ui64 /* from */, ui64 /* to */) { + items[id] = TUnboxedValuePod(true); + }; + Scan(Database_, Scratch_, input, callback); + return tuple; + } else { + bool matches = Matches(Database_, Scratch_, input); + if (matches && Mode == THyperscanMatch::EMode::BACKTRACKING) { + matches = Fallback_.Match(input.data()); + } + return TUnboxedValuePod(matches); + } + + } else { + return Mode == THyperscanMatch::EMode::MULTI ? tuple : TUnboxedValue(TUnboxedValuePod(false)); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + private: + const TString Regex_; + THyperscanMatch::EMode Mode; + const TSourcePosition Pos_; + const size_t RegexpsCount; + TDatabase Database_; + TScratch Scratch_; + TRegExMatch Fallback_; + }; + + class THyperscanCapture: public THyperscanUdfBase { + public: + class TFactory: public THyperscanUdfBase { + public: + TFactory(TSourcePosition pos) + : Pos_(pos) + {} + + private: + TUnboxedValue Run(const IValueBuilder*, + const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new THyperscanCapture(args[0], Pos_)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + private: + TSourcePosition Pos_; + }; + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Capture"); + return name; + } + + THyperscanCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + { + Regex_ = runConfig.AsStringRef(); + TOptions options = HS_FLAG_SOM_LEFTMOST; + + SetCommonOptions(Regex_, options); + + Database_ = Compile(Regex_, options); + Scratch_ = MakeScratch(Database_); + } + + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (const auto arg = args[0]) { + + 
TUnboxedValue result; + auto callback = [valueBuilder, arg, &result] (TOptions id, ui64 from, ui64 to) { + Y_UNUSED(id); + if (!result) { + result = valueBuilder->SubString(arg, from, to); + } + }; + Scan(Database_, Scratch_, arg.AsStringRef(), callback); + return result; + } + + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + TSourcePosition Pos_; + TString Regex_; + TDatabase Database_; + TScratch Scratch_; + }; + + class THyperscanReplace: public THyperscanUdfBase { + public: + class TFactory: public THyperscanUdfBase { + public: + TFactory(TSourcePosition pos) + : Pos_(pos) + {} + + private: + TUnboxedValue Run(const IValueBuilder*, + const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new THyperscanReplace(args[0], Pos_)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + private: + TSourcePosition Pos_; + }; + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Replace"); + return name; + } + + THyperscanReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + { + Regex_ = runConfig.AsStringRef(); + TOptions options = HS_FLAG_SOM_LEFTMOST; + + SetCommonOptions(Regex_, options); + + + Database_ = Compile(Regex_, options); + Scratch_ = MakeScratch(Database_); + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view replacement(args[1].AsStringRef()); + + ui64 index = 0; + TStringBuilder result; + auto callback = [input, replacement, &index, &result] (TOptions id, ui64 from, ui64 to) { + Y_UNUSED(id); + if (index != from) { + result << input.substr(index, from - index); + } + result << replacement; + index = to; + }; + Scan(Database_, Scratch_, input, callback); + + if (!index) { 
+ return args[0]; + } + + result << input.substr(index); + return valueBuilder->NewString(result); + } + + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + TSourcePosition Pos_; + TString Regex_; + TDatabase Database_; + TScratch Scratch_; + }; + + class THyperscanModule: public IUdfModule { + public: + TStringRef Name() const { + return TStringRef::Of("Hyperscan"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL)); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL)); + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING)); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING)); + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); + sink.Add(THyperscanCapture::Name()); + sink.Add(THyperscanReplace::Name()); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + try { + Y_UNUSED(userType); + + bool typesOnly = (flags & TFlags::TypesOnly); + bool isMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL) == name); + bool isGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL) == name); + bool isBacktrackingMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING) == name); + bool isBacktrackingGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING) == name); + bool isMultiMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI) == name); + bool isMultiGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI) == name); + + if (isMatch || isGrep) 
{ + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isGrep, THyperscanMatch::EMode::NORMAL)); + } + } else if (isBacktrackingMatch || isBacktrackingGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isBacktrackingGrep, THyperscanMatch::EMode::BACKTRACKING)); + } + } else if (isMultiMatch || isMultiGrep) { + auto boolType = builder.SimpleType<bool>(); + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const std::string_view regexp(typeConfig); + size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; + auto tuple = builder.Tuple(); + for (size_t i = 0; i < regexpCount; ++i) { + tuple->Add(boolType); + } + auto tupleType = tuple->Build(); + builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isMultiGrep, THyperscanMatch::EMode::MULTI, regexpCount)); + } + } else if (THyperscanCapture::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanCapture::TFactory(builder.GetSourcePosition())); + } + } else if (THyperscanReplace::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanReplace::TFactory(builder.GetSourcePosition())); + } + } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } + }; + + class TPcreModule : public THyperscanModule { + public: + TStringRef Name() const { + return TStringRef::Of("Pcre"); + } + }; +} + 
+REGISTER_MODULES(THyperscanModule, TPcreModule) diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/result.json b/yql/essentials/udfs/common/hyperscan/test/canondata/result.json new file mode 100644 index 00000000000..93e6411aec8 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/canondata/result.json @@ -0,0 +1,17 @@ +{ + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ], + "test.test[CharacterClasses]": [ + { + "uri": "file://test.test_CharacterClasses_/results.txt" + } + ], + "test.test[Error]": [ + { + "uri": "file://test.test_Error_/extracted" + } + ] +} diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..7e4dd70678c --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,441 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "match"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "grep"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "insensitive_grep"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "multi_match"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ] + ] + ] + ]; + [ + "some_multi_match"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "multi_match2"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ] + ] + ] + ]; + [ + "some_multi_match2a"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "some_multi_match2b"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "some_multi_match2c"; + [ + "DataType"; + 
"Bool" + ] + ]; + [ + "capture"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "capture_many"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "replace"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "backtracking"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + ""; + %false; + %false; + %false; + [ + %false; + %true; + %false; + %true; + %false; + %true; + %false + ]; + %false; + [ + %false; + %true; + %false; + %true; + %false + ]; + %false; + %true; + %false; + #; + #; + [ + "" + ]; + %false + ]; + [ + "a"; + %true; + %false; + %false; + [ + %true; + %false; + %true; + %false; + %true; + %false; + %false + ]; + %true; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "a" + ]; + %false + ]; + [ + "aax"; + %true; + %false; + %false; + [ + %true; + %false; + %true; + %false; + %false; + %false; + %false + ]; + %true; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + [ + "aa" + ]; + #; + [ + "aax" + ]; + %true + ]; + [ + "xaax"; + %false; + %false; + %false; + [ + %false; + %false; + %true; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + [ + "xaa" + ]; + [ + "xa" + ]; + [ + "bax" + ]; + %false + ]; + [ + "xaaxaaxaa"; + %false; + %true; + %true; + [ + %false; + %false; + %true; + %false; + %true; + %false; + %true + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + [ + "xaa" + ]; + [ + "xa" + ]; + [ + "bababa" + ]; + %false + ]; + [ + "XAXA"; + %false; + %false; + %true; + [ + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "XAXA" + ]; + %false + ]; + [ + "7"; + %false; + %false; + %false; + [ + %false; + 
%false; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "7" + ]; + %false + ]; + [ + "QC transfer task JAVA"; + %false; + %false; + %false; + [ + %false; + %false; + %true; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %true; + %false; + %true + ]; + %false; + %false; + %true; + #; + #; + [ + "QC transfer task JAVA" + ]; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt new file mode 100644 index 00000000000..7fe80ff82a7 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_CharacterClasses_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "digits"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "spaces"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "xx000xx"; + %true; + %false + ]; + [ + "lLlLl"; + %false; + %false + ]; + [ + "a1 b2 c3"; + %true; + %true + ]; + [ + "xxx yyy"; + %false; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted new file mode 100644 index 00000000000..4d090620be0 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/canondata/test.test_Error_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:8:1: Fatal: Execution of node: YtMap! + SELECT $match(value) AS match FROM Input; + ^ + <tmp_path>/program.sql:<main>:6:21: Fatal: library/cpp/regex/hyperscan/hyperscan.cpp:102: Failed to compile regex: ^*$. Error message (hyperscan): Invalid repeat at index 1. + $match = Hyperscan::Match("*"); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Basic.in b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.in new file mode 100644 index 00000000000..ddc62722474 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.in @@ -0,0 +1,8 @@ +{"key"="1";"subkey"="1";"value"=""}; +{"key"="2";"subkey"="2";"value"="a"}; +{"key"="3";"subkey"="3";"value"="aax"}; +{"key"="4";"subkey"="4";"value"="xaax"}; +{"key"="5";"subkey"="5";"value"="xaaxaaxaa"}; +{"key"="6";"subkey"="6";"value"="XAXA"}; +{"key"="7";"subkey"="7";"value"="7"}; +{"key"="8";"subkey"="8";"value"="QC transfer task JAVA"}; diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql new file mode 100644 index 00000000000..4df22e6603a --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/cases/Basic.sql @@ -0,0 +1,33 @@ +/* syntax version 1 */ +$match = Hyperscan::Match("a.*"); +$grep = Hyperscan::Grep("axa"); +$insensitive_grep = Hyperscan::Grep("(?i)axa"); +$multi_match = Hyperscan::MultiMatch(@@a.* +.*a.* +.*a +.*axa.*@@); +$multi_match2 = Hyperscan::MultiMatch(@@YQL.* +QC.* +.*transfer task.*@@); + +$capture = Hyperscan::Capture(".*a{2}.*"); +$capture_many = Hyperscan::Capture(".*x(a+).*"); +$replace = Hyperscan::Replace("xa"); +$backtracking_grep = Hyperscan::BacktrackingGrep("(?<!xa)ax"); + +SELECT + value, + $match(value) AS match, + $grep(value) AS grep, + $insensitive_grep(value) AS insensitive_grep, + $multi_match(value) AS multi_match, + $multi_match(value).0 AS some_multi_match, + $multi_match2(value) AS multi_match2, + $multi_match2(value).0 AS some_multi_match2a, + $multi_match2(value).1 AS some_multi_match2b, + $multi_match2(value).2 AS some_multi_match2c, + $capture(value) AS capture, + $capture_many(value) AS capture_many, + $replace(value, "b") AS replace, + $backtracking_grep(value) as backtracking +FROM Input; diff --git 
a/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in new file mode 100644 index 00000000000..e2737f40a1e --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.in @@ -0,0 +1,4 @@ +{"key"="1";"subkey"="1";"value"="xx000xx"}; +{"key"="2";"subkey"="2";"value"="lLlLl"}; +{"key"="3";"subkey"="3";"value"="a1 b2 c3"}; +{"key"="4";"subkey"="4";"value"="xxx yyy"}; diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql new file mode 100644 index 00000000000..4f19373b653 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/cases/CharacterClasses.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +$digits = Hyperscan::Grep("\\d+"); +$spaces = Hyperscan::Grep("\\s+"); + +SELECT + value, + $digits(value) AS digits, + $spaces(value) AS spaces +FROM Input; diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg b/yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg new file mode 100644 index 00000000000..7f181f61d6a --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/cases/Error.cfg @@ -0,0 +1,2 @@ +in yt.plato.Input Basic.in +xfail diff --git a/yql/essentials/udfs/common/hyperscan/test/cases/Error.sql b/yql/essentials/udfs/common/hyperscan/test/cases/Error.sql new file mode 100644 index 00000000000..26ece06ca0b --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/cases/Error.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +PRAGMA config.flags("LLVM","OFF"); -- TODO: fix error handling with LLVM +$match = Hyperscan::Match("*"); +SELECT $match(value) AS match FROM Input; diff --git a/yql/essentials/udfs/common/hyperscan/test/ya.make b/yql/essentials/udfs/common/hyperscan/test/ya.make new file mode 100644 index 00000000000..2aa229b0b69 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/test/ya.make @@ -0,0 +1,17 @@ +IF (OS_LINUX 
AND CLANG) + +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/hyperscan) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() + +ENDIF() diff --git a/yql/essentials/udfs/common/hyperscan/ya.make b/yql/essentials/udfs/common/hyperscan/ya.make new file mode 100644 index 00000000000..49c95d67c26 --- /dev/null +++ b/yql/essentials/udfs/common/hyperscan/ya.make @@ -0,0 +1,42 @@ +IF (YQL_PACKAGED) + PACKAGE() + + FROM_SANDBOX( + FILE 7319899245 OUT_NOAUTO libhyperscan_udf.so + ) + + END() +ELSE() + + # NO_BUILD_IF does not like logical expressions by now + # see DEVTOOLSSUPPORT-44378 + IF (NOT OS_LINUX OR NOT CLANG) + SET(DISABLE_HYPERSCAN_BUILD) + ENDIF() + + NO_BUILD_IF(DISABLE_HYPERSCAN_BUILD) + +YQL_UDF_CONTRIB(hyperscan_udf) + + YQL_ABI_VERSION( + 2 + 27 + 0 + ) + + SRCS( + hyperscan_udf.cpp + ) + + PEERDIR( + library/cpp/regex/hyperscan + library/cpp/regex/pcre + ) + + END() + +ENDIF() + +RECURSE_FOR_TESTS( + test +)
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/ip_base.cpp b/yql/essentials/udfs/common/ip_base/ip_base.cpp new file mode 100644 index 00000000000..1c017e2a5d2 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/ip_base.cpp @@ -0,0 +1,7 @@ +#include <yql/essentials/public/udf/udf_helpers.h> + +#include "lib/ip_base_udf.h" + +SIMPLE_MODULE(TIpModule, EXPORTED_IP_BASE_UDF) +REGISTER_MODULES(TIpModule) + diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp new file mode 100644 index 00000000000..a0617e77283 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp @@ -0,0 +1 @@ +#include "ip_base_udf.h"
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h new file mode 100644 index 00000000000..dfb9cc29c0c --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h @@ -0,0 +1,358 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <library/cpp/ipv6_address/ipv6_address.h> +#include <library/cpp/ipmath/ipmath.h> +#include <util/generic/buffer.h> + +namespace { + using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>; + using TOptionalString = NKikimr::NUdf::TOptional<char*>; + using TOptionalByte = NKikimr::NUdf::TOptional<ui8>; + using TStringRef = NKikimr::NUdf::TStringRef; + using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; + using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; + + ui8 GetAddressRangePrefix(const TIpAddressRange& range) { + if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) { + return 0; + } + if (range.Size() == 0) { + return range.Type() == TIpv6Address::Ipv4 ? 32 : 128; + } + ui128 size = range.Size(); + size_t sizeLog = MostSignificantBit(size); + return ui8((range.Type() == TIpv6Address::Ipv4 ? 
32 : 128) - sizeLog); + } + + struct TRawIp4 { + ui8 a, b, c, d; + + static TRawIp4 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 24 & 0xff), + ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), + ui8(x & 0xff) + }; + } + + static TRawIp4 MaskFromPrefix(ui8 prefix) { + ui128 x = ui128(-1) << int(32 - prefix); + x &= ui128(ui32(-1)); + return FromIpAddress({x, TIpv6Address::Ipv4}); + } + + TIpv6Address ToIpAddress() const { + return {a, b, c, d}; + } + + std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const { + return {{ + ui8(a & mask.a), + ui8(b & mask.b), + ui8(c & mask.c), + ui8(d & mask.d) + },{ + ui8(a | ~mask.a), + ui8(b | ~mask.b), + ui8(c | ~mask.c), + ui8(d | ~mask.d) + }}; + } + }; + + struct TRawIp4Subnet { + TRawIp4 base, mask; + + static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))}; + } + + TIpAddressRange ToIpRange() const { + auto range = base.ApplyMask(mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; + } + }; + + struct TRawIp6 { + ui8 a1, a0, b1, b0, c1, c0, d1, d0, e1, e0, f1, f0, g1, g0, h1, h0; + + static TRawIp6 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), + ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), + ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), + ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), + ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), + ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), + ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), ui8(x & 0xff) + }; + } + + static TRawIp6 MaskFromPrefix(ui8 prefix) { + ui128 x = prefix == 0 ? 
ui128(0) : ui128(-1) << int(128 - prefix); + return FromIpAddress({x, TIpv6Address::Ipv6}); + } + + TIpv6Address ToIpAddress() const { + return {ui16(ui32(a1) << ui32(8) | ui32(a0)), + ui16(ui32(b1) << ui32(8) | ui32(b0)), + ui16(ui32(c1) << ui32(8) | ui32(c0)), + ui16(ui32(d1) << ui32(8) | ui32(d0)), + ui16(ui32(e1) << ui32(8) | ui32(e0)), + ui16(ui32(f1) << ui32(8) | ui32(f0)), + ui16(ui32(g1) << ui32(8) | ui32(g0)), + ui16(ui32(h1) << ui32(8) | ui32(h0)), + }; + } + + std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const { + return { { + ui8(a1 & mask.a1), + ui8(a0 & mask.a0), + ui8(b1 & mask.b1), + ui8(b0 & mask.b0), + ui8(c1 & mask.c1), + ui8(c0 & mask.c0), + ui8(d1 & mask.d1), + ui8(d0 & mask.d0), + ui8(e1 & mask.e1), + ui8(e0 & mask.e0), + ui8(f1 & mask.f1), + ui8(f0 & mask.f0), + ui8(g1 & mask.g1), + ui8(g0 & mask.g0), + ui8(h1 & mask.h1), + ui8(h0 & mask.h0) + }, { + ui8(a1 | ~mask.a1), + ui8(a0 | ~mask.a0), + ui8(b1 | ~mask.b1), + ui8(b0 | ~mask.b0), + ui8(c1 | ~mask.c1), + ui8(c0 | ~mask.c0), + ui8(d1 | ~mask.d1), + ui8(d0 | ~mask.d0), + ui8(e1 | ~mask.e1), + ui8(e0 | ~mask.e0), + ui8(f1 | ~mask.f1), + ui8(f0 | ~mask.f0), + ui8(g1 | ~mask.g1), + ui8(g0 | ~mask.g0), + ui8(h1 | ~mask.h1), + ui8(h0 | ~mask.h0) + }}; + } + }; + + struct TRawIp6Subnet { + TRawIp6 base, mask; + + static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))}; + } + + TIpAddressRange ToIpRange() const { + auto range = base.ApplyMask(mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; + } + }; + + TIpv6Address DeserializeAddress(const TStringRef& str) { + TIpv6Address addr; + if (str.Size() == 4) { + TRawIp4 addr4; + memcpy(&addr4, str.Data(), sizeof addr4); + addr = addr4.ToIpAddress(); + } else if (str.Size() == 16) { + TRawIp6 addr6; + memcpy(&addr6, str.Data(), sizeof addr6); + addr = addr6.ToIpAddress(); + } else { + ythrow yexception() 
<< "Incorrect size of input, expected " + << "4 or 16, got " << str.Size(); + } + return addr; + } + + TIpAddressRange DeserializeSubnet(const TStringRef& str) { + TIpAddressRange range; + if (str.Size() == sizeof(TRawIp4Subnet)) { + TRawIp4Subnet subnet4; + memcpy(&subnet4, str.Data(), sizeof subnet4); + range = subnet4.ToIpRange(); + } else if (str.Size() == sizeof(TRawIp6Subnet)) { + TRawIp6Subnet subnet6; + memcpy(&subnet6, str.Data(), sizeof subnet6); + range = subnet6.ToIpRange(); + } else { + ythrow yexception() << "Invalid binary representation"; + } + return range; + } + + TString SerializeAddress(const TIpv6Address& addr) { + Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6); + TString res; + if (addr.Type() == TIpv6Address::Ipv4) { + auto addr4 = TRawIp4::FromIpAddress(addr); + res = TString(reinterpret_cast<const char *>(&addr4), sizeof addr4); + } else if (addr.Type() == TIpv6Address::Ipv6) { + auto addr6 = TRawIp6::FromIpAddress(addr); + res = TString(reinterpret_cast<const char *>(&addr6), sizeof addr6); + } + return res; + } + + TString SerializeSubnet(const TIpAddressRange& range) { + TString res; + if (range.Type() == TIpv6Address::Ipv4) { + auto subnet4 = TRawIp4Subnet::FromIpRange(range); + res = TString(reinterpret_cast<const char *>(&subnet4), sizeof subnet4); + } else if (range.Type() == TIpv6Address::Ipv6) { + auto subnet6 = TRawIp6Subnet::FromIpRange(range); + res = TString(reinterpret_cast<const char *>(&subnet6), sizeof subnet6); + } + return res; + } + + SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) { + TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef()); + if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) { + return TUnboxedValue(); + } + return valueBuilder->NewString(SerializeAddress(addr)); + } + + SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) { + TIpAddressRange range = 
TIpAddressRange::FromCompactString(args[0].AsStringRef()); + auto res = SerializeSubnet(range); + return res ? valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod()); + } + + SIMPLE_UDF(TToString, char*(TAutoMapString)) { + return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false)); + } + + SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) { + TStringBuilder result; + auto range = DeserializeSubnet(args[0].AsStringRef()); + result << (*range.Begin()).ToString(false); + result << '/'; + result << ToString(GetAddressRangePrefix(range)); + return valueBuilder->NewString(result); + } + + SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) { + Y_UNUSED(valueBuilder); + auto range1 = DeserializeSubnet(args[0].AsStringRef()); + if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) { + auto addr2 = DeserializeAddress(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(addr2)); + } else { // second argument is a whole subnet, not a single address + auto range2 = DeserializeSubnet(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(range2)); + } + } + + SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + result = ref.Size() == 4; + } + return TUnboxedValuePod(result); + } + + SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + result = ref.Size() == 16; + } + return TUnboxedValuePod(result); + } + + SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + if (ref.Size() == 16) { + result = DeserializeAddress(ref).Isv4MappedTov6(); + } + } + return TUnboxedValuePod(result); + } + + SIMPLE_UDF(TConvertToIPv6, 
char*(TAutoMapString)) { + const auto& ref = args[0].AsStringRef(); + if (ref.Size() == 16) { + return valueBuilder->NewString(ref); + } else if (ref.Size() == 4) { + TIpv6Address addr4 = DeserializeAddress(ref); + auto addr6 = TIpv6Address(ui128(addr4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6); + return valueBuilder->NewString(SerializeAddress(addr6)); + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << ref.Size(); + } + } + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, char*(TAutoMapString, TOptionalByte), 1) { + const auto ref = args[0].AsStringRef(); + ui8 subnetSize = args[1].GetOrDefault<ui8>(0); + TIpv6Address addr = DeserializeAddress(ref); + if (ref.Size() == 4) { + if (!subnetSize) { + subnetSize = 24; + } + if (subnetSize > 32) { + subnetSize = 32; + } + } else if (ref.Size() == 16) { + if (!subnetSize) { + subnetSize = 64; + } + if (subnetSize > 128) { + subnetSize = 128; + } + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << ref.Size(); + } + TIpv6Address beg = LowerBoundForPrefix(addr, subnetSize); + return valueBuilder->NewString(SerializeAddress(beg)); + } + + SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) { + const auto refBase = args[0].AsStringRef(); + const auto refMask = args[1].AsStringRef(); + TIpv6Address addrBase = DeserializeAddress(refBase); + TIpv6Address addrMask = DeserializeAddress(refMask); + if (addrBase.Type() != addrMask.Type()) { + ythrow yexception() << "Base and mask differ in length"; + } + return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type()))); + } + +#define EXPORTED_IP_BASE_UDF \ + TFromString, \ + TSubnetFromString, \ + TToString, \ + TSubnetToString, \ + TIsIPv4, \ + TIsIPv6, \ + TIsEmbeddedIPv4, \ + TConvertToIPv6, \ + TGetSubnet, \ + TSubnetMatch, \ + TGetSubnetByMask +} diff --git a/yql/essentials/udfs/common/ip_base/lib/ya.make 
b/yql/essentials/udfs/common/ip_base/lib/ya.make new file mode 100644 index 00000000000..72633514771 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/lib/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 28 + 0 +) + +SRCS( + ip_base_udf.cpp +) + +PEERDIR( + yql/essentials/public/udf + library/cpp/ipmath + library/cpp/ipv6_address +) + +END() diff --git a/yql/essentials/udfs/common/ip_base/test/canondata/result.json b/yql/essentials/udfs/common/ip_base/test/canondata/result.json new file mode 100644 index 00000000000..a9602f6bf0c --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/canondata/result.json @@ -0,0 +1,12 @@ +{ + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ], + "test.test[Subnets]": [ + { + "uri": "file://test.test_Subnets_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..c62c9cbd35f --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,374 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "internal_representation"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "round_trip"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "is_ipv4"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_ipv6"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_embedded_ipv4"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "all_ipv6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "default_subnet"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "small_subnet"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "large_subnet"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "single_subnet4"; + [ + "OptionalType"; + [ + 
"DataType"; + "String" + ] + ] + ]; + [ + "single_subnet6"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\x7F\0\0\1" + ]; + [ + "127.0.0.1" + ]; + %true; + %false; + %false; + [ + "::ffff:127.0.0.1" + ]; + [ + "127.0.0.0" + ]; + [ + "127.0.0.1" + ]; + [ + "127.0.0.0" + ]; + [ + "127.0.0.1" + ]; + [ + "127.0.0.1" + ] + ]; + [ + [ + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1" + ]; + [ + "::1" + ]; + %false; + %true; + %false; + [ + "::1" + ]; + [ + "::" + ]; + [ + "::" + ]; + [ + "::" + ]; + [ + "::" + ]; + [ + "::1" + ] + ]; + [ + [ + [ + "1bTBAw==" + ] + ]; + [ + "213.180.193.3" + ]; + %true; + %false; + %false; + [ + "::ffff:213.180.193.3" + ]; + [ + "213.180.193.0" + ]; + [ + "213.180.193.3" + ]; + [ + "213.180.0.0" + ]; + [ + "213.180.193.3" + ]; + [ + "213.180.193.3" + ] + ]; + [ + [ + [ + "KgIGuAAAAAAAAAAAAAAAAw==" + ] + ]; + [ + "2a02:6b8::3" + ]; + %false; + %true; + %false; + [ + "2a02:6b8::3" + ]; + [ + "2a02:6b8::" + ]; + [ + "2a02:6b8::" + ]; + [ + "2a02::" + ]; + [ + "2a02:6b8::" + ]; + [ + "2a02:6b8::3" + ] + ]; + [ + [ + [ + "JADLACBIAAEAAAAAaBwbZQ==" + ] + ]; + [ + "2400:cb00:2048:1::681c:1b65" + ]; + %false; + %true; + %false; + [ + "2400:cb00:2048:1::681c:1b65" + ]; + [ + "2400:cb00:2048:1::" + ]; + [ + "2400:cb00:2048:1::681c:1b60" + ]; + [ + "2400::" + ]; + [ + "2400:cb00::" + ]; + [ + "2400:cb00:2048:1::681c:1b65" + ] + ]; + [ + [ + [ + "/oAAAAAAAAACFbL//qlnzg==" + ] + ]; + [ + "fe80::215:b2ff:fea9:67ce" + ]; + %false; + %true; + %false; + [ + "fe80::215:b2ff:fea9:67ce" + ]; + [ + "fe80::" + ]; + [ + "fe80::215:b2ff:fea9:67c8" + ]; + [ + "fe80::" + ]; + [ + "fe80::" + ]; + [ + "fe80::215:b2ff:fea9:67ce" + ] + ]; + [ + [ + [ + "AAAAAAAAAAAAAP//TUubAw==" + ] + ]; + [ + "::ffff:77.75.155.3" + ]; + %false; + %true; + %true; + [ + "::ffff:77.75.155.3" + ]; + [ + "::" + ]; + [ + "::ffff:77.75.155.0" + ]; + [ + "::" + ]; + [ + "::" + ]; + [ + "::ffff:77.75.155.3" + ] + ]; + [ + #; + #; + %false; + %false; + 
%false; + #; + #; + #; + #; + #; + # + ]; + [ + [ + "\0\0\0\0" + ]; + [ + "0.0.0.0" + ]; + %true; + %false; + %false; + [ + "::ffff:0.0.0.0" + ]; + [ + "0.0.0.0" + ]; + [ + "0.0.0.0" + ]; + [ + "0.0.0.0" + ]; + [ + "0.0.0.0" + ]; + [ + "0.0.0.0" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt new file mode 100644 index 00000000000..c6f8ac61364 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/canondata/test.test_Subnets_/results.txt @@ -0,0 +1,184 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "internal1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "string1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "subnet1_subnet2_match"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "subnet1_ip1_match"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "subnet2_ip1_match"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "ip1_ip2_mask_subnet"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "wKgAAP///wA=" + ] + ]; + [ + "192.168.0.0/24" + ]; + [ + %true + ]; + [ + %true + ]; + [ + %false + ]; + [ + "192.0.0.0" + ] + ]; + [ + [ + [ + "CgAAAP//AAA=" + ] + ]; + [ + "10.0.0.0/16" + ]; + [ + %false + ]; + [ + %true + ]; + [ + %false + ]; + [ + "10.0.0.0" + ] + ]; + [ + [ + "\0\0\0\0\0\0\0\0" + ]; + [ + "0.0.0.0/0" + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + "0.0.0.0" + ] + ]; + [ + [ + [ + "KgIGuAweShgAAAaWAAAAAP///////////////wAAAAA=" + ] + ]; + [ + "2a02:6b8:c1e:4a18:0:696::/96" + ]; + [ + %false + ]; + [ + %true + ]; + [ + %true + ]; + [ + "::696:0:0" + ] + ]; + [ + [ + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + ]; + [ + "::/0" + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + "::ffff:192.168.0.2" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Basic.in b/yql/essentials/udfs/common/ip_base/test/cases/Basic.in new file mode 100644 index 00000000000..4aa20599141 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/cases/Basic.in @@ -0,0 +1,9 @@ +{"key"="127.0.0.1";"subkey"="";"value"=""}; +{"key"="::1";"subkey"="";"value"=""}; +{"key"="213.180.193.3";"subkey"="";"value"=""}; +{"key"="2a02:6b8::3";"subkey"="";"value"=""}; +{"key"="2400:cb00:2048:1::681c:1b65";"subkey"="";"value"=""}; +{"key"="fe80::215:b2ff:fea9:67ce";"subkey"="";"value"=""}; +{"key"="::ffff:77.75.155.3";"subkey"="";"value"=""}; +{"key"="sdfsdfsdf";"subkey"="";"value"=""}; +{"key"="0.0.0.0";"subkey"="";value=""};
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Basic.sql b/yql/essentials/udfs/common/ip_base/test/cases/Basic.sql new file mode 100644 index 00000000000..1b875bc7313 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/cases/Basic.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ +SELECT + internal_representation AS internal_representation, + Ip::ToString(internal_representation) AS round_trip, + Ip::IsIPv4(internal_representation) AS is_ipv4, + Ip::IsIPv6(internal_representation) AS is_ipv6, + Ip::IsEmbeddedIPv4(internal_representation) AS is_embedded_ipv4, + Ip::ToString(Ip::ConvertToIPv6(internal_representation)) AS all_ipv6, + Ip::ToString(Ip::GetSubnet(internal_representation)) AS default_subnet, + Ip::ToString(Ip::GetSubnet(internal_representation, 125)) AS small_subnet, + Ip::ToString(Ip::GetSubnet(internal_representation, 16)) AS large_subnet, + Ip::ToString(Ip::GetSubnet(internal_representation, 32)) AS single_subnet4, + Ip::ToString(Ip::GetSubnet(internal_representation, 128)) AS single_subnet6 +FROM ( + SELECT Ip::FromString(key) AS internal_representation FROM Input +); diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in new file mode 100644 index 00000000000..b2e2a1d02c1 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in @@ -0,0 +1,5 @@ +{"subnet1"="192.168.0.1/24";"subnet2"="192.168.0.1/28";"ip1"="192.168.0.32";"ip2"="255.0.0.0"}; +{"subnet1"="10.0.0.1/16";"subnet2"="127.0.0.1/16";"ip1"="10.0.10.128";"ip2"="255.0.240.0"}; +{"subnet1"="0.0.0.0/0";"subnet2"="1.1.1.1/32";"ip1"="1.1.1.1";"ip2"="0.0.0.0"}; +{"subnet1"="2a02:6b8:c1e:4a18:0:696:ec65:0/96";"subnet2"="2a02:6b8:c1e:4a18::/12";"ip1"="2a02:6b8:c1e:4a18:0:696:ec65:0";"ip2"="::ffff:ffff:0:0"}; +{"subnet1"="::/0";"subnet2"="::ffff:192.168.0.1/96";"ip1"="::ffff:192.168.0.2";"ip2"="ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"};
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr new file mode 100644 index 00000000000..01b3c2afea5 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.in.attr @@ -0,0 +1 @@ +{schema=[{name=subnet1;type=string};{name=subnet2;type=string};{name=ip1;type=string};{name=ip2;type=string}]}
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql new file mode 100644 index 00000000000..43a7b143872 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/cases/Subnets.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ +SELECT + subnet1 AS internal1, + Ip::SubnetToString(subnet1) AS string1, + Ip::SubnetMatch(subnet1, subnet2) AS subnet1_subnet2_match, + Ip::SubnetMatch(subnet1, ip1) AS subnet1_ip1_match, + Ip::SubnetMatch(subnet2, ip1) AS subnet2_ip1_match, + Ip::ToString(Ip::GetSubnetByMask(ip1, ip2)) AS ip1_ip2_mask_subnet +FROM ( + SELECT + Ip::SubnetFromString(subnet1) AS subnet1, + Ip::SubnetFromString(subnet2) AS subnet2, + Ip::FromString(ip1) AS ip1, + Ip::FromString(ip2) AS ip2 + FROM Input +);
\ No newline at end of file diff --git a/yql/essentials/udfs/common/ip_base/test/ya.make b/yql/essentials/udfs/common/ip_base/test/ya.make new file mode 100644 index 00000000000..883a487b013 --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +DEPENDS(yql/essentials/udfs/common/ip_base) + +END() diff --git a/yql/essentials/udfs/common/ip_base/ya.make b/yql/essentials/udfs/common/ip_base/ya.make new file mode 100644 index 00000000000..b43780285cd --- /dev/null +++ b/yql/essentials/udfs/common/ip_base/ya.make @@ -0,0 +1,34 @@ +IF (YQL_PACKAGED) + PACKAGE() + + FROM_SANDBOX( + FILE 7319899828 OUT_NOAUTO libip_udf.so + ) + + END() + +ELSE() + +YQL_UDF_CONTRIB(ip_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + ip_base.cpp + ) + + PEERDIR( + yql/essentials/udfs/common/ip_base/lib + ) + + END() + +ENDIF() + +RECURSE_FOR_TESTS( + test +)
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json/json_udf.cpp b/yql/essentials/udfs/common/json/json_udf.cpp new file mode 100644 index 00000000000..3a7916bed74 --- /dev/null +++ b/yql/essentials/udfs/common/json/json_udf.cpp @@ -0,0 +1,120 @@ +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <library/cpp/json/easy_parse/json_easy_parser.h> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + class TGetField: public TBoxedValue { + public: + typedef bool TTypeAwareMarker; + + public: + static TStringRef Name() { + return TStringRef::Of("GetField"); + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + if (!args[0]) { + return valueBuilder->NewEmptyList(); + } + + const TString json(args[0].AsStringRef()); + const TString field(args[1].AsStringRef()); + + if (field.empty()) { + return valueBuilder->NewEmptyList(); + } + + NJson::TJsonParser parser; + parser.AddField(field, false); + + TVector<TString> result; + parser.Parse(json, &result); + + TUnboxedValue* items = nullptr; + const auto list = valueBuilder->NewArray(result.size(), items); + for (const TString& item : result) { + *items++ = valueBuilder->NewString(item); + } + + return list; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + bool useString = true; + bool isOptional = true; + if (userType) { + // support of an overload with Json/Json? 
input type + auto typeHelper = builder.TypeInfoHelper(); + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Missing or invalid user type."); + return true; + } + + auto argsTypeTuple = userTypeInspector.GetElementType(0); + auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); + return true; + } + + if (argsTypeInspector.GetElementsCount() != 2) { + builder.SetError("Invalid user type - expected two arguments."); + return true; + } + + auto inputType = argsTypeInspector.GetElementType(0); + auto optInspector = TOptionalTypeInspector(*typeHelper, inputType); + auto dataType = inputType; + if (optInspector) { + dataType = optInspector.GetItemType(); + } else { + isOptional = false; + } + + auto dataInspector = TDataTypeInspector(*typeHelper, dataType); + if (dataInspector && dataInspector.GetTypeId() == TDataType<TJson>::Id) { + useString = false; + builder.UserType(userType); + } + } + + auto retType = builder.List()->Item<char*>().Build(); + if (useString) { + builder.Args()->Add(builder.Optional()->Item<char*>().Build()).Add<char*>().Done().Returns(retType); + } else { + auto type = builder.SimpleType<TJson>(); + if (isOptional) { + builder.Args()->Add(builder.Optional()->Item(type).Build()).Add<char*>().Done().Returns(retType); + } else { + builder.Args()->Add(type).Add<char*>().Done().Returns(retType); + } + } + + if (!typesOnly) { + builder.Implementation(new TGetField); + } + + builder.IsStrict(); + return true; + } else { + return false; + } + } + }; +} + +SIMPLE_MODULE(TJsonModule, + TGetField) + +REGISTER_MODULES(TJsonModule) diff --git a/yql/essentials/udfs/common/json/test/canondata/result.json b/yql/essentials/udfs/common/json/test/canondata/result.json new file mode 100644 index 00000000000..fb6112fc5bc --- /dev/null +++ 
b/yql/essentials/udfs/common/json/test/canondata/result.json @@ -0,0 +1,7 @@ +{ + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..8cd3200dab4 --- /dev/null +++ b/yql/essentials/udfs/common/json/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,57 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "11" + ]; + [ + "" + ]; + [] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json/test/cases/Basic.sql b/yql/essentials/udfs/common/json/test/cases/Basic.sql new file mode 100644 index 00000000000..512246d7668 --- /dev/null +++ b/yql/essentials/udfs/common/json/test/cases/Basic.sql @@ -0,0 +1,12 @@ +/* syntax version 0 */ +$json1 = @@{ + "x": { + "y": ["15", "11", "17"], + "z": 1 + } +}@@; + +SELECT + Json::GetField($json1, "/x/y/[1]"), + Json::GetField("[]", "/"), + Json::GetField($json1, "///"); diff --git a/yql/essentials/udfs/common/json/test/ya.make b/yql/essentials/udfs/common/json/test/ya.make new file mode 100644 index 00000000000..d0260816188 --- /dev/null +++ b/yql/essentials/udfs/common/json/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/json) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/json/ya.make b/yql/essentials/udfs/common/json/ya.make new file mode 100644 index 00000000000..ac0dbd375d6 --- /dev/null +++ b/yql/essentials/udfs/common/json/ya.make @@ -0,0 +1,29 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319900360 OUT_NOAUTO libjson_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(json_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + json_udf.cpp + ) + + PEERDIR( + library/cpp/json/easy_parse + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/json2/as_json_node.h b/yql/essentials/udfs/common/json2/as_json_node.h new file mode 100644 index 00000000000..c7463fffa66 --- /dev/null +++ b/yql/essentials/udfs/common/json2/as_json_node.h @@ -0,0 +1,115 @@
#pragma once

#include "resource.h"

#include <yql/essentials/public/udf/udf_value.h>
#include <yql/essentials/public/udf/udf_helpers.h>
#include <yql/essentials/minikql/dom/node.h>
#include <yql/essentials/minikql/dom/json.h>

namespace NJson2Udf {
    using namespace NKikimr;
    using namespace NUdf;
    using namespace NYql;
    using namespace NDom;

    // UDF that wraps an optional value of type TSource into a JSON node
    // resource. The function name and the conversion itself are supplied per
    // source type by the explicit specializations of Name() and Interpret()
    // below (TUtf8, double, bool, TJson).
    template <typename TSource>
    class TAsJsonNode: public TBoxedValue {
    public:
        TAsJsonNode(TSourcePosition pos)
            : Pos_(pos)
        {
        }

        static TStringRef Name();

        // Declares Optional<TSource> -> Resource<JSON_NODE_RESOURCE_NAME>.
        // Returns false when `name` is not this instantiation's Name().
        static bool DeclareSignature(
            const TStringRef& name,
            TType* userType,
            IFunctionTypeInfoBuilder& builder,
            bool typesOnly) {
            Y_UNUSED(userType);
            if (name != Name()) {
                return false;
            }

            auto optionalSourceType = builder.Optional()->Item<TSource>().Build();
            auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME);
            builder.Args()
                ->Add(optionalSourceType)
                .Done()
                .Returns(resourceType);

            if (!typesOnly) {
                builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition()));
            }

            builder.IsStrict();
            return true;
        }

    private:
        // Converts a non-empty source value into a JSON node.
        static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder);

        // An empty optional becomes MakeEntity(); anything else goes through
        // Interpret(). Any std::exception is reported via UdfTerminate
        // together with the source position of the call.
        TUnboxedValue Run(
            const IValueBuilder* valueBuilder,
            const TUnboxedValuePod* args) const final {
            try {
                if (!args[0].HasValue()) {
                    return MakeEntity();
                }
                return Interpret(args[0], valueBuilder);
            } catch (const std::exception& e) {
                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
            }
        }

        TSourcePosition Pos_;
    };

    // The explicit specializations below are defined in a header, so they are
    // marked inline: a full specialization of a member function is an ordinary
    // (non-template) function and would otherwise be defined once per
    // translation unit that includes this file.

    template <>
    inline TStringRef TAsJsonNode<TUtf8>::Name() {
        return TStringRef::Of("Utf8AsJsonNode");
    }

    template <>
    inline TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
        return MakeString(sourceValue.AsStringRef(), valueBuilder);
    }

    template <>
    inline TStringRef TAsJsonNode<double>::Name() {
        return TStringRef::Of("DoubleAsJsonNode");
    }

    template <>
    inline TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
        Y_UNUSED(valueBuilder);
        return MakeDouble(sourceValue.Get<double>());
    }

    template <>
    inline TStringRef TAsJsonNode<bool>::Name() {
        return TStringRef::Of("BoolAsJsonNode");
    }

    template <>
    inline TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
        Y_UNUSED(valueBuilder);
        return MakeBool(sourceValue.Get<bool>());
    }

    template <>
    inline TStringRef TAsJsonNode<TJson>::Name() {
        return TStringRef::Of("JsonAsJsonNode");
    }

    template <>
    inline TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) {
        return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder);
    }
}

diff --git a/yql/essentials/udfs/common/json2/compile_path.h b/yql/essentials/udfs/common/json2/compile_path.h new file mode 100644 index 00000000000..8239cfc1eee --- /dev/null +++ b/yql/essentials/udfs/common/json2/compile_path.h @@ -0,0 +1,70 @@ +#pragma once + +#include "resource.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_helpers.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + + class TCompilePath: public TBoxedValue { + public: + TCompilePath(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("CompilePath"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); + builder.Args() + ->Add<NUdf::TUtf8>() + .Done() + .Returns(resourceType); + + if (!typesOnly) { + builder.Implementation(new TCompilePath(builder.GetSourcePosition())); + } + return true; + } + + private: + const size_t MaxParseErrors = 10; + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + TIssues issues; + const 
auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors); + if (!issues.Empty()) { + ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); + } + + return TUnboxedValuePod(new TJsonPathResource(jsonPath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; +} + diff --git a/yql/essentials/udfs/common/json2/json2_udf.cpp b/yql/essentials/udfs/common/json2/json2_udf.cpp new file mode 100644 index 00000000000..96ef6ccf00b --- /dev/null +++ b/yql/essentials/udfs/common/json2/json2_udf.cpp @@ -0,0 +1,43 @@ +#include "as_json_node.h" +#include "compile_path.h" +#include "parse.h" +#include "serialize.h" +#include "sql_exists.h" +#include "sql_query.h" +#include "sql_value.h" + +#include <yql/essentials/public/udf/udf_helpers.h> + +namespace NJson2Udf { + SIMPLE_MODULE(TJson2Module, + TParse, + TSerialize<EDataSlot::Json>, + TSerialize<EDataSlot::JsonDocument>, + TCompilePath, + TSqlValue<EDataSlot::Json, TUtf8>, + TSqlValue<EDataSlot::Json, TUtf8, true>, + TSqlValue<EDataSlot::Json, i64>, + TSqlValue<EDataSlot::Json, double>, + TSqlValue<EDataSlot::Json, bool>, + TSqlValue<EDataSlot::JsonDocument, TUtf8>, + TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, + TSqlValue<EDataSlot::JsonDocument, i64>, + TSqlValue<EDataSlot::JsonDocument, double>, + TSqlValue<EDataSlot::JsonDocument, bool>, + TSqlExists<EDataSlot::Json, false>, + TSqlExists<EDataSlot::Json, true>, + TSqlExists<EDataSlot::JsonDocument, false>, + TSqlExists<EDataSlot::JsonDocument, true>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, + 
TAsJsonNode<TUtf8>, + TAsJsonNode<double>, + TAsJsonNode<bool>, + TAsJsonNode<TJson>) +} + +REGISTER_MODULES(NJson2Udf::TJson2Module) diff --git a/yql/essentials/udfs/common/json2/parse.h b/yql/essentials/udfs/common/json2/parse.h new file mode 100644 index 00000000000..0020c164c2b --- /dev/null +++ b/yql/essentials/udfs/common/json2/parse.h @@ -0,0 +1,66 @@ +#pragma once + +#include "resource.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/minikql/dom/json.h> + +#include <library/cpp/json/json_reader.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + + class TParse: public TBoxedValue { + public: + TParse(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Parse"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + builder.Args() + ->Add<TAutoMap<TJson>>() + .Done() + .Returns<TJsonNodeResource>(); + + if (!typesOnly) { + builder.Implementation(new TParse(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + const auto json = args[0].AsStringRef(); + return TryParseJsonDom(json, valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; +} + diff --git a/yql/essentials/udfs/common/json2/resource.h b/yql/essentials/udfs/common/json2/resource.h new file mode 100644 index 00000000000..aa65b14818d --- /dev/null +++ b/yql/essentials/udfs/common/json2/resource.h @@ -0,0 +1,17 @@ +#pragma once + +#include 
<yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/minikql/jsonpath/jsonpath.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + + extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; + using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>; + + extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; + using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; +} + diff --git a/yql/essentials/udfs/common/json2/serialize.h b/yql/essentials/udfs/common/json2/serialize.h new file mode 100644 index 00000000000..a7077cb6e6d --- /dev/null +++ b/yql/essentials/udfs/common/json2/serialize.h @@ -0,0 +1,89 @@ +#pragma once + +#include "resource.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/minikql/dom/json.h> + +#include <yql/essentials/types/binary_json/write.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + using namespace NBinaryJson; + + template <EDataSlot ResultType> + class TSerialize : public TBoxedValue { + public: + TSerialize(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + TType* resultType = nullptr; + if constexpr (ResultType == EDataSlot::Json) { + resultType = builder.SimpleType<TJson>(); + } else { + resultType = builder.SimpleType<TJsonDocument>(); + } + + builder.Args() + ->Add<TAutoMap<TJsonNodeResource>>() + .Done() + .Returns(resultType); + + if (!typesOnly) { + builder.Implementation(new TSerialize(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* 
args) const final { + try { + const TUnboxedValue& jsonDom = args[0]; + + if constexpr (ResultType == EDataSlot::Json) { + return valueBuilder->NewString(SerializeJsonDom(jsonDom)); + } else { + const auto binaryJson = SerializeToBinaryJson(jsonDom); + return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + const TStringRef& TSerialize<EDataSlot::Json>::Name() { + static auto name = TStringRef::Of("Serialize"); + return name; + } + + template <> + const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { + static auto name = TStringRef::Of("SerializeToJsonDocument"); + return name; + } +} + diff --git a/yql/essentials/udfs/common/json2/sql_exists.h b/yql/essentials/udfs/common/json2/sql_exists.h new file mode 100644 index 00000000000..8a049b49d42 --- /dev/null +++ b/yql/essentials/udfs/common/json2/sql_exists.h @@ -0,0 +1,135 @@ +#pragma once + +#include "resource.h" +#include "compile_path.h" + +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <util/generic/yexception.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NJsonPath; + + template <EDataSlot InputType, bool ThrowException> + class TSqlExists: public TBoxedValue { + public: + explicit TSqlExists(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + 
inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + auto optionalBoolType = builder.Optional()->Item<bool>().Build(); + + if constexpr (ThrowException) { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(optionalBoolType); + } else { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add(optionalBoolType) + .Done() + .Returns(optionalBoolType); + } + + if (!typesOnly) { + builder.Implementation(new TSqlExists(builder.GetSourcePosition())); + } + if constexpr (!ThrowException) { + builder.IsStrict(); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); + } + + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } + + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); + + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + if (result.IsError()) { + if constexpr (ThrowException) { + ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; + } else { + return args[3]; + } + } + + return TUnboxedValuePod(!result.GetNodes().empty()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + TStringRef 
TSqlExists<EDataSlot::Json, false>::Name() { + return "SqlExists"; + } + + template <> + TStringRef TSqlExists<EDataSlot::Json, true>::Name() { + return "SqlTryExists"; + } + + template <> + TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { + return "JsonDocumentSqlExists"; + } + + template <> + TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() { + return "JsonDocumentSqlTryExists"; + } +} + diff --git a/yql/essentials/udfs/common/json2/sql_query.h b/yql/essentials/udfs/common/json2/sql_query.h new file mode 100644 index 00000000000..cb3bafd3b0b --- /dev/null +++ b/yql/essentials/udfs/common/json2/sql_query.h @@ -0,0 +1,184 @@ +#pragma once + +#include "resource.h" +#include "compile_path.h" + +#include <yql/essentials/core/sql_types/yql_atom_enums.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/minikql/dom/node.h> + +#include <util/generic/yexception.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + using namespace NJsonPath; + + template <EDataSlot InputType, EJsonQueryWrap Mode> + class TSqlQuery: public TBoxedValue { + public: + explicit TSqlQuery(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = 
builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + + /* + Arguments (0-based, matching the args[] indices read in Run): + 0. Resource<JsonNode>? or JsonDocument?. Input json + 1. Resource<JsonPath>. Jsonpath to execute on json + 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath + 3. Bool. True - throw on empty result, false otherwise + 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if argument 3 is true + 5. Bool. True - throw on error, false - otherwise + 6. Resource<JsonNode>?. Default value to return on error. Ignored if argument 5 is true + */ + // we can't mark TSqlQuery as strict due to runtime throw policy setting + // TODO: optimizer can mark SqlQuery as strict if arguments 3 and 5 are literal booleans + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add<bool>() + .Add(optionalJsonType) + .Add<bool>() + .Add(optionalJsonType) + .Done() + .Returns(optionalJsonType); + + if (!typesOnly) { + builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); + } + + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } + + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + + const bool throwOnEmpty = args[3].Get<bool>(); + const auto emptyDefault = args[4]; + const bool throwOnError = args[5].Get<bool>(); + const auto errorDefault = args[6]; + const auto variables = DictToVariables(args[2]); + + auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + + const auto handleCase =
[](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { + if (throws) { + ythrow yexception() << message; + } + return caseDefault; + }; + + if (result.IsError()) { + return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); + } + + auto& nodes = result.GetNodes(); + const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); + if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { + TVector<TUnboxedValue> converted; + converted.reserve(nodes.size()); + for (auto& node : nodes) { + converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); + } + return MakeList(converted.data(), converted.size(), valueBuilder); + } + + if (nodes.empty()) { + return handleCase("Empty result", throwOnEmpty, emptyDefault); + } + + // No wrapping is applicable and result is not empty. Result must be a single object or array + if (nodes.size() > 1) { + return handleCase("Result consists of multiple items", throwOnError, errorDefault); + } + + if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { + return handleCase("Result is neither object nor array", throwOnError, errorDefault); + } + + return nodes[0].ConvertToUnboxedValue(valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <> + TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { + return "SqlQuery"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { + return "SqlQueryWrap"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { + return "SqlQueryConditionalWrap"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { + return 
"JsonDocumentSqlQuery"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { + return "JsonDocumentSqlQueryWrap"; + } + + template <> + TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { + return "JsonDocumentSqlQueryConditionalWrap"; + } +} + diff --git a/yql/essentials/udfs/common/json2/sql_value.h b/yql/essentials/udfs/common/json2/sql_value.h new file mode 100644 index 00000000000..8d3318a8c54 --- /dev/null +++ b/yql/essentials/udfs/common/json2/sql_value.h @@ -0,0 +1,296 @@ +#pragma once + +#include "resource.h" +#include "compile_path.h" + +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/minikql/dom/node.h> + +#include <yql/essentials/types/binary_json/read.h> + +#include <util/generic/yexception.h> +#include <util/generic/ylimits.h> +#include <util/string/cast.h> + +namespace NJson2Udf { + using namespace NKikimr; + using namespace NUdf; + using namespace NYql; + using namespace NDom; + using namespace NJsonPath; + + namespace { + template <class TValueType, bool ForceConvert = false> + TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + Y_UNUSED(source); + Y_ABORT("Unsupported type"); + } + + template <> + TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (IsNodeType(source, ENodeType::String)) { + return source; + } + return {}; + } + + template <> + TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + switch (GetNodeType(source)) { + case ENodeType::String: + return source; + case ENodeType::Uint64: + return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); + case ENodeType::Int64: + return 
valueBuilder->NewString(ToString(source.Get<i64>())).Release(); + case ENodeType::Bool: + return source.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); + case ENodeType::Double: + return valueBuilder->NewString(ToString(source.Get<double>())).Release(); + case ENodeType::Entity: + return TUnboxedValuePod::Embedded("null"); + case ENodeType::List: + case ENodeType::Dict: + case ENodeType::Attr: + return {}; + } + } + + template <> + TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } + + if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(source.Get<i64>()); + } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); + } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); + } + + return {}; + } + + template <> + TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } + + if (IsNodeType(source, ENodeType::Double)) { + return TUnboxedValuePod(source.Get<double>()); + } else if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<i64>())); + } else if (IsNodeType(source, ENodeType::Uint64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); + } + + return {}; + } + + template <> + TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { + return {}; + } + return {TUnboxedValuePod(source.Get<bool>())}; + } + } + + template <EDataSlot InputType, class 
TValueType, bool ForceConvert = false> + class TSqlValue: public TBoxedValue { + public: + enum class TErrorCode : ui8 { + Empty = 0, + Error = 1 + }; + + TSqlValue(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } + + auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); + auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); + auto returnTypeTuple = builder.Tuple(2) + ->Add(errorTupleType) + .Add(optionalValueType) + .Build(); + auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); + + TType* jsonType = nullptr; + if constexpr (InputType == EDataSlot::Json) { + jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + } else { + jsonType = builder.SimpleType<TJsonDocument>(); + } + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); + + builder.Args() + ->Add(optionalJsonType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(returnType); + + builder.IsStrict(); + + if (!typesOnly) { + builder.Implementation(new TSqlValue(builder.GetSourcePosition())); + } + return true; + } + + private: + TUnboxedValue BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { + TUnboxedValue* items = nullptr; + auto errorTuple = valueBuilder->NewArray(2, items); + items[0] = TUnboxedValuePod(static_cast<ui8>(code)); + items[1] = valueBuilder->NewString(message); + return valueBuilder->NewVariant(0, std::move(errorTuple)); + } + + TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const { + return valueBuilder->NewVariant(1, 
std::move(value)); + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + if (!args[0].HasValue()) { + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); + } + + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + } else { + jsonDom = TValue(args[0]); + } + + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); + + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + + if (result.IsError()) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl); + } + + const auto& nodes = result.GetNodes(); + if (nodes.empty()) { + return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); + } + + if (nodes.size() > 1) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); + } + + const auto& value = nodes[0]; + if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { + // SqlValue can return only scalar values + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array"); + } + + if (value.Is(EValueType::Null)) { + // JSON nulls must be converted to SQL nulls + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); + } + + const auto source = value.ConvertToUnboxedValue(valueBuilder); + TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); + if (!convertedValue) { + // error while converting JSON value type to TValueType + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); + } + + return 
BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + template <EDataSlot InputType, class TValueType, bool ForceConvert> + TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { + Y_ABORT("Unknown name"); + } + + template<> + TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { + return TStringRef::Of("SqlValueConvertToUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() { + return TStringRef::Of("SqlValueUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { + return TStringRef::Of("SqlValueInt64"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, double>::Name() { + return TStringRef::Of("SqlValueNumber"); + } + + template <> + TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { + return TStringRef::Of("SqlValueBool"); + } + + template<> + TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { + return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { + return TStringRef::Of("JsonDocumentSqlValueUtf8"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { + return TStringRef::Of("JsonDocumentSqlValueInt64"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { + return TStringRef::Of("JsonDocumentSqlValueNumber"); + } + + template <> + TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { + return TStringRef::Of("JsonDocumentSqlValueBool"); + } + +} diff --git a/yql/essentials/udfs/common/json2/test/canondata/result.json b/yql/essentials/udfs/common/json2/test/canondata/result.json new file mode 100644 index 00000000000..086f5e77ead --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/result.json @@ -0,0 +1,42 @@ +{ + 
"test.test[AsJsonNode]": [ + { + "uri": "file://test.test_AsJsonNode_/results.txt" + } + ], + "test.test[SerializeParse]": [ + { + "uri": "file://test.test_SerializeParse_/results.txt" + } + ], + "test.test[SqlExists]": [ + { + "uri": "file://test.test_SqlExists_/results.txt" + } + ], + "test.test[SqlQueryError]": [ + { + "uri": "file://test.test_SqlQueryError_/extracted" + } + ], + "test.test[SqlQuery]": [ + { + "uri": "file://test.test_SqlQuery_/results.txt" + } + ], + "test.test[SqlTryExistsError]": [ + { + "uri": "file://test.test_SqlTryExistsError_/extracted" + } + ], + "test.test[SqlTryExists]": [ + { + "uri": "file://test.test_SqlTryExists_/results.txt" + } + ], + "test.test[SqlValue]": [ + { + "uri": "file://test.test_SqlValue_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt new file mode 100644 index 00000000000..fd6bba35bca --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_AsJsonNode_/results.txt @@ -0,0 +1,84 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column7"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "\"string\""; + "null"; + "1.2345"; + "null"; + "true"; + "null"; + "{\"key\":28}"; + "null" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt new file mode 100644 index 00000000000..58a867b34e4 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SerializeParse_/results.txt @@ -0,0 +1,102 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Json" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "[]"; + "{}"; + "[1,3,4,5,6]"; + "{\"x\":1234}" + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "JsonDocument" + ] + ]; + [ + "column1"; + [ + "DataType"; + "JsonDocument" + ] + ]; + [ + "column2"; + [ + "DataType"; + "JsonDocument" + ] + ]; + [ + "column3"; + [ + "DataType"; + "JsonDocument" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "[]"; + "{}"; + "[1,3,4,5,6]"; + "{\"x\":1234}" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt new file mode 100644 index 00000000000..1b74c43a71d --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlExists_/results.txt @@ -0,0 +1,195 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + [ + %false + ]; + [ + %false + ]; + [ + %false + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %false + ]; + [ + %true + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted new file mode 100644 index 00000000000..12f3e1927de --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQueryError_/extracted @@ -0,0 +1,10 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:12:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:14:12: Fatal: yql/essentials/udfs/common/json2/sql_query.h:xxx: Error executing jsonpath: +jsonpath:1:8: Error: Member not found, code: 4702 + + Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, NULL, true, NULL); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt new file mode 100644 index 00000000000..0773abf2be8 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlQuery_/results.txt @@ -0,0 +1,400 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "{\"y\":123}" + ]; + [ + "[123,456]" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + [ + "{}" + ]; + [ + "[]" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + [ + "{}" + ]; + [ + "[]" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "[123]" + ]; + [ + "[123]" + ] 
+ ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "{\"y\":123}" + ]; + [ + "[{\"y\":123}]" + ]; + [ + "{\"y\":123}" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "[123,456]" + ]; + [ + "[[123,456]]" + ]; + [ + "[123,456]" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "{}" + ]; + [ + "[]" + ]; + [ + "[]" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted new file mode 100644 index 00000000000..7761bc9a04e --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExistsError_/extracted @@ -0,0 +1,10 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:12:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:14:12: Fatal: yql/essentials/udfs/common/json2/sql_exists.h:xxx: Error executing jsonpath: +jsonpath:1:8: Error: Expected object, code: 4701 + + Json2::SqlTryExists($json, $path, AsDict()); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt new file mode 100644 index 00000000000..4a5f62bc86b --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlTryExists_/results.txt @@ -0,0 +1,83 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt new file mode 100644 index 00000000000..b5aeb82c9ab --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/canondata/test.test_SqlValue_/results.txt @@ -0,0 +1,1663 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column3"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column4"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1"; + [ + "some string value" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n" + ] + ]; + [ + "1"; + # + ]; + [ + "1"; + # + ] + ] + ] + } + ] + }; + { + "Write" = [ + { 
+ "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column3"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column4"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1"; + [ + "2856" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n" + ] + ]; + [ + "1"; + # + ]; + [ + "1"; + # + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ 
+ "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "column3"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "column4"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1"; + [ + "2.71828" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n" + ] + ]; + [ + "1"; + # + ]; + [ + "1"; + # + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column3"; + [ + "VariantType"; + [ + "TupleType"; 
+ [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column4"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1"; + [ + %true + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Member not found, code: 4702\n" + ] + ]; + [ + "0"; + [ + "1"; + "Error executing jsonpath:\njsonpath:1:8: Error: Expected object, code: 4701\n" + ] + ]; + [ + "1"; + # + ]; + [ + "1"; + # + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column3"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column4"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ 
+ "column5"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1"; + [ + "some string value" + ] + ]; + [ + "1"; + [ + "2856" + ] + ]; + [ + "1"; + [ + "2.71828" + ] + ]; + [ + "1"; + [ + "true" + ] + ]; + [ + "1"; + # + ]; + [ + "1"; + # + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + 
] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "1"; + [ + "2856" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + 
"VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column2"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ]; + [ + "0"; + [ + "1"; + "Cannot convert extracted JSON value to target type" + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + [ + "column1"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "TupleType"; + [ + [ + "DataType"; + "Uint8" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "0"; + [ + "1"; + "Extracted JSON value is either object or array" + ] + ]; + [ + "0"; + [ + "1"; + "Extracted JSON value is either object or array" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql b/yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql new file mode 100644 index 00000000000..2d85d5576ce --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/AsJsonNode.sql @@ -0,0 +1,9 @@ +SELECT + Json2::Utf8AsJsonNode(CAST("string" as Utf8)), + Json2::Utf8AsJsonNode(NULL), + Json2::DoubleAsJsonNode(1.2345), + Json2::DoubleAsJsonNode(NULL), + Json2::BoolAsJsonNode(true), + Json2::BoolAsJsonNode(NULL), + Json2::JsonAsJsonNode(CAST(@@{"key": 28}@@ as Json)), + Json2::JsonAsJsonNode(NULL); diff --git a/yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql b/yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql new file mode 100644 index 00000000000..1d5eb42d0cf --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SerializeParse.sql @@ -0,0 +1,15 @@ +$id = ($json) -> { RETURN Json2::Serialize(Json2::Parse($json)); }; + +SELECT + $id("[]"), + $id("{}"), + $id("[1, 3, 4, 5, 6]"), + $id(@@{"x": 1234}@@); + +$id_jd = ($json) -> { RETURN Json2::SerializeToJsonDocument(Json2::Parse($json)); }; + +SELECT + $id_jd("[]"), + $id_jd("{}"), + $id_jd("[1, 3, 4, 5, 6]"), + $id_jd(@@{"x": 1234}@@); diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlExists.sql b/yql/essentials/udfs/common/json2/test/cases/SqlExists.sql new file mode 100644 index 00000000000..34f475fe5a8 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlExists.sql @@ -0,0 +1,25 @@ +/* syntax version 1 */ + +$path = Json2::CompilePath("strict $.x"); + +-- Key exists +SELECT + Json2::SqlExists(CAST(@@{"x": 123}@@ as Json), $path, AsDict(), false), + Json2::SqlExists(CAST(@@{"x": {"key": "value"}}@@ as Json), $path, AsDict(), false), + Json2::SqlExists(CAST(@@{"x": [1, 2, 3]}@@ as Json), $path, AsDict(), false), + Json2::SqlExists(CAST(@@{"x": null}@@ as Json), $path, AsDict(), false); + +-- Key is missing +SELECT + Json2::SqlExists(NULL, $path, 
AsDict(), false), + Json2::SqlExists(CAST(@@{"not_x": 123}@@ as Json), $path, AsDict(), false), + Json2::SqlExists(CAST("{}" as Json), $path, AsDict(), false), + Json2::SqlExists(CAST("[]" as Json), $path, AsDict(), false); + +-- Error handling +$json = CAST("[]" as Json); + +SELECT + Json2::SqlExists($json, $path, AsDict(), false), + Json2::SqlExists($json, $path, AsDict(), true), + Json2::SqlExists($json, $path, AsDict(), NULL);
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql b/yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql new file mode 100644 index 00000000000..38750aec512 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlQuery.sql @@ -0,0 +1,52 @@ +/* syntax version 1 */ + +$path = Json2::CompilePath("strict $.x"); +$array = CAST("[]" as Json); +$object = CAST("{}" as Json); + +-- Valid cases +$nested_object = CAST(@@{"x": {"y": 123}}@@ as Json); +$nested_array = CAST(@@{"x": [123, 456]}@@ as Json); +SELECT + Json2::SqlQuery($nested_object, $path, AsDict(), false, $array, false, $object), + Json2::SqlQuery($nested_array, $path, AsDict(), false, $array, false, $object); + +-- Null handling +SELECT + Json2::SqlQuery(NULL, $path, AsDict(), false, $array, false, $object); + +-- Errors +$jsonpath_error = CAST(@@{"y": []}@@ as Json); +SELECT + Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, $array, false, NULL), + Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, $array, false, $object), + Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, $object, false, $array); + +$mismatch_error = CAST(@@{"x": 123}@@ as Json); +SELECT + Json2::SqlQuery($mismatch_error, $path, AsDict(), false, $array, false, NULL), + Json2::SqlQuery($mismatch_error, $path, AsDict(), false, $array, false, $object), + Json2::SqlQuery($mismatch_error, $path, AsDict(), false, $object, false, $array); + +-- Wrap +$nested_value = CAST(@@{"x": 123}@@ as Json); +SELECT + Json2::SqlQueryWrap($nested_value, $path, AsDict(), false, $object, false, $array), + Json2::SqlQueryConditionalWrap($nested_value, $path, AsDict(), false, $object, false, $array); + +SELECT + Json2::SqlQuery($nested_object, $path, AsDict(), false, $object, false, $array), + Json2::SqlQueryWrap($nested_object, $path, AsDict(), false, $object, false, $array), + Json2::SqlQueryConditionalWrap($nested_object, $path, AsDict(), false, $object, false, $array); + +SELECT + 
Json2::SqlQuery($nested_array, $path, AsDict(), false, $object, false, $array), + Json2::SqlQueryWrap($nested_array, $path, AsDict(), false, $object, false, $array), + Json2::SqlQueryConditionalWrap($nested_array, $path, AsDict(), false, $object, false, $array); + +-- Wrap empty result +$path_lax = Json2::CompilePath("lax $.x"); +SELECT + Json2::SqlQuery($object, $path_lax, AsDict(), false, $object, false, $object), + Json2::SqlQueryWrap($object, $path_lax, AsDict(), false, $object, false, $object), + Json2::SqlQueryConditionalWrap($object, $path_lax, AsDict(), false, $object, false, $object);
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg new file mode 100644 index 00000000000..eb2e5315d1e --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.cfg @@ -0,0 +1 @@ +xfail
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql new file mode 100644 index 00000000000..4aaa329fc06 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlQueryError.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ + +$path = Json2::CompilePath("strict $.x"); + +$jsonpath_error = CAST(@@{"y": []}@@ as Json); +SELECT + Json2::SqlQuery($jsonpath_error, $path, AsDict(), false, NULL, true, NULL);
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql b/yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql new file mode 100644 index 00000000000..f42bd5628db --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlTryExists.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ + +$path = Json2::CompilePath("strict $.x"); + +-- Key exists +SELECT + Json2::SqlTryExists(NULL, $path, AsDict()), + Json2::SqlTryExists(CAST(@@{"x": 123}@@ as Json), $path, AsDict()), + Json2::SqlTryExists(CAST(@@{"x": {"key": "value"}}@@ as Json), $path, AsDict()), + Json2::SqlTryExists(CAST(@@{"x": [1, 2, 3]}@@ as Json), $path, AsDict()), + Json2::SqlTryExists(CAST(@@{"x": null}@@ as Json), $path, AsDict()); diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg new file mode 100644 index 00000000000..eb2e5315d1e --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.cfg @@ -0,0 +1 @@ +xfail
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql new file mode 100644 index 00000000000..3d0440b3cad --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlTryExistsError.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ + +$path = Json2::CompilePath("strict $.x"); +$json = CAST("[]" as Json); + +SELECT + Json2::SqlTryExists($json, $path, AsDict());
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/cases/SqlValue.sql b/yql/essentials/udfs/common/json2/test/cases/SqlValue.sql new file mode 100644 index 00000000000..8f86edee795 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/cases/SqlValue.sql @@ -0,0 +1,95 @@ +/* syntax version 1 */ + +-- Plain cases +$path = Json2::CompilePath("strict $.x"); +$empty_object = CAST("{}" as Json); +$empty_array = CAST("[]" as Json); +$null_key = CAST(@@{ + "x": null +}@@ as Json); + +$string_json = CAST(@@{ + "x": "some string value" +}@@ as Json); +SELECT + Json2::SqlValueUtf8($string_json, $path, AsDict()), + Json2::SqlValueUtf8($empty_object, $path, AsDict()), + Json2::SqlValueUtf8($empty_array, $path, AsDict()), + Json2::SqlValueUtf8($null_key, $path, AsDict()), + Json2::SqlValueUtf8(NULL, $path, AsDict()); + +$int64_json = CAST(@@{ + "x": 2856 +}@@ as Json); +SELECT + Json2::SqlValueInt64($int64_json, $path, AsDict()), + Json2::SqlValueInt64($empty_object, $path, AsDict()), + Json2::SqlValueInt64($empty_array, $path, AsDict()), + Json2::SqlValueInt64($null_key, $path, AsDict()), + Json2::SqlValueInt64(NULL, $path, AsDict()); + +$double_json = CAST(@@{ + "x": 2.71828 +}@@ as Json); +SELECT + Json2::SqlValueNumber($double_json, $path, AsDict()), + Json2::SqlValueNumber($empty_object, $path, AsDict()), + Json2::SqlValueNumber($empty_array, $path, AsDict()), + Json2::SqlValueNumber($null_key, $path, AsDict()), + Json2::SqlValueNumber(NULL, $path, AsDict()); + +$bool_json = CAST(@@{ + "x": true +}@@ as Json); +SELECT + Json2::SqlValueBool($bool_json, $path, AsDict()), + Json2::SqlValueBool($empty_object, $path, AsDict()), + Json2::SqlValueBool($empty_array, $path, AsDict()), + Json2::SqlValueBool($null_key, $path, AsDict()), + Json2::SqlValueBool(NULL, $path, AsDict()); + +-- Convert cases +SELECT + Json2::SqlValueConvertToUtf8($string_json, $path, AsDict()), + Json2::SqlValueConvertToUtf8($int64_json, $path, AsDict()), + 
Json2::SqlValueConvertToUtf8($double_json, $path, AsDict()), + Json2::SqlValueConvertToUtf8($bool_json, $path, AsDict()), + -- NOTE: Here SQL null must be returned, not "null" string + Json2::SqlValueConvertToUtf8($null_key, $path, AsDict()), + Json2::SqlValueConvertToUtf8(NULL, $path, AsDict()); + +-- Error cases +SELECT + Json2::SqlValueUtf8($int64_json, $path, AsDict()), + Json2::SqlValueUtf8($double_json, $path, AsDict()), + Json2::SqlValueUtf8($bool_json, $path, AsDict()); + +SELECT + Json2::SqlValueInt64($string_json, $path, AsDict()), + Json2::SqlValueInt64($double_json, $path, AsDict()), + Json2::SqlValueInt64($bool_json, $path, AsDict()); + +SELECT + Json2::SqlValueNumber($string_json, $path, AsDict()), + -- NOTE: Here int64 is automatically converted to double as it is possible without precision loss + Json2::SqlValueNumber($int64_json, $path, AsDict()), + Json2::SqlValueNumber($bool_json, $path, AsDict()); + +SELECT + Json2::SqlValueBool($string_json, $path, AsDict()), + Json2::SqlValueBool($int64_json, $path, AsDict()), + Json2::SqlValueBool($double_json, $path, AsDict()); + +$nested_object_json = CAST(@@{ + "x": { + "a": 1 + } +}@@ as Json); + +$nested_array_json = CAST(@@{ + "x": [29, 32, "some string"] +}@@ as Json); + +SELECT + Json2::SqlValueBool($nested_object_json, $path, AsDict()), + Json2::SqlValueBool($nested_array_json, $path, AsDict());
\ No newline at end of file diff --git a/yql/essentials/udfs/common/json2/test/ya.make b/yql/essentials/udfs/common/json2/test/ya.make new file mode 100644 index 00000000000..c7079c1bd86 --- /dev/null +++ b/yql/essentials/udfs/common/json2/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/json2) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/json2/ya.make b/yql/essentials/udfs/common/json2/ya.make new file mode 100644 index 00000000000..202b4aee9ca --- /dev/null +++ b/yql/essentials/udfs/common/json2/ya.make @@ -0,0 +1,33 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319901430 OUT_NOAUTO libjson2_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(json2_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + json2_udf.cpp + ) + + PEERDIR( + yql/essentials/core/sql_types + yql/essentials/types/binary_json + yql/essentials/minikql/dom + yql/essentials/minikql/jsonpath + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) + diff --git a/yql/essentials/udfs/common/math/lib/erfinv.cpp b/yql/essentials/udfs/common/math/lib/erfinv.cpp new file mode 100644 index 00000000000..b762ec28070 --- /dev/null +++ b/yql/essentials/udfs/common/math/lib/erfinv.cpp @@ -0,0 +1,114 @@ +#include <cmath> +#include <array> +#include <numeric> + +#include "erfinv.h" + +template <size_t N> +static double polEval(double x, const std::array<double, N>& coef) { + static_assert(N > 0, "Array coef[] should not be empty."); + return std::accumulate(coef.crbegin() + 1, coef.crend(), coef[N - 1], + [x] (auto init, auto cur) { + return std::move(init) * x + cur; + }); +} + +namespace NMathUdf { + +// https://www.jstor.org/stable/2347330 +double ErfInv(double x) { + static constexpr std::array<double, 8> a = { + 1.1975323115670912564578e0, + 4.7072688112383978012285e1, + 6.9706266534389598238465e2, + 4.8548868893843886794648e3, + 
1.6235862515167575384252e4, + 2.3782041382114385731252e4, + 1.1819493347062294404278e4, + 8.8709406962545514830200e2, + }; + static constexpr std::array<double, 8> b = { + 1., + 4.2313330701600911252e1, + 6.8718700749205790830e2, + 5.3941960214247511077e3, + 2.1213794301586595867e4, + 3.9307895800092710610e4, + 2.8729085735721942674e4, + 5.2264952788528545610e3, + }; + static constexpr std::array<double, 8> c = { + 1.42343711074968357734e0, + 4.63033784615654529590e0, + 5.76949722146069140550e0, + 3.64784832476320460504e0, + 1.27045825245236838258e0, + 2.41780725177450611770e-1, + 2.27238449892691845833e-2, + 7.74545014278341407640e-4, + }; + static constexpr std::array<double, 8> d = { + 1.4142135623730950488016887e0, + 2.9036514445419946173133295e0, + 2.3707661626024532365971225e0, + 9.7547832001787427186894837e-1, + 2.0945065210512749128288442e-1, + 2.1494160384252876777097297e-2, + 7.7441459065157709165577218e-4, + 1.4859850019840355905497876e-9, + }; + static constexpr std::array<double, 8> e = { + 6.65790464350110377720e0, + 5.46378491116411436990e0, + 1.78482653991729133580e0, + 2.96560571828504891230e-1, + 2.65321895265761230930e-2, + 1.24266094738807843860e-3, + 2.71155556874348757815e-5, + 2.01033439929228813265e-7, + }; + static constexpr std::array<double, 8> f = { + 1.414213562373095048801689e0, + 8.482908416595164588112026e-1, + 1.936480946950659106176712e-1, + 2.103693768272068968719679e-2, + 1.112800997078859844711555e-3, + 2.611088405080593625138020e-5, + 2.010321207683943062279931e-7, + 2.891024605872965461538222e-15, + }; + + if (isnan(x) || x <= -1. || x >= 1.) { + if (x == 1.) { + return std::numeric_limits<double>::infinity(); + } + if (x == -1.) 
{ + return -std::numeric_limits<double>::infinity(); + } + return std::numeric_limits<double>::quiet_NaN(); + } + + double sign = (x > 0) - (x < 0); + x = abs(x); + if (x < 1e-7) { + return sign * x / M_2_SQRTPI; + } + + double ans; + if (x <= 0.85) { + double r = 0.180625 - 0.25 * x * x; + ans = x * polEval(r, a) / polEval(r, b); + } else { + double r = std::sqrt(M_LN2 - log(1. - x)) - 1.6; + if (r <= 3.4) { + ans = polEval(r, c) / polEval(r, d); + } else { + r -= 3.4; + ans = polEval(r, e) / polEval(r, f); + } + } + + return ans * sign; +} + +} diff --git a/yql/essentials/udfs/common/math/lib/erfinv.h b/yql/essentials/udfs/common/math/lib/erfinv.h new file mode 100644 index 00000000000..1ced5a07e65 --- /dev/null +++ b/yql/essentials/udfs/common/math/lib/erfinv.h @@ -0,0 +1,7 @@ +#pragma once + +namespace NMathUdf { + +double ErfInv(double x); + +} diff --git a/yql/essentials/udfs/common/math/lib/round.h b/yql/essentials/udfs/common/math/lib/round.h new file mode 100644 index 00000000000..f59700da88f --- /dev/null +++ b/yql/essentials/udfs/common/math/lib/round.h @@ -0,0 +1,77 @@ +#pragma once + +#include <util/system/types.h> +#include <cmath> +#include <optional> +#include <fenv.h> + +namespace NMathUdf { + +template <class T> +inline T RoundToDecimal(T v, int decShift) { + T div = std::pow(T(10), decShift); + return std::floor(v / div + T(0.5)) * div; +} + +inline std::optional<i64> Mod(i64 value, i64 m) { + if (!m) { + return {}; + } + + const i64 result = value % m; + if ((result < 0 && m > 0) || (result > 0 && m < 0)) { + return result + m; + } + return result; +} + +inline std::optional<i64> Rem(i64 value, i64 m) { + if (!m) { + return {}; + } + + const i64 result = value % m; + if (result < 0 && value > 0) { + return result + m; + } + + if (result > 0 && value < 0) { + return result - m; + } + return result; +} + +inline std::optional<i64> NearbyIntImpl(double value, decltype(FE_DOWNWARD) mode) { + if (!::isfinite(value)) { + return {}; + } + + auto 
prevMode = ::fegetround(); + ::fesetround(mode); + auto res = ::nearbyint(value); + ::fesetround(prevMode); + // cast to i64 gives wrong sign above 9223372036854774784 + // lower bound is adjusted to -9223372036854774784 as well + if (res < double(std::numeric_limits<i64>::min() + 513) || res > double(std::numeric_limits<i64>::max() - 512)) { + return {}; + } + + return static_cast<i64>(res); +} + +inline std::optional<i64> NearbyInt(double value, ui32 mode) { + switch (mode) { + case 0: + return NearbyIntImpl(value, FE_DOWNWARD); + case 1: + return NearbyIntImpl(value, FE_TONEAREST); + case 2: + return NearbyIntImpl(value, FE_TOWARDZERO); + case 3: + return NearbyIntImpl(value, FE_UPWARD); + default: + return {}; + } +} + +} diff --git a/yql/essentials/udfs/common/math/lib/round_ut.cpp b/yql/essentials/udfs/common/math/lib/round_ut.cpp new file mode 100644 index 00000000000..4d0e96e4dc3 --- /dev/null +++ b/yql/essentials/udfs/common/math/lib/round_ut.cpp @@ -0,0 +1,70 @@ +#include "round.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/hex.h> + +using namespace NMathUdf; + +Y_UNIT_TEST_SUITE(TRound) { + Y_UNIT_TEST(Basic) { + double value = 1930.0 / 3361.0; + double result = RoundToDecimal<long double>(value, -3); + double answer = 0.574; + UNIT_ASSERT_VALUES_EQUAL( + HexEncode(&result, sizeof(double)), + HexEncode(&answer, sizeof(double))); + } + + Y_UNIT_TEST(Mod) { + UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, 7), 6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(1, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(0, 7), 0); + + UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(1, -7), -6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(0, -7), 0); + + UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, 7), 6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(15, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, 7), 0); + + UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(15, -7), -6); + 
UNIT_ASSERT_VALUES_EQUAL(*Mod(14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, -7), 0); + + UNIT_ASSERT(!Mod(-14, 0)); + } + + Y_UNIT_TEST(Rem) { + UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, 7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(1, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(0, 7), 0); + + UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(1, -7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(0, -7), 0); + + UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, 7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(15, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, 7), 0); + + UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(15, -7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, -7), 0); + UNIT_ASSERT(!Rem(-14, 0)); + } + + Y_UNIT_TEST(NearbyInt) { + const i64 maxV = 9223372036854774784ll; + const i64 minV = -9223372036854774784ll; + UNIT_ASSERT_VALUES_EQUAL((i64)(double)(maxV), maxV); + UNIT_ASSERT_VALUES_EQUAL((i64)(double)(minV), minV); + + UNIT_ASSERT_VALUES_UNEQUAL((i64)(double)(maxV + 1), maxV + 1); + } +} diff --git a/yql/essentials/udfs/common/math/lib/ut/ya.make b/yql/essentials/udfs/common/math/lib/ut/ya.make new file mode 100644 index 00000000000..c1efcde3b47 --- /dev/null +++ b/yql/essentials/udfs/common/math/lib/ut/ya.make @@ -0,0 +1,11 @@ +IF (OS_LINUX) +IF (NOT WITH_VALGRIND) + UNITTEST_FOR(yql/essentials/udfs/common/math/lib) + + SRCS( + round_ut.cpp + ) + + END() +ENDIF() +ENDIF() diff --git a/yql/essentials/udfs/common/math/lib/ya.make b/yql/essentials/udfs/common/math/lib/ya.make new file mode 100644 index 00000000000..54b882a8438 --- /dev/null +++ b/yql/essentials/udfs/common/math/lib/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + erfinv.cpp +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/yql/essentials/udfs/common/math/math_ir.cpp b/yql/essentials/udfs/common/math/math_ir.cpp new file mode 100644 index 00000000000..a63968dbe00 --- /dev/null +++ 
b/yql/essentials/udfs/common/math/math_ir.cpp @@ -0,0 +1,5 @@ +#define LLVM_BC + +#include "math_ir.h" + +#include <util/generic/ymath.cpp> diff --git a/yql/essentials/udfs/common/math/math_ir.h b/yql/essentials/udfs/common/math/math_ir.h new file mode 100644 index 00000000000..ee788ee90ba --- /dev/null +++ b/yql/essentials/udfs/common/math/math_ir.h @@ -0,0 +1,151 @@ +#pragma once + +#include <yql/essentials/udfs/common/math/lib/round.h> +#include <yql/essentials/udfs/common/math/lib/erfinv.h> +#include <yql/essentials/public/udf/udf_value.h> + +#include <util/generic/ymath.h> +#include <util/system/compiler.h> + +#include <math.h> + +namespace NYql { +namespace NUdf { + +#define CONST_FUNCS(XX) \ + XX(Pi, M_PI) \ + XX(E, M_E) \ + XX(Eps, std::numeric_limits<double>::epsilon()) \ + XX(RoundDownward, 0) \ + XX(RoundToNearest, 1) \ + XX(RoundTowardZero, 2) \ + XX(RoundUpward, 3) + +// XX(udfName, func): func is applied to the single double argument of the UDF. +#define SINGLE_ARG_FUNCS(XX) \ + XX(Abs, Abs) \ + XX(Acos, acos) \ + XX(Asin, asin) \ + XX(Asinh, asinh) \ + XX(Atan, atan) \ + XX(Cbrt, cbrt) \ + XX(Ceil, ceil) \ + XX(Cos, cos) \ + XX(Cosh, cosh) \ + XX(Erf, Erf) \ + XX(Exp, exp) \ + XX(Exp2, Exp2) \ + XX(Fabs, fabs) \ + XX(Floor, std::floor) \ + XX(Lgamma, LogGamma) \ + XX(Rint, rint) \ + XX(Sin, sin) \ + XX(Sinh, sinh) \ + XX(Sqrt, sqrt) \ + XX(Tan, tan) \ + XX(Tanh, tanh) \ + XX(Tgamma, tgamma) \ + XX(Trunc, trunc) \ + XX(IsFinite, std::isfinite) \ + XX(IsInf, std::isinf) \ + XX(IsNaN, std::isnan) + +#define TWO_ARGS_FUNCS(XX) \ + XX(Atan2, atan2, double) \ + XX(Fmod, fmod, double) \ + XX(Hypot, hypot, double) \ + XX(Remainder, remainder, double) \ + XX(Pow, pow, double) \ + XX(Ldexp, ldexp, int) + +#define POSITIVE_SINGLE_ARG_FUNCS(XX) \ + XX(Log, log) \ + XX(Log2, Log2) \ + XX(Log10, log10) + + +#define CONST_IMPL(name, cnst) \ + extern "C" UDF_ALWAYS_INLINE \ + void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* /*args*/) {\ + *result =
TUnboxedValuePod(cnst); \ + } + +#define SINGLE_ARG_IMPL(name, func) \ + extern "C" UDF_ALWAYS_INLINE \ + void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + *result = TUnboxedValuePod(func(args[0].Get<double>())); \ + } + +#define TWO_ARGS_IMPL(name, func, secondType) \ + extern "C" UDF_ALWAYS_INLINE \ + void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + *result = TUnboxedValuePod(func(args[0].Get<double>(), args[1].Get<secondType>())); \ + } + +#define POSITIVE_SINGLE_ARG_IMPL(name, func) \ + extern "C" UDF_ALWAYS_INLINE \ + void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + double input = args[0].Get<double>(); \ + if (input > 0) { \ + *result = TUnboxedValuePod(func(input)); \ + } else { \ + *result = TUnboxedValuePod(static_cast<double>(NAN)); \ + } \ + } + +CONST_FUNCS(CONST_IMPL) +SINGLE_ARG_FUNCS(SINGLE_ARG_IMPL) +TWO_ARGS_FUNCS(TWO_ARGS_IMPL) +POSITIVE_SINGLE_ARG_FUNCS(POSITIVE_SINGLE_ARG_IMPL) + +extern "C" UDF_ALWAYS_INLINE +void SigmoidIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + *result = TUnboxedValuePod(1. / (1. 
+ exp(-args[0].Get<double>()))); +} + +extern "C" UDF_ALWAYS_INLINE +void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + if (!args[2]) { + *result = TUnboxedValuePod(FuzzyEquals(args[0].Get<double>(), args[1].Get<double>())); + } else { + const double eps = args[2].Get<double>(); + *result = TUnboxedValuePod(FuzzyEquals(args[0].Get<double>(), args[1].Get<double>(), eps)); + } +} + +extern "C" UDF_ALWAYS_INLINE +void RoundIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + const double val = NMathUdf::RoundToDecimal<long double>(args[0].Get<double>(), args[1].GetOrDefault<int>(0)); + *result = TUnboxedValuePod(val); +} + +extern "C" UDF_ALWAYS_INLINE +void ErfInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + *result = TUnboxedValuePod(NMathUdf::ErfInv(args[0].Get<double>())); +} + +extern "C" UDF_ALWAYS_INLINE +void ErfcInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + *result = TUnboxedValuePod(NMathUdf::ErfInv(1. - args[0].Get<double>())); +} + +extern "C" UDF_ALWAYS_INLINE +void ModIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + const auto val = NMathUdf::Mod(args[0].Get<i64>(), args[1].Get<i64>()); + *result = val ? TUnboxedValuePod(*val) : TUnboxedValuePod(); +} + +extern "C" UDF_ALWAYS_INLINE +void RemIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + const auto val = NMathUdf::Rem(args[0].Get<i64>(), args[1].Get<i64>()); + *result = val ? 
TUnboxedValuePod(*val) : TUnboxedValuePod(); +} + +extern "C" UDF_ALWAYS_INLINE +void NearbyIntIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { + const auto val = NMathUdf::NearbyInt(args[0].Get<double>(), args[1].Get<ui32>()); + *result = val ? TUnboxedValuePod(*val) : TUnboxedValuePod(); +} + +} // NUdf +} // NYql diff --git a/yql/essentials/udfs/common/math/math_udf.cpp b/yql/essentials/udfs/common/math/math_udf.cpp new file mode 100644 index 00000000000..1c2652320bb --- /dev/null +++ b/yql/essentials/udfs/common/math/math_udf.cpp @@ -0,0 +1,99 @@ +#include "math_ir.h" + + +#include <yql/essentials/public/udf/udf_helpers.h> + +extern const char TagRoundingMode[] = "MathRoundingMode"; +using TTaggedRoundingMode = NYql::NUdf::TTagged<ui32, TagRoundingMode>; + +#define MATH_UDF_MAP(XX, XXL) \ + XX(Pi, double(), 0) \ + XX(E, double(), 0) \ + XX(Eps, double(), 0) \ + XX(RoundDownward, TTaggedRoundingMode(), 0) \ + XX(RoundToNearest, TTaggedRoundingMode(), 0) \ + XX(RoundTowardZero, TTaggedRoundingMode(), 0) \ + XX(RoundUpward, TTaggedRoundingMode(), 0) \ + XX(Abs, double(TAutoMap<double>), 0) \ + XX(Acos, double(TAutoMap<double>), 0) \ + XX(Asin, double(TAutoMap<double>), 0) \ + XX(Asinh, double(TAutoMap<double>), 0) \ + XX(Atan, double(TAutoMap<double>), 0) \ + XX(Cbrt, double(TAutoMap<double>), 0) \ + XX(Ceil, double(TAutoMap<double>), 0) \ + XX(Cos, double(TAutoMap<double>), 0) \ + XX(Cosh, double(TAutoMap<double>), 0) \ + XX(Erf, double(TAutoMap<double>), 0) \ + XX(ErfInv, double(TAutoMap<double>), 0) \ + XX(ErfcInv, double(TAutoMap<double>), 0) \ + XX(Exp, double(TAutoMap<double>), 0) \ + XX(Exp2, double(TAutoMap<double>), 0) \ + XX(Fabs, double(TAutoMap<double>), 0) \ + XX(Floor, double(TAutoMap<double>), 0) \ + XX(Lgamma, double(TAutoMap<double>), 0) \ + XX(Rint, double(TAutoMap<double>), 0) \ + XX(Sin, double(TAutoMap<double>), 0) \ + XX(Sinh, double(TAutoMap<double>), 0) \ + 
XX(Sqrt, double(TAutoMap<double>), 0) \ + XX(Tan, double(TAutoMap<double>), 0) \ + XX(Tanh, double(TAutoMap<double>), 0) \ + XX(Tgamma, double(TAutoMap<double>), 0) \ + XX(Trunc, double(TAutoMap<double>), 0) \ + XX(Log, double(TAutoMap<double>), 0) \ + XX(Log2, double(TAutoMap<double>), 0) \ + XX(Log10, double(TAutoMap<double>), 0) \ + XX(Atan2, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Fmod, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Hypot, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Remainder, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Pow, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Ldexp, double(TAutoMap<double>, TAutoMap<int>), 0) \ + XX(IsFinite, bool(TAutoMap<double>), 0) \ + XX(IsInf, bool(TAutoMap<double>), 0) \ + XX(IsNaN, bool(TAutoMap<double>), 0) \ + XX(Sigmoid, double(TAutoMap<double>), 0) \ + XX(FuzzyEquals, bool(TAutoMap<double>, TAutoMap<double>, TEpsilon), 1) \ + XX(Mod, TOptional<i64>(TAutoMap<i64>, i64), 0) \ + XX(Rem, TOptional<i64>(TAutoMap<i64>, i64), 0) \ + XXL(Round, double(TAutoMap<double>, TPrecision), 1) + +#define MATH_UDF_MAP_WITHOUT_IR(XX) \ + XX(NearbyInt, TOptional<i64>(TAutoMap<double>, TTaggedRoundingMode), 0) + +#define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_IR(T##name, signature, optionalArgsCount, "/llvm_bc/Math", #name "IR") { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ + } + +#define MATH_STRICT_UDF_WITHOUT_IR(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ + } + +#define REGISTER_MATH_UDF(udfName, ...) T##udfName, +#define REGISTER_MATH_UDF_LAST(udfName, ...) 
T##udfName + +using namespace NKikimr; +using namespace NUdf; + +namespace { + extern const char epsilon[] = "Epsilon"; + using TEpsilon = TNamedArg<double, epsilon>; + + extern const char precision[] = "Precision"; + using TPrecision = TNamedArg<int, precision>; + + MATH_UDF_MAP(MATH_STRICT_UDF, MATH_STRICT_UDF) + + MATH_UDF_MAP_WITHOUT_IR(MATH_STRICT_UDF_WITHOUT_IR) + + SIMPLE_MODULE(TMathModule, + MATH_UDF_MAP_WITHOUT_IR(REGISTER_MATH_UDF) + MATH_UDF_MAP(REGISTER_MATH_UDF, REGISTER_MATH_UDF_LAST)) +} + +REGISTER_MODULES(TMathModule) diff --git a/yql/essentials/udfs/common/math/test/canondata/result.json b/yql/essentials/udfs/common/math/test/canondata/result.json new file mode 100644 index 00000000000..1471c26d599 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/canondata/result.json @@ -0,0 +1,35 @@ +{ + "test.test[ErfInvNoLLVM]": [ + { + "checksum": "be26c6ffe8018b2afe5c6bd554d4468d", + "size": 6789, + "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_ErfInvNoLLVM_/results.txt" + } + ], + "test.test[ErfInv]": [ + { + "checksum": "be26c6ffe8018b2afe5c6bd554d4468d", + "size": 6789, + "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_ErfInv_/results.txt" + } + ], + "test.test[IR]": [ + { + "checksum": "83061f69c401182342478cbc8fd11b63", + "size": 10168, + "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_IR_/results.txt" + } + ], + "test.test[IR_LLVM_OFF]": [ + { + "checksum": "83061f69c401182342478cbc8fd11b63", + "size": 10168, + "uri": "https://storage.yandex-team.ru/get-devtools/1937429/e098bd35e45d8b2d18b6958e5e9b0a875d6a03f1/resource.tar.gz#test.test_IR_LLVM_OFF_/results.txt" + } + ], + "test.test[NearbyInt]": [ + { + "uri": "file://test.test_NearbyInt_/results.txt" + } + ] +} diff --git 
a/yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt b/yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt new file mode 100644 index 00000000000..1149fd999dd --- /dev/null +++ b/yql/essentials/udfs/common/math/test/canondata/test.test_NearbyInt_/results.txt @@ -0,0 +1,238 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "TupleType"; + [ + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + #; + #; + [ + "2" + ]; + [ + "2" + ]; + [ + "2" + ]; + [ + "3" + ]; + #; + [ + "-3" + ]; + [ + "-3" + ]; + [ + "-3" + ]; + [ + "-4" + ] + ]; + "0" + ]; + [ + [ + #; + #; + [ + "2" + ]; + [ + "2" + ]; + [ + "3" + ]; + [ + "4" + ]; + #; + [ + "-2" + ]; + [ + "-2" + ]; + [ + "-3" + ]; + [ + "-4" + ] + ]; + "1" + ]; + [ + [ + #; + #; + [ + "2" + ]; + [ + "2" + ]; + [ + "2" + ]; + [ + "3" + ]; + #; + [ + "-2" + ]; + [ + "-2" + ]; + [ + "-2" + ]; + [ + "-3" + ] + ]; + "2" + ]; + [ + [ + #; + #; + [ + "3" + ]; + [ + "3" + ]; + [ + "3" + ]; + [ + "4" + ]; + #; + [ + "-2" + ]; + [ + "-2" + ]; + [ + "-2" + ]; + [ + "-3" + ] + ]; + "3" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/math/test/cases/ErfInv.sql b/yql/essentials/udfs/common/math/test/cases/ErfInv.sql new file mode 100644 index 00000000000..798e0090ad8 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/ErfInv.sql @@ -0,0 +1,24 @@ +pragma config.flags("ValidateUdf", "None"); + +SELECT + Math::ErfInv(1e-8), + Math::ErfInv(1e-4), + Math::ErfInv(0.1), + Math::ErfInv(0.25), + Math::ErfInv(0.5), + Math::ErfInv(0.75), + Math::ErfInv(0.9), + Math::ErfInv(0.99), + Math::ErfInv(0.9999999), + Math::ErfInv(0.99999999), + Math::ErfInv(0.999999999), + Math::ErfInv(0.9999999999), + Math::ErfInv(0), + Math::ErfInv(1 + Math::Eps()), + Math::ErfInv(-1 - Math::Eps()), + Math::ErfInv(1), + Math::ErfInv(-1), + Math::ErfcInv(2), + Math::ErfcInv(0), + Math::ErfcInv(2 + 2 * Math::Eps()), + Math::ErfcInv(-Math::Eps()); diff --git a/yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql b/yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql new file mode 100644 index 00000000000..55e7c60db14 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/ErfInvNoLLVM.sql @@ -0,0 +1,25 @@ +pragma config.flags("ValidateUdf", "None"); +pragma config.flags("LLVM_OFF"); + +SELECT + Math::ErfInv(1e-8), + Math::ErfInv(1e-4), + Math::ErfInv(0.1), + Math::ErfInv(0.25), + Math::ErfInv(0.5), + Math::ErfInv(0.75), + Math::ErfInv(0.9), + Math::ErfInv(0.99), + Math::ErfInv(0.9999999), + Math::ErfInv(0.99999999), + Math::ErfInv(0.999999999), + Math::ErfInv(0.9999999999), + Math::ErfInv(0), + Math::ErfInv(1 + Math::Eps()), + Math::ErfInv(-1 - Math::Eps()), + Math::ErfInv(1), + Math::ErfInv(-1), + Math::ErfcInv(2), + Math::ErfcInv(0), + Math::ErfcInv(2 + 2 * Math::Eps()), + Math::ErfcInv(-Math::Eps()); diff --git a/yql/essentials/udfs/common/math/test/cases/IR.in b/yql/essentials/udfs/common/math/test/cases/IR.in new file mode 100644 index 00000000000..2a8e728cae8 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/IR.in 
@@ -0,0 +1,4 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; diff --git a/yql/essentials/udfs/common/math/test/cases/IR.sql b/yql/essentials/udfs/common/math/test/cases/IR.sql new file mode 100644 index 00000000000..7dbea3a2dc3 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/IR.sql @@ -0,0 +1,26 @@ +/* syntax version 1 */ +pragma config.flags("ValidateUdf", "None"); + +select + Math::Pi(), + Math::E(), + Math::Eps(), + Math::Abs(-2.34), + Math::Cos(0.234), + Math::IsFinite(0.0), + Math::IsNaN(0.0/0.0), + Math::Pow(2.0, 3.0), + Math::Log(4.0), + Math::Log(-4.0), + Math::Sigmoid(0.5), + Math::FuzzyEquals(1 + 0.0, 1 + 1.0e-200), + Math::FuzzyEquals(1.0001, 1.00012, 0.01 as Epsilon), + Math::Round(34.4564, -2 as Precision), + Math::Exp2(3.4), + Math::Exp(3.4), + Math::Erf(0.4), + Math::Mod(-1, 7), + Math::Mod(-1, 0), + Math::Rem(-1, 7), + Math::Rem(-1, 0) +from Input; diff --git a/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in new file mode 100644 index 00000000000..2a8e728cae8 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.in @@ -0,0 +1,4 @@ +{"key"="023";"subkey"="3";"value"="aaa"}; +{"key"="037";"subkey"="5";"value"="ddd"}; +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="150";"subkey"="1";"value"="aaa"}; diff --git a/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql new file mode 100644 index 00000000000..a8c14225ba9 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/IR_LLVM_OFF.sql @@ -0,0 +1,27 @@ +/* syntax version 1 */ +pragma config.flags("ValidateUdf", "None"); +pragma config.flags("LLVM_OFF"); + +select + Math::Pi(), + Math::E(), + Math::Eps(), + Math::Abs(-2.34), + Math::Cos(0.234), + Math::IsFinite(0.0), + Math::IsNaN(0.0/0.0), + 
Math::Pow(2.0, 3.0), + Math::Log(4.0), + Math::Log(-4.0), + Math::Sigmoid(0.5), + Math::FuzzyEquals(1 + 0.0, 1 + 1.0e-200), + Math::FuzzyEquals(1.0001, 1.00012, 0.01 as Epsilon), + Math::Round(34.4564, -2 as Precision), + Math::Exp2(3.4), + Math::Exp(3.4), + Math::Erf(0.4), + Math::Mod(-1, 7), + Math::Mod(-1, 0), + Math::Rem(-1, 7), + Math::Rem(-1, 0) +from Input; diff --git a/yql/essentials/udfs/common/math/test/cases/NearbyInt.in b/yql/essentials/udfs/common/math/test/cases/NearbyInt.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/NearbyInt.in diff --git a/yql/essentials/udfs/common/math/test/cases/NearbyInt.sql b/yql/essentials/udfs/common/math/test/cases/NearbyInt.sql new file mode 100644 index 00000000000..f27a93d7274 --- /dev/null +++ b/yql/essentials/udfs/common/math/test/cases/NearbyInt.sql @@ -0,0 +1,24 @@ +$f = ($mode)->{ + return ( + Math::NearbyInt(Double("NaN"),$mode), + Math::NearbyInt(1e100,$mode), + Math::NearbyInt(2.3,$mode), + Math::NearbyInt(2.5,$mode), + Math::NearbyInt(2.7,$mode), + Math::NearbyInt(3.5,$mode), + Math::NearbyInt(-1e100,$mode), + Math::NearbyInt(-2.3,$mode), + Math::NearbyInt(-2.5,$mode), + Math::NearbyInt(-2.7,$mode), + Math::NearbyInt(-3.5,$mode) + ) +}; + +select $f(Math::RoundDownward()), 0 as x +union all +select $f(Math::RoundToNearest()), 1 as x +union all +select $f(Math::RoundTowardZero()), 2 as x +union all +select $f(Math::RoundUpward()), 3 as x +order by x; diff --git a/yql/essentials/udfs/common/math/test/ya.make b/yql/essentials/udfs/common/math/test/ya.make new file mode 100644 index 00000000000..2ebe3a7123c --- /dev/null +++ b/yql/essentials/udfs/common/math/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/math) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/math/ya.make 
b/yql/essentials/udfs/common/math/ya.make new file mode 100644 index 00000000000..01fa2f1d89b --- /dev/null +++ b/yql/essentials/udfs/common/math/ya.make @@ -0,0 +1,92 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319902006 OUT_NOAUTO libmath_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(math_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + math_udf.cpp + ) + + USE_LLVM_BC14() + + LLVM_BC( + math_ir.cpp + lib/erfinv.cpp + NAME Math + SYMBOLS + PiIR + EIR + EpsIR + RoundDownwardIR + RoundToNearestIR + RoundTowardZeroIR + RoundUpwardIR + AbsIR + AcosIR + AsinIR + AsinhIR + AtanIR + CbrtIR + CeilIR + CosIR + CoshIR + ErfIR + ErfInvIR + ErfcInvIR + ExpIR + Exp2IR + FabsIR + FloorIR + LgammaIR + RintIR + SinIR + SinhIR + SqrtIR + TanIR + TanhIR + TgammaIR + TruncIR + IsFiniteIR + IsInfIR + IsNaNIR + Atan2IR + FmodIR + HypotIR + RemainderIR + PowIR + LdexpIR + LogIR + Log2IR + Log10IR + SigmoidIR + FuzzyEqualsIR + RoundIR + ModIR + RemIR + ) + + PEERDIR( + yql/essentials/udfs/common/math/lib + ) + + END() +ENDIF () + +RECURSE( + lib +) + +RECURSE_FOR_TESTS( + test +) + + diff --git a/yql/essentials/udfs/common/pire/pire_udf.cpp b/yql/essentials/udfs/common/pire/pire_udf.cpp new file mode 100644 index 00000000000..0f9ffc5c213 --- /dev/null +++ b/yql/essentials/udfs/common/pire/pire_udf.cpp @@ -0,0 +1,358 @@ +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_registrator.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <library/cpp/regex/pire/regexp.h> +#include <library/cpp/regex/pire/pcre2pire.h> + +#include <util/string/builder.h> + +using namespace NRegExp; +using namespace NKikimr; +using namespace NUdf; + +namespace { + class TPireUdfBase: public TBoxedValue { + protected: + TPireUdfBase(TSourcePosition pos) + : Pos_(pos) + {} + + void SetCommonOptions(std::string_view& regex, 
TFsm::TOptions& options) { + if (regex.size() >= 4U && regex.substr(0U, 4U) == "(?i)") { + options.SetCaseInsensitive(true); + regex.remove_prefix(4U); + } + if (UTF8Detect(regex) == UTF8) { + options.SetCharset(CODES_UTF8); + } + } + + TSourcePosition Pos_; + }; + + class TPireMatch: public TPireUdfBase { + public: + class TFactory: public TPireUdfBase { + public: + TFactory( + bool surroundMode, + bool multiMode, + TSourcePosition pos, + size_t regexpsCount = 0) + : TPireUdfBase(pos) + , SurroundMode(surroundMode) + , MultiMode(multiMode) + , RegexpsCount(regexpsCount) + { + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + return TUnboxedValuePod( + new TPireMatch( + valueBuilder, + args[0], + SurroundMode, + MultiMode, + Pos_, + RegexpsCount)); + } + + bool SurroundMode; + bool MultiMode; + size_t RegexpsCount; + }; + + static const TStringRef& Name(bool surroundMode, bool multiMode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto multiMatch = TStringRef::Of("MultiMatch"); + static auto multiGrep = TStringRef::Of("MultiGrep"); + if (surroundMode) { + return multiMode ? multiGrep : grep; + } else { + return multiMode ? 
multiMatch : match; + } + } + + TPireMatch( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod& runConfig, + bool surroundMode, + bool multiMode, + TSourcePosition pos, + size_t regexpsCount) + : TPireUdfBase(pos) + , MultiMode(multiMode) + , RegexpsCount(regexpsCount) + , SurroundMode(surroundMode) + { + Y_UNUSED(valueBuilder); + try { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + options.SetSurround(surroundMode); + SetCommonOptions(regex, options); + if (multiMode) { + std::vector<std::string_view> parts; + StringSplitter(regex).Split('\n').AddTo(&parts); + for (const auto& part : parts) { + if (!part.empty()) { + if (Fsm_) try { + *Fsm_ = *Fsm_ | TFsm(TString(part), options); + } catch (const yexception&) { + UdfTerminate((TStringBuilder() << Pos_ << " Failed to glue up regexes, probably the finite state machine appeared to be too large").data()); + } else { + Fsm_.Reset(new TFsm(TString(part), options)); + } + } + } + } else { + Fsm_.Reset(new TFsm(TString(regex), options)); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + TUnboxedValue* items = nullptr; + TUnboxedValue tuple; + size_t i = 0; + + if (MultiMode) { + tuple = valueBuilder->NewArray(RegexpsCount, items); + + for (i = 0; i < RegexpsCount; ++i) { + items[i] = TUnboxedValuePod(false); + } + } + + if (args[0] && Fsm_) { // Fsm_ stays null when every line of a multi-regexp config is empty: report "no match" instead of dereferencing it + const auto input = args[0].AsStringRef(); + TMatcher matcher(*Fsm_); + const bool isMatch = matcher.Match(input.Data(), input.Size(), SurroundMode, SurroundMode).Final(); + if (MultiMode) { + if (isMatch) { + const auto& matchedRegexps = matcher.MatchedRegexps(); + size_t matchesCount = matchedRegexps.second - matchedRegexps.first; + + for (i = 0; i < matchesCount; ++i) { + items[matchedRegexps.first[i]] = TUnboxedValuePod(true); + } + } + return tuple; + +
} else { + return TUnboxedValuePod(isMatch); + } + + } else { + return MultiMode ? tuple : TUnboxedValue(TUnboxedValuePod(false)); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + private: + TUniquePtr<TFsm> Fsm_; + bool MultiMode; + size_t RegexpsCount; + bool SurroundMode; + }; + + class TPireCapture: public TPireUdfBase { + public: + class TFactory: public TPireUdfBase { + public: + TFactory(TSourcePosition pos) + : TPireUdfBase(pos) + {} + + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TPireCapture(args[0], Pos_)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + }; + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Capture"); + return name; + } + + TPireCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : TPireUdfBase(pos) + { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + SetCommonOptions(regex, options); + Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input = args[0].AsStringRef(); + + TSlowSearcher searcher(*Fsm_); + searcher.Search(input.data(), input.size()); + + if (searcher.Captured()) { + const auto& captured = searcher.GetCaptured(); + return valueBuilder->SubString(args[0], std::distance(input.begin(), captured.begin()), captured.length()); + } + } + + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + TUniquePtr<TSlowCapturingFsm> Fsm_; + }; + + class TPireReplace: public TPireUdfBase { + public: + class TFactory: public TPireUdfBase { + public: + TFactory(TSourcePosition pos) + : TPireUdfBase(pos) 
+ {} + + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TPireReplace(args[0], Pos_)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + }; + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Replace"); + return name; + } + + TPireReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : TPireUdfBase(pos) + { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + SetCommonOptions(regex, options); + Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + + TSlowSearcher s(*Fsm_); + s.Search(input.data(), input.size()); + if (s.Captured()) { + const auto& captured = s.GetCaptured(); + const TString replacement(args[1].AsStringRef()); + TString replaced(args[0].AsStringRef()); + replaced.replace(std::distance(input.begin(), captured.begin()), captured.length(), replacement); + return valueBuilder->NewString(replaced); + } else { + return TUnboxedValue(args[0]); + } + } else { + return TUnboxedValue(); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + TUniquePtr<TSlowCapturingFsm> Fsm_; + }; + + class TPireModule: public IUdfModule { + public: + TStringRef Name() const { + return TStringRef::Of("Pire"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TPireMatch::Name(true, true))->SetTypeAwareness(); + sink.Add(TPireMatch::Name(false, true))->SetTypeAwareness(); + sink.Add(TPireMatch::Name(true, false)); + sink.Add(TPireMatch::Name(false, false)); + sink.Add(TPireCapture::Name()); + sink.Add(TPireReplace::Name()); + } + + void 
BuildFunctionTypeInfo( + const TStringRef& name, + TType*, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { + const bool typesOnly = (flags & TFlags::TypesOnly); + const bool isMatch = (TPireMatch::Name(false, false) == name); + const bool isGrep = (TPireMatch::Name(true, false) == name); + const bool isMultiMatch = (TPireMatch::Name(false, true) == name); + const bool isMultiGrep = (TPireMatch::Name(true, true) == name); + + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireMatch::TFactory(isGrep, false, builder.GetSourcePosition())); + } + } else if (isMultiMatch || isMultiGrep) { + const auto boolType = builder.SimpleType<bool>(); + const auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const std::string_view regexp(typeConfig); + const size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; + const auto tuple = builder.Tuple(); + for (size_t i = 0; i < regexpCount; ++i) { + tuple->Add(boolType); + } + const auto tupleType = tuple->Build(); + builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireMatch::TFactory(isMultiGrep, true, builder.GetSourcePosition(), regexpCount)); + } + } else if (TPireCapture::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireCapture::TFactory(builder.GetSourcePosition())); + } + } else if (TPireReplace::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig<char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireReplace::TFactory(builder.GetSourcePosition())); + } + } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + }; + +} + 
+REGISTER_MODULES(TPireModule) diff --git a/yql/essentials/udfs/common/pire/test/canondata/result.json b/yql/essentials/udfs/common/pire/test/canondata/result.json new file mode 100644 index 00000000000..2ec1b95e9a0 --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/canondata/result.json @@ -0,0 +1,12 @@ +{ + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ], + "test.test[CharacterClasses]": [ + { + "uri": "file://test.test_CharacterClasses_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..6b3d19d9a17 --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,508 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "match"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "grep"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "insensitive_grep"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "multi_match"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ] + ] + ] + ]; + [ + "some_multi_match"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "multi_match2"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ] + ] + ] + ]; + [ + "some_multi_match2a"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "some_multi_match2b"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "some_multi_match2c"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "capture"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "capture_many"; + [ + 
"OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "replace"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column13"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + ""; + %false; + %false; + %false; + [ + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "a"; + %true; + %false; + %false; + [ + %true; + %true; + %true; + %false; + %false; + %false; + %false + ]; + %true; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "a" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "aax"; + %true; + %false; + %false; + [ + %true; + %true; + %false; + %false; + %false; + %false; + %false + ]; + %true; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "aax" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "xaax"; + %false; + %false; + %false; + [ + %false; + %true; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + [ + "a" + ]; + [ + "aa" + ]; + [ + "xbax" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "xaaxaaxaa"; + %false; + %true; + %true; + [ + %false; + %true; + %true; + %true; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + [ + "a" + ]; + [ + "aa" + ]; + [ + "xaaxaaxba" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "XAXA"; + %false; + %false; + %true; + [ + 
%false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "XAXA" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "7"; + %false; + %false; + %false; + [ + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %false; + %false; + %false; + %false + ]; + %false; + %false; + %false; + #; + #; + [ + "7" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ]; + [ + "QC transfer task JAVA"; + %false; + %false; + %false; + [ + %false; + %true; + %false; + %false; + %false; + %false; + %false + ]; + %false; + [ + %false; + %true; + %true; + %false; + %false + ]; + %false; + %true; + %true; + #; + #; + [ + "QC transfer task JAVA" + ]; + [ + %false; + %false; + %false; + %false; + %false + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt b/yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt new file mode 100644 index 00000000000..7fe80ff82a7 --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/canondata/test.test_CharacterClasses_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "digits"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "spaces"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "xx000xx"; + %true; + %false + ]; + [ + "lLlLl"; + %false; + %false + ]; + [ + "a1 b2 c3"; + %true; + %true + ]; + [ + "xxx yyy"; + %false; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/pire/test/cases/Basic.in b/yql/essentials/udfs/common/pire/test/cases/Basic.in new file mode 100644 index 00000000000..ddc62722474 --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/cases/Basic.in @@ -0,0 +1,8 @@ +{"key"="1";"subkey"="1";"value"=""}; +{"key"="2";"subkey"="2";"value"="a"}; +{"key"="3";"subkey"="3";"value"="aax"}; +{"key"="4";"subkey"="4";"value"="xaax"}; +{"key"="5";"subkey"="5";"value"="xaaxaaxaa"}; +{"key"="6";"subkey"="6";"value"="XAXA"}; +{"key"="7";"subkey"="7";"value"="7"}; +{"key"="8";"subkey"="8";"value"="QC transfer task JAVA"}; diff --git a/yql/essentials/udfs/common/pire/test/cases/Basic.sql b/yql/essentials/udfs/common/pire/test/cases/Basic.sql new file mode 100644 index 00000000000..b13078f3232 --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/cases/Basic.sql @@ -0,0 +1,32 @@ +/* syntax version 1 */ +$match = Pire::Match("a.*"); +$grep = Pire::Grep("axa"); +$insensitive_grep = Pire::Grep("(?i)axa"); +$multi_match = Pire::MultiMatch(@@a.* +.*a.* +.*a +.*axa.*@@); +$multi_match2 = Pire::MultiMatch(@@YQL.* +QC.* +.*transfer task.*@@); + +$capture = Pire::Capture(".*x(a).*"); +$capture_many = Pire::Capture(".*x(a+).*"); +$replace = Pire::Replace(".*x(a).*"); + +SELECT + value, + $match(value) AS match, + $grep(value) AS grep, + $insensitive_grep(value) AS insensitive_grep, + $multi_match(value) AS multi_match, + $multi_match(value).0 AS some_multi_match, + $multi_match2(value) AS multi_match2, + $multi_match2(value).0 AS some_multi_match2a, + $multi_match2(value).1 AS some_multi_match2b, + $multi_match2(value).2 AS some_multi_match2c, + $capture(value) AS capture, + $capture_many(value) AS capture_many, + $replace(value, "b") AS replace, + $multi_match2(Nothing(String?)) +FROM Input; diff --git a/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in new file mode 100644 index 
00000000000..e2737f40a1e --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.in @@ -0,0 +1,4 @@ +{"key"="1";"subkey"="1";"value"="xx000xx"}; +{"key"="2";"subkey"="2";"value"="lLlLl"}; +{"key"="3";"subkey"="3";"value"="a1 b2 c3"}; +{"key"="4";"subkey"="4";"value"="xxx yyy"}; diff --git a/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql new file mode 100644 index 00000000000..be8ab6c294a --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/cases/CharacterClasses.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +$digits = Pire::Grep("\\d+"); +$spaces = Pire::Grep("\\s+"); + +SELECT + value, + $digits(value) AS digits, + $spaces(value) AS spaces +FROM Input; diff --git a/yql/essentials/udfs/common/pire/test/ya.make b/yql/essentials/udfs/common/pire/test/ya.make new file mode 100644 index 00000000000..08952c2a431 --- /dev/null +++ b/yql/essentials/udfs/common/pire/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/pire) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/pire/ya.make b/yql/essentials/udfs/common/pire/ya.make new file mode 100644 index 00000000000..e7a9fabf607 --- /dev/null +++ b/yql/essentials/udfs/common/pire/ya.make @@ -0,0 +1,29 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319902628 OUT_NOAUTO libpire_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(pire_udf) + + YQL_ABI_VERSION( + 2 + 27 + 0 + ) + + SRCS( + pire_udf.cpp + ) + + PEERDIR( + library/cpp/regex/pire + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp new file mode 100644 index 00000000000..4b7df61c28e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp @@ -0,0 +1,143 @@ +#include 
<yql/essentials/minikql/protobuf_udf/type_builder.h> +#include <yql/essentials/minikql/protobuf_udf/value_builder.h> +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_registrator.h> + +#include <library/cpp/protobuf/yql/descriptor.h> + +using namespace NKikimr::NUdf; +using namespace NProtoBuf; + +namespace { + class TDynamicProtoValue: public TProtobufValue { + public: + TDynamicProtoValue(const TProtoInfo& info, TDynamicInfoRef dyn) + : TProtobufValue(info) + , Dynamic_(dyn) + { + Y_ASSERT(Dynamic_ != nullptr); + } + + TAutoPtr<Message> Parse(const TStringBuf& data) const override { + return Dynamic_->Parse(data); + } + + private: + TDynamicInfoRef Dynamic_; + }; + + class TDynamicProtoSerialize: public TProtobufSerialize { + public: + TDynamicProtoSerialize(const TProtoInfo& info, TDynamicInfoRef dyn) + : TProtobufSerialize(info) + , Dynamic_(dyn) + { + Y_ASSERT(Dynamic_ != nullptr); + } + + TMaybe<TString> Serialize(const Message& proto) const override { + return Dynamic_->Serialize(proto); + } + + TAutoPtr<Message> MakeProto() const override { + return Dynamic_->MakeProto(); + } + private: + TDynamicInfoRef Dynamic_; + }; + + class TDynamicProtoValueSafe: public TDynamicProtoValue { + public: + TDynamicProtoValueSafe(const TProtoInfo& info, TDynamicInfoRef dyn) + : TDynamicProtoValue(info, dyn) {} + + TAutoPtr<Message> Parse(const TStringBuf& data) const override { + try { + return TDynamicProtoValue::Parse(data); + } catch (const std::exception& e) { + return nullptr; + } + } + }; + + class TProtobufModule: public IUdfModule { + public: + TStringRef Name() const { + return TStringRef("Protobuf"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TStringRef::Of("Parse"))->SetTypeAwareness(); + sink.Add(TStringRef::Of("TryParse"))->SetTypeAwareness(); + sink.Add(TStringRef::Of("Serialize"))->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + 
const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + Y_UNUSED(userType); + + try { + auto dyn = TDynamicInfo::Create(TStringBuf(typeConfig.Data(), typeConfig.Size())); + + TProtoInfo typeInfo; + ProtoTypeBuild(dyn->Descriptor(), + dyn->GetEnumFormat(), + dyn->GetRecursionTraits(), + dyn->GetOptionalLists(), + builder, &typeInfo, + EProtoStringYqlType::Bytes, + dyn->GetSyntaxAware(), + false, + dyn->GetYtMode()); + + auto stringType = builder.SimpleType<char*>(); + auto structType = typeInfo.StructType; + auto optionalStructType = builder.Optional()->Item(structType).Build(); + + if (TStringRef::Of("Serialize") == name) { + // function signature: + // String Serialize(Protobuf value) + builder.Returns(stringType) + .Args() + ->Add(structType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Done(); + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoSerialize(typeInfo, dyn)); + } + } else { + // function signature: + // Protobuf Parse(String value) + builder.Returns((TStringRef::Of("TryParse") == name) ? 
optionalStructType : structType) + .Args() + ->Add(stringType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Done(); + + if (TStringRef::Of("Parse") == name) { + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoValue(typeInfo, dyn)); + } + } else if (TStringRef::Of("TryParse") == name) { + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoValueSafe(typeInfo, dyn)); + } + } + } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } + }; + +} + +REGISTER_MODULES(TProtobufModule); diff --git a/yql/essentials/udfs/common/protobuf/test/canondata/result.json b/yql/essentials/udfs/common/protobuf/test/canondata/result.json new file mode 100644 index 00000000000..ac534cee58f --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/canondata/result.json @@ -0,0 +1,126 @@ +{ + "test.test[CRYPTR-627]": [ + { + "checksum": "fce4dcff82967863048135d17d52f31f", + "size": 90043, + "uri": "https://storage.yandex-team.ru/get-devtools/212715/870509fa5b3fad596ac1effbd15336199d7c4166/resource.tar.gz#test.test_CRYPTR-627_/results.txt" + } + ], + "test.test[YQL-16111]": [ + { + "checksum": "35d6f2291f27eb3ec28a96739412f276", + "size": 5675, + "uri": "https://storage.yandex-team.ru/get-devtools/1781765/c2a453956382e0fdfc958f3c4e32ed7740f03d4b/resource.tar.gz#test.test_YQL-16111_/results.txt" + } + ], + "test.test[YQL-3381]": [ + { + "checksum": "c098af301d5dc8d85071a47455f0f592", + "size": 97359, + "uri": "https://storage.yandex-team.ru/get-devtools/212715/6d6e638efd56a0c6037196ae663d58f279ee467a/resource.tar.gz#test.test_YQL-3381_/results.txt" + } + ], + "test.test[YQL-6706]": [ + { + "checksum": "17fbe1395ae7573288532aa42c5525c4", + "size": 87479, + "uri": "https://storage.yandex-team.ru/get-devtools/212715/b614019f8638cf007ad9da4a361791da4a66b156/resource.tar.gz#test.test_YQL-6706_/results.txt" + } + ], + "test.test[YQL-8307]": [ + { + "checksum": 
"e3045a4ed9fe70bc12c8fd0de7bff29c", + "size": 3840, + "uri": "https://storage.yandex-team.ru/get-devtools/1923547/edc65c62e064cae64c748b355e12a480cac0c768/resource.tar.gz#test.test_YQL-8307_/results.txt" + } + ], + "test.test[recursion_bytes]": [ + { + "checksum": "eeb1ca24bdc09529bd0a320965b3123d", + "size": 1241, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_recursion_bytes_/results.txt" + } + ], + "test.test[recursion_fail]": [ + { + "uri": "file://test.test_recursion_fail_/extracted" + } + ], + "test.test[recursion_ignore]": [ + { + "checksum": "eeb1ca24bdc09529bd0a320965b3123d", + "size": 1241, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_recursion_ignore_/results.txt" + } + ], + "test.test[syntax_aware_empty_nested_message]": [ + { + "checksum": "2f7d8896e8d637d9698c94f99b5e586f", + "size": 4344, + "uri": "https://storage.yandex-team.ru/get-devtools/1899731/4317be1fdb14f46c8bb94f5975cce5af8f89f78e/resource.tar.gz#test.test_syntax_aware_empty_nested_message_/results.txt" + } + ], + "test.test[syntax_aware_nested_enum_string_value]": [ + { + "checksum": "630a0e1ef04aada546e39ad71771c4d9", + "size": 4152, + "uri": "https://storage.yandex-team.ru/get-devtools/1899731/4317be1fdb14f46c8bb94f5975cce5af8f89f78e/resource.tar.gz#test.test_syntax_aware_nested_enum_string_value_/results.txt" + } + ], + "test.test[yt_mode_any]": [ + { + "checksum": "c640b3b07686099261511c45d21388f7", + "size": 3808, + "uri": "https://storage.yandex-team.ru/get-devtools/1946324/c1d8ae91dcc463381c8fa29e568a09926e9a2225/resource.tar.gz#test.test_yt_mode_any_/results.txt" + } + ], + "test.test[yt_mode_enum]": [ + { + "checksum": "2c32becbf155f2dce66d7c00db46a2a4", + "size": 2965, + "uri": 
"https://storage.yandex-team.ru/get-devtools/1781765/c2a453956382e0fdfc958f3c4e32ed7740f03d4b/resource.tar.gz#test.test_yt_mode_enum_/results.txt" + } + ], + "test.test[yt_mode_map]": [ + { + "checksum": "67b8ad859a21d7f0c8203daa08194cf3", + "size": 21224, + "uri": "https://storage.yandex-team.ru/get-devtools/1936947/d770c9a45c5df4db61873fc48e1cb5961f774af6/resource.tar.gz#test.test_yt_mode_map_/results.txt" + } + ], + "test.test[yt_mode_no_ser]": [ + { + "checksum": "eeb1ca24bdc09529bd0a320965b3123d", + "size": 1241, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_no_ser_/results.txt" + } + ], + "test.test[yt_mode_plain]": [ + { + "checksum": "7321635f17c1f8fdb75d96bb2a3ce4d1", + "size": 1796, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_plain_/results.txt" + } + ], + "test.test[yt_mode_ser_pb]": [ + { + "checksum": "eeb1ca24bdc09529bd0a320965b3123d", + "size": 1241, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_ser_pb_/results.txt" + } + ], + "test.test[yt_mode_ser_yt]": [ + { + "checksum": "eeb1ca24bdc09529bd0a320965b3123d", + "size": 1241, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_ser_yt_/results.txt" + } + ], + "test.test[yt_mode_variant]": [ + { + "checksum": "de1dd8aeb5c695707a114866a6124ed8", + "size": 12323, + "uri": "https://storage.yandex-team.ru/get-devtools/1937492/9579ab43fa70f08e465aa7673b27a5da5460a75e/resource.tar.gz#test.test_yt_mode_variant_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted b/yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted new file mode 100644 index 
00000000000..0b282bc72d6 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/canondata/test.test_recursion_fail_/extracted @@ -0,0 +1,11 @@ +<tmp_path>/program.sql:<main>: Error: Type annotation + + <tmp_path>/program.sql:<main>:64:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem + SELECT $udf(TestField) FROM plato.Input; + ^ + <tmp_path>/program.sql:<main>:64:8: Error: At function: NamedApply, At function: Udf, At Protobuf.Parse + SELECT $udf(TestField) FROM plato.Input; + ^ + <tmp_path>/program.sql:<main>:64:8: Error: Failed to find UDF function: Protobuf.Parse, reason: Error: Module: Protobuf, function: Parse, error: (yexception) yql/essentials/minikql/protobuf_udf/type_builder.cpp:xxx: can't handle recursive types: Test + SELECT $udf(TestField) FROM plato.Input; + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in new file mode 100644 index 00000000000..4aef303a9ca --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in @@ -0,0 +1,3 @@ +{"ProfileDump"="\n\x0F\x08\x8B\3\x15\xC6\x16[Z\x1A\5\x98\xA7\xA5\xD4\nj-\x08\xDC\xC6\xF5\x92\xDE\xAC\xD2\xFF\xD5\1\x10\x98\xA7\xA5\xD4\n\x1D\xC6\x16[Z%\xC6\x16[Z-\xC6\x16[Z1\0\0\0\0\0\0\xF0?@\2H\2";}; +{"ProfileDump"="\n\x0F\x08\xEB\1\x15\xED\xE8^Z\x1A\5\x98BAD_ROW_PROTO";}; +{"ProfileDump"="\n\x0F\x08\x8B\3\x15\3222\x94Z\x1A\5\xB6\xF2\xDB\xA5\2\n\x11\x08\x94\3\x15\3222\x94ZB\7Swift 2\n\x12\x08\x93\3\x15\3222\x94ZB\x08Wileyfox\n\x0B\x08\x94\4\x15\3222\x94Z\x1A\1\2j,\x08\x84\xCD\xAF\xF9\x9B\xC0\xF8\xFEv\x10\xB6\xF2\xDB\xA5\2\x1D\3222\x94Z%\3222\x94Z-\3222\x94Z1\0\0\0\0\0\0\x10@@\2H\2";};
\ No newline at end of file diff --git a/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr new file mode 100644 index 00000000000..6f11c914086 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.in.attr @@ -0,0 +1 @@ +{schema=[{name=ProfileDump;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql new file mode 100644 index 00000000000..d4f8a432d44 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/CRYPTR-627.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +/* + Should check is TryParse work correct for bad protobuf input data +*/ + +$config = @@{"name":"yabs.proto.Profile","meta":"eNqNVt1y21QQRrJs2Ws7ceS0UVwKoQyDgWla0h+GXHTGScw0M00T0hQuNYp0bGuiP86RkvoxeAMegeE5uOcF+ga8AHtWUiynypQr+6y+/Tm73+4eeG/C/bl9Lh7FPEqiR6lg3MK/E89n2yQyQH7O/j/4ywT9JPtqbEPdS1ggTGWrNmzvfL69wG3nmOL3EHHGC1hz7IRNIz4vPAhTJd0vq3T3c/DCXyu2hYgjngizRnr3qvRepufk7yE0nSgNE8aFqd0O388wxmNoRJOJBNcJvFUFPpaIIqBvQJul58JsfDyWb0H/LWXcwxvrhN6sQv+MkLnxHejndhjKSJqEHVRh9whifAWaG8TCbBFyowp5EMTGDrT9wJowO0k5xgCEvl+FfhX8lKGMH2GFs2nq29xyooi7wmyT2hdVamenGXRfIo0D6Nup6yUR99LAEmwasBCL1tlSUP/rSv3RNf5NDjeeQ8eOY99D1nhRKMzu7XU5Gy2AA8zJie1xowv1icdFggRVh5qxAg3BnCh0kXR4HjwF7Yx70Udg8nMy87iLlJNae9D5lXnTWcLct4dhUqF9RZ9z7T6009hFJluJFzC0oQz1wenCxv8ItGSzVmVTI5t/KLB6s2Puwsp5YF03neeSg67Rg6YnOc9EFmbX+BTWC4m1HK861GV8YhZdyTaSYIzHwWRfyE6RZwOAXWLFMpUGqRigBbYXItuVYXNXS3jKBu8V6Cw1ECrmHVrEphnr0KE2RIkVuM8WabQdWVzr3KPuV27Jg7EBbcF85mA085hhhMqwu6vu/HAdUWMR0YM3sDIis3todRymAUbUOBifjQ5f9T4xdKiNDg56itGB5snb0/2Xozfjniohp+Oj41/GvRoaXTkd7x8fHY1fH4zODo9f97TBPyq0y3MPFS7Y/Ar7YlGBG6GrlDMMPcUqWJe2n7JsxGl7Kvp/CO0YeVJ8yIaZWTlvJZ0QniCxC3j9djjx/wWsX+VstMr+G7e321IHlPXLYeof16dwseIC4w2nmSIOPWXYuS5Xq0Sg3xXQi4mKJMThm2VUydqUuIS5VD5MMPWdFDqc3SDMcnmIL8Yd6ErjkkEZ4Rrko4rUI9CLDfIhnbvGKtTQOq25rJZrUM+uKcurSNHgCFqLqVtlpA96PrpLhjZu3rA21Mnc3wo08u2wBq1slSz6q5J5/eWmqVES
MKOMLq/RUQ4T2ZOpn3g4axmnXKnYHBq3wwtKkSo9xjNuCyY96pQ1nDZy99H8aS7lsVRbvE7TE5YIbJ7gepLy+sT2BRv8W4N6thjLhpTcEGSShL3LKt+S8cyKGdEtjy7lxujKz2IeOsyl8GmUzaKUCyuwxUUe/3UemsXUKeeqRXr3oO9EvpRG+Hyyk1lGGyANzFyCKRK+zDrJ2yRHlsWcuZ4jO8dJOO3H7g1x7ODek2ITemnovVsazytVtF4lIaYBB//3Zo+Us9OOuVY6PTGN0ump2S+dnpnrpdNz806RrSC0bNc17+J5jYptczn5sSIbdKdNWEtsPmWJ5UayxjTDTfq0AatTJL7l+KnI6b1ZVFFg0h1KjjmQssEZ1OS7BT3i86YoeMXclFddh+b1E0O2QZf6oyCZVmrWGLrLDxWkFFbFS1KXUX/I/mz5UTjNRCqJKmcJ4vB1JDcSBqfl5Mk81ksej6Ff9bTpgebbtPCLdkYJVv2y1ODr0JL+RGIHceZ0V3k8+BOXaPm1I6PDHMu5PbPFbNEYDnee7GQytWpd1vLcdbwQXfh+eSYi12R4lty5lzm+Tl8+g7u5MIhCpPmEM+zA0JlTAylVI3JPPVH+AxssP0w=", "lists": {"optional": false}}@@; + +$udf = Udf(Protobuf::TryParse, $config as TypeConfig); + +SELECT $udf(ProfileDump) AS Profile +FROM Input; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.in diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql new file mode 100644 index 00000000000..6c35fc403c2 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-16111.sql @@ -0,0 +1,64 @@ +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_YT; + + message InnerSubProto { + option (NYT.default_field_flags) = SERIALIZATION_YT; + int64 x = 1; + string y = 2; + + message TSubField { + repeated string List = 1; + } + + TSubField SubField = 3 [(NYT.flags) = SERIALIZATION_PROTOBUF]; + } + + map<string, InnerSubProto> dict = 10 [(NYT.flags) = MAP_AS_DICT]; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": 
"eNrFWs1vG8cVLz9FPkrUaiXbjBzHCZNYshNTgfPl0m0SilzJdPjVJZlEBoLFihxSa5O7zO7SNo2iKNBTj0VvbVEUvaToH1Cg6KVBgR4LFGiDHhqgaAu0f0KPfTOzu9wlKYsJkCQHhfvm9z7mzW9m3swY/nALnu0bRn9A9kamYRvH495el1gdUxvZhpljMnGdI3IuIluFjQNtQEoesEls8SZEeyjMhJ6N7KZuvJCbUcoFNRpULDON7L+jsLmgVRQhqqtDajG0m5TZbzEDKyO1c1/tk0yYid1P8RmALhkRvUv0ziQTwSiSsk8ivgQbo/HxQOsoPhggLCYLvKE0Be/A+kOi3vdDUwyapmIfsAirQ2JZGIBiT0YkE2W9f3au97M9TzlaLVQSC5Ak+njILcROyZ+EiFkrCarmmFixiPlA65BMnBnYmTPQ5O2zNlw97EqSPLKJbmmGnllhRl5cMIpk0J01MdUT34AVY2TjLyuTwPFJ3Xh6IRHqHCO7YLEMgmWMzQ5ROkaXKJreMzJJZuDyfEcYsIi4MsLktBX4Fs9D3Jrotvoos8oY4nxlfxuH9WUodgtiPdpLJNjnyAHXCSYx/gWTWICUTiybdDkjIktyCrjSPKWiX4hSH8C6F5Jiqnrf5ebeWZHkJFdPpmpymgS+xRKAoROjh9OrM0CeLM5SnULmsmRwaWcgfnNKtZVTmFLlk2yObW1Im4TyHlPMe5ZkQeTO7JnsqPGOrZn+T/F58AQKoxWwVWjVFdZQtv0Y0sH0iFsQs2zVtBkLYzL/EAWI4CLDVrmYTH+K70w7HGEdvjI/ogHLs/3efhPWAh1Y1nX2u3BuoWkkydZY13SbmCOTUMZyV5n/rJzCubYfza3Im+N54bVk4r8rwvfxv3D2kzhsLZozC6cvTn9k8DExWZJisvOFMyI2UI/JAGdDaDd946WlZmWuQlVkrim+BVFniaYWri1ngc4lmemJFyFJ/8+5EWcxJ6iA8kLchgSbJl3ibm3eNyVWl/TU8cBWHqiDMWGER2I5wveoTLwMKT6rNNR5xFbPmMwnWplKqPt7Fs5lh5rMBRUw92/OLtyXFndvbi7hVskQrzpDrw4yG2ggIae5uO5Is78JQ5QtLOuQah01JKVUb+9XJCEkpgGY4KBSL7SEsPddrrXeeE2IeAptLoj6Aa/eEGJI2FVuoPyBVEJEPChBzIq4Bkkm2a/XK0LCs9lsyeXaoZD0bB7K9XZDAM9CVWo2C4eSkPIQ+0ctqSmsBsJCF2ueC6nWrgppcQPWuAs3iPUZEUYqTAPhVjYCAkSI2SLEGA2R7ulKYV+qKPVGq1yvFSqYO08mS99pl2WphPnzyRpSoYWySLYDW4sW1IVTyMeF8ClcYLZmuZD9Vxg2F2wqC528DTHOZb7NXl24OzFmz221TM9fakROKTWoiTnCfji3+PP98Y1l9kcm+3ybQGzBJnALNuYMLb0Y/yAEmdOSc8aSGA4sibdmM/jc6YMwN9Yfh+D84pJyYQxvQXxI7BPDLauuLNisafPsYDta/t0+clpdyKOZi/SHYTi30PjCQC8BaPpobPPSia/ESSZhixddZce21x5h7cBFDHBzGmiUBfrMKT2dI+YrIHQGGtFtxbJNog41vc+2mkQ+1lMHFpHXeXPTbaUajECmTyMe0ODNnkb2R0lI+Qpw8TlYvac+UBX3UMUzkaKyhnOwegW2GAT7iI46A9WyWNISDCrStjptKrot4uuwyTSGuDdpowFR6DHPYluOF9kGRVQdAI3IwrLwElPrE52Yqk0U8tEYsYqqd5UT1TrJbFED++FMSH6KAg8dnMRgBb17G0FiHs4zK5gR7LDSOSGd+8rY7t3MXPT7ZxE2GaZIIW1EiE1YpYMx1B5jzIbJ9tD0gqXJl8Fc3VGo4vkjH2s2JKkkp1wrB4ZJCdU3vASnOKH6hpteTFanw/uMZ1PnMGZlhECyOp1DDnA4buF8ODdNll9xY66Xs6rocTSZVxQDHkeTWbU
3YWt0MprXu+bXExEyq/giO5mbpINj1c1c8MN9DWIO6d9RiK4eI2NUE39YmcsMHLXNMZ4iOh2JNRZYm3gNNozjex3OSAXN9LRHmRdYetdpA+Njg4nFq2jbOlHNEVuSLRwMknmRQ7m85orpjLAeaj3btbjDZwSTOdZ2QaCZCDjeZbA0yv1+cTOgyKnTq7xwQ+HU42twnoJwoVO7qq360C8zNE171WkMxGmOjycesa7zOKnMpdaXVpxn87Dq572YBM58LEiwCCrWS7R8uSthLYJlVKXckhS5XWuVq5IQ8RX2d6KJK8IOrRrSwZOa+C244F6rWMRWHmomm5BDlW+OHn+2HFST2O8j5oBBxApc1g1cAHDhUM2uMr3QUtQOEtIy+EboWXlaN5oOeLpDFBzoDH0jp9EXq+uhOkL+2uaE1ecJOYECiX5/JcckzGZUiOHfmBDHv3FhBf8mhCT+TQqQ/WcEVv0VPD0QddgeFmKr3PNPrPdzRbq55eO8XJa5Ji0sKP0IL08SsvMlHkL8nsVsx5ntF55s+06TGU/eaSq1ulwtVGRHXXwKogP18SS4DTLRssOCFuiVXXDzYaIvcXrsQYzlSwRwMiZ8Q0xAtFiX6RTBOcGlSqMsFXGWZF+HOE8CnT5eGlCJfzo2Qm5ru7ovyUJ4bvCzFs5LX2X+1RzPfx+ClK/SpiWSOhgYDxV1oKmWQw1gogKVLDt0X9GkwemS/XkIhNlSdybM0NcZZvZnIUgH69uZ8J77WsP7RxjWAlXtstF9BBtalwxHhk2v05UBeUAGmSxbNPaeXDfnylO9ClXLb5ZLUrVRb0m14pHSrr1bq79fkwVtBvYlTvsGCLNBiRdgUVg4szdhvVbHXRK3SungQCq2mvwmxEO3AhM8+5MIbC6IBJdxfobhx6rry0Sfo1VEAw+XzpEHqyPMkm5rPQ0rfH4q5web9amcXzK9DOLIsDRbe0Av6d3rKHrQicqC21LWbQ+tk746g6aLeUQW3BYPjRVN1xjT6o/j6N4RklNc5kGcun56D7aKxRmTccgOrKv9vkmNu4b4SSXtiRlw+w4k3DzQzZtmAospdvwO06sx3W1Ep5qlTK/1w9iekFOa5V2JZj/GEib4LIGnmcTAQJJTDf4mtnvGS0au4uBlT3P7TyFIuGLcbqMj1T5h5mL7YSEks28qx5pQZxRw5PSbjuuAqF12DDKGQxxJyx1XR150xPR1zDZVbRDARhlWcBs8cB6ecu12sS7FI1Z3qhRn1x0XHEDJaXd1s38OwYZ7cOt6yaoCqLpu2P50zVN5Ti9X8JRkn4HtIcC05dS04T7lvDmxh0t+1Acuoic8eiFzTPqa7twk8w/3QibqXcjsfw+PcMZwNtx9Yea6wbodunvdAfWNgar3c4bZnz680orH8j2/jo7/Fwr9Mhw5bOz/Orx9yBUbbjJk0huQDu0gfLIGVyb23oSeWbAVf+zxYtna81jrvOVGaket7TOffLN/D8OG9L6pjkbEZKXawUDtZ/8YhijdJcUViBRqR7iIbcBavXVbkpVivdKu1ugStgoJev9J72ax/F/H0oB+ORVMBHMqNCW5XKiU7xboJaZy1BKi4jacD0obcr1V328fCDHqw73bVCrlZkuIM7fOFSgXreAkvVQtNJRCkwmU+gF12cZVValIh4XikZCgThZDhCSN02krlYstAcQMbDkCzxNrSbEOYv1VKuFxZ1V8GjK8g++WG+7qrrxXqLSlprAmnsM00tbiban4ritOZ+/Dpptd5+DD8tty0vsS7JSkhiwV6d2t0qzLLeWgLFVKTRpO2UkOiioSDsFluOhH7B/xX26RGMp+ezqUrDZkrnYdV7gLNaVGQUZPjgXsUwpW3ivgaOAIruVtyNDpobgPEPyNoYdGLPGJj7uZv/yOvlymb1zMIelyczHwq9RzPfbyz2x7TdacV/a6upTXvy706pF43qvXZOUfw3bAq3sGXcbvp47fSwG/vuHlni/4PPsarbwMMe7myQ8
vp2R1pn/cVP4dSHWMwXjI33zOskwzx5ZArkM3x/whrN8nE+VzWPnUsbKGesWpIR02FzHorDfb5Ui00Z0jkM+fnztn+luKPq4/H3UIrAXZcqanJQnj/vsS7uZD95HvNK74j3/LJY8/EXLzVRAfqKam6m7WThnxgBeXN4KjyrJCB33/6t0dNTdR6evjdZuow5w53lu8UcHfwpCY2NzD9pK7Wfan9EmRWDZWOtGu1rHZg3vqxnqOCnMllLDrl/3oLz67CDKDbP84BGtlHQuJ5viYvwKsQugRO95F5NAj+jVxaoHQRHwbEohjPXLeHS5x4wETuZYLYq5isqe0fRmSXiN9b6hols0qkaTMfuejv/rsYnS7AkkvXFph4ORx7uLpT+yf91hGY9hcEIPzLJYP3wxxm8dx/gD7f1qjBpI=", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +$udfParse = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig); + +$data = @@ +{ + "dict": [ + { + "key": "key2", + "value": { + "x": 23, + "y": "yy", + "SubField": {"List": ["s1"]} + } + } + ] +} +@@; + +SELECT + $data, + $udfParse($data), + $udfSerialize($udfParse($data)), + Ensure("Success", StablePickle($udfParse($data)) == StablePickle($udfParse($udfSerialize($udfParse($data)))), "Fail") +; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in new file mode 100644 index 00000000000..f70c4566f9c --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in @@ -0,0 +1,3 @@ +{"ProfileDump"="\n\x0F\x08\x8B\3\x15\xC6\x16[Z\x1A\5\x98\xA7\xA5\xD4\nj-\x08\xDC\xC6\xF5\x92\xDE\xAC\xD2\xFF\xD5\1\x10\x98\xA7\xA5\xD4\n\x1D\xC6\x16[Z%\xC6\x16[Z-\xC6\x16[Z1\0\0\0\0\0\0\xF0?@\2H\2";}; +{"ProfileDump"="\n\x0F\x08\xEB\1\x15\xED\xE8^Z\x1A\5\x99\xE5\xEB\x82\x0F\nH\x08\xDD\2\x15\xE0\xC3\xAAZ\"\7\x08\x99\1\x10\xE0\xC5\x08\"\7\x08\xA2P\x10\xE0\xC5\x08\"\7\x08\x9B\1\x10\xE0\xC5\x08\"\7\x08\x9A\1\x10\xE0\xC5\x08\"\7\x08\x9D\1\x10\xE0\xC5\x08\"\7\x08\x9E\1\x10\xE0\xC5\x08\"\x08\x08\xEE\x9C\7\x10\xE0\xC5\x08\n\x0F\x08\xEB\1\x15\x0BN_Z\x1A\5\xB7\xD8\xE6\xFF\x0E\n\x0B\x08\xA6\2\025fG_Z\x1A\1\7\n\x0F\x08\xEB\1\x15= 
_Z\x1A\5\xE8\xFD\xBE\xFF\x0E\n\x0B\x08\xA5\3\x15\xE0\xC3\xAAZ\x1A\1F\n\x0F\x08\xEB\1\025b#\xA7Z\x1A\5\xEA\xA5\x8F\xBB\7J\x0C\x08\4\x15\x8E\x83\xA8Z\x1A\3\xD9\xA1\x17";}; +{"ProfileDump"="\n\x0F\x08\x8B\3\x15\3222\x94Z\x1A\5\xB6\xF2\xDB\xA5\2\n\x11\x08\x94\3\x15\3222\x94ZB\7Swift 2\n\x12\x08\x93\3\x15\3222\x94ZB\x08Wileyfox\n\x0B\x08\x94\4\x15\3222\x94Z\x1A\1\2j,\x08\x84\xCD\xAF\xF9\x9B\xC0\xF8\xFEv\x10\xB6\xF2\xDB\xA5\2\x1D\3222\x94Z%\3222\x94Z-\3222\x94Z1\0\0\0\0\0\0\x10@@\2H\2";};
\ No newline at end of file diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr new file mode 100644 index 00000000000..6f11c914086 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.in.attr @@ -0,0 +1 @@ +{schema=[{name=ProfileDump;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql new file mode 100644 index 00000000000..d8ee0b931f2 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-3381.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +$config = @@{"name":"yabs.proto.Profile","meta":"eNqNVt1y21QQRrJs2Ws7ceS0UVwKoQyDgWla0h+GXHTGScw0M00T0hQuNYp0bGuiP86RkvoxeAMegeE5uOcF+ga8AHtWUiynypQr+6y+/Tm73+4eeG/C/bl9Lh7FPEqiR6lg3MK/E89n2yQyQH7O/j/4ywT9JPtqbEPdS1ggTGWrNmzvfL69wG3nmOL3EHHGC1hz7IRNIz4vPAhTJd0vq3T3c/DCXyu2hYgjngizRnr3qvRepufk7yE0nSgNE8aFqd0O388wxmNoRJOJBNcJvFUFPpaIIqBvQJul58JsfDyWb0H/LWXcwxvrhN6sQv+MkLnxHejndhjKSJqEHVRh9whifAWaG8TCbBFyowp5EMTGDrT9wJowO0k5xgCEvl+FfhX8lKGMH2GFs2nq29xyooi7wmyT2hdVamenGXRfIo0D6Nup6yUR99LAEmwasBCL1tlSUP/rSv3RNf5NDjeeQ8eOY99D1nhRKMzu7XU5Gy2AA8zJie1xowv1icdFggRVh5qxAg3BnCh0kXR4HjwF7Yx70Udg8nMy87iLlJNae9D5lXnTWcLct4dhUqF9RZ9z7T6009hFJluJFzC0oQz1wenCxv8ItGSzVmVTI5t/KLB6s2Puwsp5YF03neeSg67Rg6YnOc9EFmbX+BTWC4m1HK861GV8YhZdyTaSYIzHwWRfyE6RZwOAXWLFMpUGqRigBbYXItuVYXNXS3jKBu8V6Cw1ECrmHVrEphnr0KE2RIkVuM8WabQdWVzr3KPuV27Jg7EBbcF85mA085hhhMqwu6vu/HAdUWMR0YM3sDIis3todRymAUbUOBifjQ5f9T4xdKiNDg56itGB5snb0/2Xozfjniohp+Oj41/GvRoaXTkd7x8fHY1fH4zODo9f97TBPyq0y3MPFS7Y/Ar7YlGBG6GrlDMMPcUqWJe2n7JsxGl7Kvp/CO0YeVJ8yIaZWTlvJZ0QniCxC3j9djjx/wWsX+VstMr+G7e321IHlPXLYeof16dwseIC4w2nmSIOPWXYuS5Xq0Sg3xXQi4mKJMThm2VUydqUuIS5VD5MMPWdFDqc3SDMcnmIL8Yd6ErjkkEZ4Rrko4rUI9CLDfIhnbvGKtTQOq25rJZrUM+uKcurSNHgCFqLqVtlpA96PrpLhjZu3rA21Mnc3wo08u2wBq1slSz6q5J5/eWmqVESMKOMLq/RUQ4T2ZOpn3g4axmnXKnYHBq3wwtKkSo9xjNuCyY96pQ1nDZy99H8aS7lsVRbvE7TE5YIbJ7gepLy+sT2BRv
8W4N6thjLhpTcEGSShL3LKt+S8cyKGdEtjy7lxujKz2IeOsyl8GmUzaKUCyuwxUUe/3UemsXUKeeqRXr3oO9EvpRG+Hyyk1lGGyANzFyCKRK+zDrJ2yRHlsWcuZ4jO8dJOO3H7g1x7ODek2ITemnovVsazytVtF4lIaYBB//3Zo+Us9OOuVY6PTGN0ump2S+dnpnrpdNz806RrSC0bNc17+J5jYptczn5sSIbdKdNWEtsPmWJ5UayxjTDTfq0AatTJL7l+KnI6b1ZVFFg0h1KjjmQssEZ1OS7BT3i86YoeMXclFddh+b1E0O2QZf6oyCZVmrWGLrLDxWkFFbFS1KXUX/I/mz5UTjNRCqJKmcJ4vB1JDcSBqfl5Mk81ksej6Ff9bTpgebbtPCLdkYJVv2y1ODr0JL+RGIHceZ0V3k8+BOXaPm1I6PDHMu5PbPFbNEYDnee7GQytWpd1vLcdbwQXfh+eSYi12R4lty5lzm+Tl8+g7u5MIhCpPmEM+zA0JlTAylVI3JPPVH+AxssP0w=", "lists": {"optional": false}}@@; + +$udf = Udf(Protobuf::Parse, $config as TypeConfig); + +SELECT $udf(ProfileDump) AS Profile +FROM Input; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in new file mode 100644 index 00000000000..17ca40800d5 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in @@ -0,0 +1,2 @@ +{"doc"="\n\x0Fhttp://agbz.ru/\x10\2\x18\x84\xFB\x89\xC6\5\"\4fake";}; +{"doc"="\nSO REALLY BAD BAD BAD PROTO ROW\n\xBF\2";}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr new file mode 100644 index 00000000000..8838f04a448 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.in.attr @@ -0,0 +1 @@ +{schema=[{name=doc;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql new file mode 100644 index 00000000000..1989d5b03f8 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-6706.sql @@ -0,0 +1,17 @@ +/* syntax version 1 */ +/* + Should check is TryParse work correct for proto with required fields + Can't parse message of type "NNewsStorageProtocol.TRawNewsDoc" because it is missing required fields: Url, Status, FetchTime, FetchHost +*/ + +$config = @@{ + "skip": 0, + "meta": 
"eNqtWFlzG0UQRrc0vpRxjiUER9kcOCbIwVBFkgoB31ZhOWYtAlRRRa20Y3nJale1Rxy9kkfyL/gjVPHIC3+BKn4I3T07q5Us2Q/hTX1P93zT3Sv2Ns+WBqeiveqK02C173uht+pFoWO7L4M6kVw7OABZwz32jkLPN7viENkdz9EdVm49j8J9UOZVlvvOd7RMLbtcMfAnv8qKR17kd4SWJWZM8eusfCTcULggydUyy3NGQktZtwdkoBVqObBKaP2fLKuocAFqbppOp2X3BAUFL4rmNTZzFHU6IghInKUgaRZq7Ji2I6yWb4sgPkaaxR+xAgXS8nCMmTW9Pq0KdXUmQxqg7y0RdHy7H9qeC2lkII00i3/OrqTITc89ti0qRxF0s8ZkIV9iLKVaItUUh99j8zuR46R0yqQzxsXK7fheb0cIS6uARtlIaPbHLXZ3HA6BzJdYP1vtGBWXqR5jtbh+AZb0b9lCqyks22ydRL22C/WeDJw9YXdPwvjmYopfZoXvbSs8ia9LEvoBm5UuDx1zIPx39vdvhs1Jh1AzQGU4wSOUsAkwag36El0AU0WnouUmR8unonGNlTbs0DdDQUDJGorkN1hlxzd7gmQSGEMGnmArgl+IsJJ8RIrmt1mlEWyJYzNyQsJA+Unh2HQCYQz5+l8ZNiPT3PW9qM8fs0IjFL0A0kS8365Put/6SGEMacGfsKKsPdUieSuTbaWmEVvwbcYSKOA7xNh3z7NPtI2Uof4my4pS/i6JPGZFKkYAiaDtrfNsSdOIDf6nPPhTVpKFGes759ZSmeh/ZqBDbr8G/CBI11ieAIq4nV9bmuxoGzRRyyBdBfXsEOpj3Sx3tps9Svo8Int+rTY9ktRLJgE8iUYPdOIeKQn97wzUct01nUFod7AkKg10vjylHol+fXvb7dquSmieZRtW/EbhF77OQxNeURBnElP6T6wUG3LGij+ariVeV9/D37ue13VENQO1md23X4kGQMZ3RVjNorSJNxhVc3yGlQyz13aEX81DVEYXFHS8vqgW9N8pJetAhKee/5J/OZLS/WkpKf36dvxjck76DisrjamHr7DCurXjvYZTw0nXLQMy8eHYoLPp26Hwqnn9bQbm+Xqv3xShyZ+xSlLT+CnVLqq9MTThXzOWJKDeU+2iTI2Ujf5mATqUYZ6iyZbXmdCGnwLyQjOMAgLs/NqdKciLPUhdI7ahBivCzgntCDlaIYaMRLrnBSEAG2MOGTRrbT8Ihw7epyY8xuX7jEms031fp/ue0lW2h4pPKvIOjSAwUvb8AbuEU/rQF6+GgesU+KwAR4QhcHjDnL8i57yipQwQEIJMVzJJ40g6jNpbaiTNGYrEXvDct7tKqsmtKcXCHWXDtJTCEnlOcTDuehe2kAEAuCjHlqIxLibRsAKYaDmMG5PYJ1p26MiFBvoEEfwOm8PQNkBOSm+RdJQ53r8qZ/vXQ7aoTNKay6Q5ScQ5vF/xOtQYqdBvvsKqShX3LZJfJfkZPmYKWy3uktoHpKJIrM++6XYjbIrX5GKhaLoXc3BqOo62RoVVJDU1C3u+NhM3NaKQD88Z+Q8kX1KI7ecurGUCRR+SaMiA7lRRMyTQZunh3pzycJWeMbSAsVOg3qfdoGXgxnkDzJCqtNoCPLoebeNztPinOFRwsxto8ySh35RcFJ54vrYQJ0cUgmVXuL7QqhIsRODTaZr9vrBirwOE2SWC2VkB19msZO4KDxU5KY7wcImXdBM+KiA12VXg+IukPFnI64yPCPBdB9plMpkgoUaDL1r4L2DCIy5vxo1mhIsnxnuAavqiAx9PNarUCA99IfyO7J7tmND0B9od+YEwyuXLsKTj20kp3iXFcfbYh0xKf+XMh0zKakWeo2kHPROTCLR7mP9GtpoxxiRwbzJoSvmjRHlcBKtIOmTK5n5iM1kBc6aKyova9CyhfUyFHmfzr1hZfYRqnxDEb1/8bRgYiRFAoIp92Lc70G0bQRDB+VaT852RQVKleCh
rDyne0rQxKrUMpY6gaEV+24vXXO1TehEjPHx64OEXAAgC/TP59IYc/RmrtuL5ue915AcGTOJvxIAm8ayBP7E3KXziKM4bitR/zcJtxw622nSsDWhysSvyMbN2b0pGY4GNxI6GhRMFJ8KigNAMYxKb25HddeG+/GSsJ4zUypB/15WhML4ypAZnkWSKXPkBNrR40cbd0BAOcC3gwJY2y8q09yKVQeoFfKd7SGWRWo8sm6gcdMH5lm+6gUMlQF5+5QvGhos1n4PW7XYcL4BUpWscofikpOu9sEcTqJpdabD50STR+MDzezDqvA4YL7KFpulGMF8GW8IRcF7wAcyY2BjIwQ2ufoNPydT2go6S/QUcwb6JcfeiNjgAGRTFsTsoo130CLbPntmH/GDfo7uIGXncuKkYilPgZZZfD71etcivscVdDzYWF/8cGoYr/QdunI1N", + "name": "TRawNewsDoc", + "format": "protobin" +}@@; + +$udf = Udf(Protobuf::TryParse, $config as TypeConfig); + +SELECT $udf(doc) AS parsed +FROM Input; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in new file mode 100644 index 00000000000..6c7f0be49c9 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in @@ -0,0 +1,4 @@ +{"TestField"=""}; +{"TestField"="a: 1 b: 2 c: \"hello\""}; +{"TestField"="a: 1 c: \"hello\""}; +{"TestField"="d: [1, 2, 3, 4]"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql new file mode 100644 index 00000000000..80604f1ad3c --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/YQL-8307.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ + +$config = @@{"name":"Test","syntax":{"aware":true},"lists":{"optional":false},"format":"prototext","meta":"H4sIAAAAAAAAA+NK5FLOKs3L1i/TzU3MNjYu0y8oyi/Jjy9JLS7RBxF6YL6SERdLCJAnxMnFmCjBqMCowQpiJkkwAZnMIGayBDOQyQlipkiwKDBrsCaxgbUaAwBc3r8mYwAAAA=="}@@; + +$udf = Udf(Protobuf::Parse, $config as TypeConfig); +$udf2 = Udf(Protobuf::Serialize, 
$config as TypeConfig); + +SELECT $udf($udf2($udf(TestField))) AS Profile +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in new file mode 100644 index 00000000000..b6dd409ad4e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql new file mode 100644 index 00000000000..3ee0813dc44 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_bytes.sql @@ -0,0 +1,35 @@ +/* +syntax='proto3'; + +message Test { + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; + Test test = 2; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb", + "view": { + "recursion": "bytes", + "enum": "number" + } +}@@; + +$udfPar = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig); + +SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail") +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg 
b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg new file mode 100644 index 00000000000..d7d756c8260 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.cfg @@ -0,0 +1,2 @@ +xfail +in plato.Input recursion_fail.in diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in new file mode 100644 index 00000000000..b6dd409ad4e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql new file mode 100644 index 00000000000..e274c92e010 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_fail.sql @@ -0,0 +1,32 @@ +/* +syntax='proto3'; + +message Test { + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; + Test test = 2; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb", + "view": { + "recursion": "fail", + "enum": "number" + } +}@@; + +$udf = Udf(Protobuf::Parse, $config as TypeConfig); + +SELECT $udf(TestField) FROM plato.Input; diff --git 
a/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in new file mode 100644 index 00000000000..b6dd409ad4e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql new file mode 100644 index 00000000000..2f57eb18d19 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/recursion_ignore.sql @@ -0,0 +1,35 @@ +/* +syntax='proto3'; + +message Test { + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; + Test test = 2; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb", + "view": { + "recursion": "ignore", + "enum": "number" + } +}@@; + +$udfPar = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig); + +SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail") +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in 
b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in new file mode 100644 index 00000000000..1c32f6a1423 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in @@ -0,0 +1,4 @@ +{"TestField"="{}"}; +{"TestField"="{\"inner\":{}}"}; +{"TestField"="{\"inner\":{\"i\":{}}"}; +{"TestField"="{\"inner\":{\"i\":{\"a\":\"\"}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql new file mode 100644 index 00000000000..d24aa384d96 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_empty_nested_message.sql @@ -0,0 +1,23 @@ +/* syntax version 1 */ + +/* +syntax='proto3'; + +message Test { + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; +} +*/ + +$config = @@{"name":"Test","syntax":{"aware":true},"lists":{"optional":false},"format":"json","meta":"H4sIAAAAAAAAA+PqZuSSyyrNy9Yv081NzDY2LtMvKMovydcvSS0u0QMzlRK5WEKAPCEpLtbMvLzUIglGBUYNbiNuPZConidISEqciwvMABNCnFyMiWBVnFIqXKwQMWkuxkyoTgEknWAiiQ1skzEAZSMFuY4AAAA="}@@; + +$udf = Udf(Protobuf::Parse, $config as TypeConfig); + +SELECT $udf(TestField) AS Profile +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in new file mode 100644 index 00000000000..2487eb8da31 --- /dev/null +++ 
b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in @@ -0,0 +1,4 @@ +{"TestField"="{}"}; +{"TestField"="{\"inner\": {\"l\": 25}}"}; +{"TestField"="{\"inner\": {\"l\": \"B\"}}"}; +{"TestField"="{\"inner\": {\"alphabet\": [0, 1, 25]}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql new file mode 100644 index 00000000000..f869d0a3dd8 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/syntax_aware_nested_enum_string_value.sql @@ -0,0 +1,24 @@ +/* syntax version 1 */ + +/* +message Test { + enum Letters { + A = 0; + B = 1; + Z = 25; + } + message Inner { + repeated Letters alphabet = 1; + Letters l = 2; + } + Inner inner = 1; +} +*/ + +$config = @@{"name":"Test","view":{"enum":"full_name"},"syntax":{"aware":true},"lists":{"optional":false},"format":"json","meta":"H4sIAAAAAAAAA+PayMgll1Wal61fppubmG1sXKZfUJRfkq9fklpcogdmKrUxcrGEALlCUlysmXl5qUUSjAqMGtxG3HogUT1PkJCUExcrmCEkz8WRmFOQkZiUWgJUx6zBZ8QLUeeTWlKSWlQsJMHFmCPBBDQBXUZJjosdpoiVi9FRgAFEOQkwgqgoAckkNrB7jAFNZK4ztAAAAA=="}@@; + +$udf = Udf(Protobuf::Parse, $config as TypeConfig); + +SELECT $udf(TestField) AS Profile +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.in diff 
--git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql new file mode 100644 index 00000000000..ccc9af6f304 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_any.sql @@ -0,0 +1,43 @@ +/* +syntax = 'proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + bytes Any = 1 [(NYT.flags) = ANY]; + string x = 2; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrFWltvG8cVDq8iDyVquZJtRo7rhEks2YmpwLlWbtPwspLp8NYlmUQGgsWKHFFrk7vM7tI2jaIo0Kc+9rUoiqIvAfoDChR9aVCgjwUKtHlqgaIt0P6EPvbMzO5ylxeLCZAkDwr3zLnNmW/mnDNj+MNteL5vGP0B2R+Zhm2cjE/3e8TqmtrINsw8o4mbnCPvcuRqkDnUBqTsMbaILb4D0VMkZkPPR/ZSt17KzwjlgxJNSpaZRO7fUdhaMCqKENXVIdUY2kvK7LeYhbWR2n2g9kk2zMjup/gtgB4ZEb1H9O4kG0EvkrKPIr4CmdH4ZKB1FR8bIFtMFvhAecq8C5uPiPrAz5pirGlK9jGWYH1ILAsdUOzJiGSjbPbPz81+duYpR6qNQmIBkkQfD7mG2JL4ScgxqyVBxRwVaxYxH2pdko0zBbtzClp8fFaHK4dTSZLHNtEtzdCza0zJywtWkQx6syqmcuJbsGaMbPxlZRO4Pqlbzy0EQoPzyC6zWAHBMsZmlyhdo0cUTT81skmm4Or8RBhjCfkqyCanrcC3eBHi1kS31cfZdYYQ5yv32zhsrgKx2xA7pbNEgH2BGHCZYBDjXzKIBUjpxLJJjyMisiKmgAvNQyr6pSD1EWx6LimmqvddbO6f50lecuVkKianSeBbLAMYOjFOcXt1B4iTxVFqUJa5KBmc2h2I355CbW0JUmp8k82hrQNpk1DcY4j5zJLMify5M5MdMT6xDdP/Kb4IHkFhsAJ2Cq27xDrSdp5AOhgecRtilq2aNkNhTOYfogARPGTYKReT6U/xvemEI2zC1+ZXNKB5dt47b8NGYAKrms79AC4sVI0g2R7rmm4Tc2QSilhuKvuftSWY6/i5uRZ5azxPvJFM/HdN+BH+F859FoftRXtm4fbF7Y8IPiEmC1JMdr5wR8QG6gkZ4G4I7aVvvbLSrsxXqYjMJcV3Ieoc0VTDjdU00L0kMznxMiTp/zk24sznBCVQXIg7kGDbpEfc1OZ9U2D1yKk6HtjKQ3UwJgzwCCyH+AGliVchxXeVhjKP2ekZk/lGq1AKNX/fwr3sQJOZoARm/u3Zg/vK4unN7SVMlYzjdWfp1UE2gwoScpqTGw4195swRNnBsgmp9nFTUsqNTrEqCSExDcAIh9VGoS2Eve9Kvf3WG0LEE+hwQtTP8PotIYaAXecKKh9JZeSIBynIsyZuQJJRio1GVUh4OlttuVI/EpKeziO50WkK4GmoSa1W4UgSUh5H8bgttYT1gFtoYsMzIdU7NSEtZmCDm3Cd2JwhoafC1BGuJRMgIIeYK0GMwRDhnq4WilJVaTTblUa9UMXYeTRZ+n6nIktljJ+P1pQKbaRFcl3YXnSgLtxCPiyEl2CB6ZrFQu5fYdhakFQWGvkexDiWeZq9vjA7MWTPpVom5y81IktKDapiDrAfzx3+PD++tUp+Z
LQvlgRiC5LAbcjMKVr5MP5xCLLLgnPOkRgOHIm3ZyP4wvJFmFvrT0NwcXFJudCHdyE+JPaZ4ZZV1xYkazo8u9iOlD/bR5bVhdybOU9/EoYLC5UvdPQKgKaPxjYvnfhJnGQUdnjRU3Zse+MRNg6cxBjemToaZY5+a8lM54D5GgjdgUZ0W7Fsk6hDTe+zVJM4iJ2qA4vIm3y45Y5SCQYg0ycRD0jwYU8i99MkpHwFuPgCrN9XH6qK21TxSKQorek0Vq/BNmPBOaKh7kC1LBa0BGMV6ViDDpXcEfFN2GISQ8xN2mhAFNrmWSzleJ5lKEfNYaAeWVgWXmFifaITU7WJQj4ZI6+i6j3lTLXOsttUQTGcDcnPUsYjh09ibAW9dweZxAO4yLRgRHDCSveMdB8oY/v0nexlv33mYYvxlChLBznEFqzTxRhqT9Bnw2Q5NL3gaPJFMN9wBGrYfxzEWk1JKsspV8uhYVJA9Q0vwCkOqL7hhheD1e3yOWNv6jRjVlYIBKvbPeIMDsYt3A8XpsHyC2bmZjkrihZHk3lBMWBxNJkVexu2R2ejebkbfjkRWWYFX2aduUm6uFa97CU/u29AzCP8uwrR1RNEjGriDyt7lTFHbXOMXUS3K7HBAhsTb0DGOLnf5YhUUM2p9jj7EgvvJh1geGwysngddVtnqjliR7KFi0GyL3NWTq+7ZLojrEfaqe1q3OU7gtEcbXsg0EgEDO8xtjTS/XYxGVDOqdHrvHBD4tTiG3CRMuFBp/ZUW/Vxv8q4adhrzmDAT3N8MvGAdZP7SWkutL6y4jx3AOt+3ItJ4MjHggSLoFKjTMuXexLWIlhGVSttSZE79XalJgkRX2F/N5q4JuzSqiEd7NTE78Al91rFIrbySDPZhhyqPDl6+Nl2uFrE/hB5DhmLWIWruoEHAB4cqtlTphdaitpFQFoGT4Selud0o+UwTzNEwWGdgW9kGXyxuh6qI8SvbU5YfZ6QE0iQ6PfX0iZhNKNCDP/GhDj+jQtr+DchJPFvUoDcPyOw7q/gaUPUZTksxE65F59a7+dLNLkdxHm5LHNJWlhQ+BFeniRk50s8gvh9i+mOM90vPV333RZTnrzbUuoNuVaoyo64+CxEB+qTSTANMtKqy4Ia6JVdMPkw0le4PfYhxuIlAjgRE54RExAtNWS6RXBPcKrSrEgl3CW5NyHOg0C3jxcGFOKfjo6QO9qpFSVZCM8tfs7CfemrzL+e9vz3IUj5Km1aIqmDgfFIUQeaajnQAEYqUMqqS/c1bRrcLrlfhECYLXVn3Ax9k27mfh6CdLC+nXHvhW/UvX+EYSNQ1a7q3SeQ0XpkODJsep2uDMhDMsjm2KGx//S6OV+ZylWp2MFWpSzVmo22VC8dK536+/XGh3VZ0GbYvsJt3wRh1inxEixyC3f2FmzWG5glMVVKh4dSqd3iNyEedzuwwXM/i8DWAk/wGOc9DG+rbq7ifZ5WEU1sLp2WB6sjjJJua6caVvi8K+eNzeaUzi+ZXgVxZFiarT2kl/TudRRtdKKy4I5UdNvj1klfneGmh3lEFtwRjxsrmp4xptUf56O5IySnOM1jcer66T3YOhZnjMZZdmFT7fdNqtxVxDuVtEdmjDt3IeHGgSZvGgksplj7HaZXY7o7iEY1S5le64dxPCGnNMu7Es19iiVM8FkCu5nEwECQUwn+JrZ3zktGvurwy57kzp9CkHDJmG6jI9U+Y+pixbAQktk3pWNNqDMIOHT6Tdd1QNQea4OM4RBX0nLX1aGXHDJ9HbNNVRsEeKOMV3AHPOYDeNbV28O6FFus3lQozq47LjkMZWfclc39OQQZt3HrecGqAai6btj+cM1DeU4uX/CEZJ+CnSHAdGRp2DBPOW9O7OGSt/rASbTDoxcyJ6Sv6c5NMv9wL2Si3oVM8YfYwhnDWXeLwsx1g3UndO+mw9Q3Bqrezxtmf/rwSisey/f8Ojr5Xyj0q3DkqFn8dXjniAs23WDI5HRAunSC8NkGX
JvY+xPas+Ao/tjnxbK176HWecuN1I/bO+c++eb+FoaM9KGpjkbEZKXa4UDt5/4YhijNkuIaRAr1YzzEMrDRaN+RZKXUqHZqdXqErUOC3n/Su1ks/zexNKBfTgUTwZgKLUmuFKqVewV6iakct4WouAMXg9Sm3Gg3ip1DIUZtuHebSrXSagtxZta5AuWkNdykV2qFplJoMYLSOKQmO3iqKlXpqFA6FhLUyGIWIUn9dMbKlVJbADEL2w7Bs8RGUmyCWH+Vy9jurIvPQZZP8P1K0z3dlQ8K1Y7UEjbECxhGOlq6I5Xed8np3APYcqPrND4svm0nvK/AbllqylKJ3t0qrYbcVg4rUrXcou5UnOAgqSrhElyFy36O4jH/5RaJodx3p0vJakNmas8xhVmoJTULMlpyNOCcUrD2QQFXA1dw48CGLN0eivsAwd8YTlGJJT71cTf7l9/Rl8v0rct5BF1+zgd+lXrhlL38M93ekDVnlb2urmT1rwuteiCet+oNWQdPYCdg1e1BV7H7uWP3SsCub3m55Us+y75B60CGGDfz9IeXJVGdmR9XdfAepLrGYDzkbz7naaaRY0cgl6HJ8eAINh+QifIFtHzuaNlAudJUkQ5bixB03pvtaiDK9OYA5LPnx8659laCj2vPBx0CG0G0nGtpRcC4/76Em/nYfeRbhhV/+7da8PgTIVdfA/Ghamqq7kZtyYoHrLi4ERxRFhW66MXr93bV/ESlr483baIO8+Z4f3GigjNITGxuYGfFZJZ7FaJtYtmY4CMFfcI6tPVi9Jd/v/yMTAl4UoceO2k99Pgkzt8j/w99CcIy", + "view": { + "recursion": "fail", + "enum": "number", + "yt_mode": true + } +} +@@; + +$udfParse = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig); + +$data = @@ +{ + "Any": "<x=y>{a=1;b=c}", + "x": "aaa" +} +@@; + +SELECT + $data, + $udfParse($data), + $udfSerialize($udfParse($data)), + Ensure("Success", StablePickle($udfParse($data)) == StablePickle($udfParse($udfSerialize($udfParse($data)))), "Fail") +; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.in diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql new file mode 100644 index 00000000000..f3b2935cc64 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_enum.sql @@ -0,0 +1,50 @@ +/* +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + enum Color + { + WHITE = 0; + BLUE = 1; + RED = -1; + 
} + + required Color ColorYtIntField = 1 [(NYT.flags) = ENUM_INT]; + required Color ColorYtStringField = 2 [(NYT.flags) = ENUM_STRING]; + required Color ColorField = 3; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrFWt2PG1cVx59rH+/a49lN4m4a0rptdpM23ir9ZAOl/pjdOPUXY7vpRqpGs/Zdx4k9486Mk2yEEBJPPPKGEEKIl0r8AUiIFyokHpGQoE8gIYoEfwJvcO69M+MZe5x1K7Xtw9Zzvu+5v3vPufcG/nATnhvo+mBE9iaGbunH05O9PjF7xnBi6UaB0cQMlyg4Evk6ZA+GI1JxBdvEEt+G6AkSc6HnIrupGy8W5pQKfo0WJctMI/+vKGwGcEURopo6phZDu0mZ/RZzsDZRew/UAcmFGdn5FL8J0CcTovWJ1jvNRTCKpOyhiC9DdjI9Hg17ikcMUCwmC5xRmQnvQOYRUR94RVNMNE3JHsEyrI+JaWIAinU6IbkoG/1zC6OfH3nK1uqgkliEJNGmY24htiR/EkrMW0lQNdvEmkmMh8MeycWZgZ0FA23On7fh6OFQkuSxRTRzqGu5NWbkpYBZJKP+vImZnvgmrOkTC3+ZuQTOT+rGs4FAaHIZ2REWqyCY+tToEaWn94ky1E70XJIZuLw4ECZYRrkqislp0/ctnoe4eapZ6uPcOkOI/ZX/bRwyq0DsJsRO6CgRYJ8jB1zHn8T4F0xiEVIaMS3S54iIrIgp4EqLkIp+IUh9ABk3JMVQtYGDzb2zIilIjp5M1eQ08X2LFQBdI/oJLq/eCHESnKUmFVnIks6pvZH4rRnU1pYgpc4X2QLaupA2CMU9ppiPLMmCKJw5MtlW4wPbMLyf4gvgEhQGK2C70LpDbCBt+wmk/ekRtyBmWqphMRTGZP4hChDBTYbtcjGZ/hTfnQ04wgZ8ZXFGfZbnx739Fmz4BrCq6/z34VygaQTJ1lQbahYxJgahiOWucv9eW4K5rleaW5E3p4vEa8nEf9aEH+J/4fwncdgKWjOByxeXPyL4mBgsSTHZ/sIVERupx2SEqyG0m77x8kqrslCjKjLXFN+BqL1FUwvXVrNA15LM9MSLkKT/59iIs5gTlEBxIW5Dgi2TPnFKm/tNgdUnJ+p0ZCkP1dGUMMAjsGzi+5QmXoYUX1VD1HnMds+YzBdalVKo+/smrmUbmswFJTD3b81v3JeCh7ewlrBUMonX7KlXR7ksGkjIaU5u2tT8b8IQZRtLBlKdo5akVJrdUk0SQmIagBEOas1iRwi739VG583XhYir0OWEqFfgtRtCDAG7zg1UP5AqKBH3U1BmTdyAJKOUms2akHBttjtytXEoJF2bh3Kz2xLAtVCX2u3ioSSkXInSUUdqC+u+sNDFhutCanTrQlrMwgZ34QSRmSNhpMIsEG4l6yOghJgvQ4zBEOGerhVLUk1ptjrVZqNYw9y5NFn6XrcqSxXMn4fWkoodpEXyPdgK2lADl5AHC+ElWGC25rGQ/ywMmwFFJdDJdyHGsczL7NXA6sSQvVBqmZ631YgsaTWoiQXAfriw+fP6+OYq9ZHRPl8RiAUUgZuQXTC08mb8oxDkliXnjC0x7NsSb85n8Pnlk7Aw1x+H4HxwSxkYwzsQHxPrnu60VVcCijVlz0+2reWt9pFlfSGPZiHSH4fhXKDxwEAvAQy1ydTirRPfiZOMwjYvustOLZcfYXzgJCbw9izQKAv0m0tGugDMV0HojYZEsxTTMog6HmoDVmoS+7ETdWQSOcPZbYdLNRiADI9G3KfB2a5G/idJSHkacPF5WL+vPlQV51DFM5GitJZ9sHoVtpgIjhEd9Ua
qabKkJZioSHlNyio7HPEN2GQaY6xNw8mIKPSYZ7KS40aWpRJ1W4BGZGJbeImpDYhGDNUiCvloirKKqvWVe6p5L7dFDZTCuZD8DBU8tOUkJlbU+rdQSNyH88wKZgQHrPTukd4DZWqdvJ276PXPImwzmTIV6aKE2IZ1Ohnj4ROMWTdYDU0HbE2eDBaatkIdzx/7sXZLkipyyrFyoBsUUAPdTXCKA2qgO+nFZPV6fMx4NrUPY2ZO8CWr1zvkAjbGTVwP52bJ8ipmF0Y5r4oeJ6eLiqLP4+R0Xu0t2JrcmyzqXfPqiSgyr/gSO5kbpIdz1c9d8Ip7GGIB4d9TiKYeI2JUA3+YuctMOGoZUzxF9HoSYxYZT7wGWf34fo8jUkEzJ8PHuRdZejOUwfDYYmTxKto276nGhG3JJk4Gyb3ERTm94ZDpijAfDU8sx+IOXxGMZlvbBYFmwud4l4mlke71i8WASs6cXuWNGxJnHl+H81QINzq1r1qqR/oVJk3TXreZvjiN6fGpC6zrPE5Kc6D1pTXn+X1Y9+JeTAJHPjYk2ASVmxXavtyVsBfBNqpW7UiK3G10qnVJiHga+9vRxBVhh3YNaf9JTfw2XHCuVUxiKY+GBluQY5UXRxc/W7ZUm1h3UOaAiYg1uKzpuAHgxqEafWV2oaWoPQSkqfNC6Fp5VtPbtvCsQhRt0Tn4RpbBF7vrsTpB/FrGKevPE3ICCRL9/kqOSZjNqBDDvzEhjn/jwhr+TQhJ/JsUIP/PCKx7O3h6IOqxGhZiu9wLT+33C2Va3PbjvF2WuSZtLCj8CG9PErL9JR5C/L7JbMeZ7Refbvt2mxlP3m4rjaZcL9ZkW118BqIj9cmpvwwy0qrTghbolZ2/+DDSl7g89iDG8iUC2BkTviEmIFpuynSJ4JrgVKVVlcq4SvJvQJwngS4fNw2oxD9tGyGH262XJFkIL0x+3sR16enMv5rj+e9DkPJ02rRFUkcj/ZGijoaqaUMDGKlIKatO3Ve0aHC55H8RAmG+1Z0LM/R1hpn/eQjS/v52Lrznv9bw/hGGDV9Xu2p0H0F22CfjiW7R63RlRB6SUS7PNo29p/fNhepMr0bV9jerFaneanakRvlI6TbeazTvNGRhOCf2JS77FgjzQYkXICgsXNmbkGk0sUpiqZQODqRyp81vQlzpjm+B538Wgc2ASHAb52cYfqy6vkr0BdpFtPBwaR95sDvCLGnW8GSIHT4/lfODTWZG55dMr4A40c2hNXxIL+md6yh60InKgsOpapYrrZGBOidNN/OILDgcVxo7mr4+pd0fl6O1IySnOM0Vsfv62T3YOjZnjMZFdiCjDgYGNe4Y4ieVtEtmgtu3IeHkgRZvmglsptjxO0yvxjSHiU6HpjK71g8jPyGnhqZ7JZr/GFsY/7MEnmYSIx1BTjX4m9juGS8ZhZotL7ua238KQcIhY7mNTlTrHjMXK4WFkMy+KR17Qo1BwKbTbzqvI6L22TFIH49xJk1nXm162SbT1zHLUIcjn2yUyQoOwxXeh2ccu33sS/GI1Z8pxdl1xwVboGLzHd38n0OQdQ5ufTdZdQBV03TLm65FKC/oFYqukuwxsD0GmHGWpg3rlP3mxB4u+VEfOIme8OiFzDEZDDX7Jpl/OBcyUfdCpvQDPMLp4/lwS8LcdYN5K3T3ui000EeqNijoxmD28Eo7HtPz/Do5/m8o9Ktw5LBV+nV4+5ArtpxkyORkRHp0gPDJBlw5tfZO6ZkFufhjjzfL5p6LWvstN9I46myf+eSb/1sYstIdQ51MiMFatYOROsj/MQxRWiXFNYgUG0e4iWVho9m5JclKuVnr1ht0C1uHBL3/pHez2P5nsDWgX3YHE8GcCm1JrhZr1btFeompHHWEqLgN5/3UltzsNEvdAyFGfTh3m0qt2u4IcebWvgLlpDVcpJfqxZZSbDOC0jygLru4qyo16bBYPhIS1EmwiJCkcdq8SrXcEUDMwZZNcD0xTooNEPuvSgWPO+v
is5DjA3yv2nJ2d+X9Yq0rtYUN8RymkXLLt6Tyew45nX8Am0527YMPy2/HTu/LsFORWrJUpne3Srspd5SDqlSrtGk4VTs5SKpJOAWX4aJXonTEfzlNYij/ndlUst6Qudq1XWEVakutooyebAs4phSsvV/E2cAZ3Ni3IEeXh+I8QPA3hhM0YopPfdzN/eV39OUyfeNiAUFXWIiBX6WeO2Ev/8y2yzIXvLLX1ZW8/jXQqwviRa8uy9x/Ats+r84ZdBW/n9p+L/n8eqaXe77g8exhmvsyxLibpz+8LMnq3Pi4qf13IdXTR9Mxf/M5yzLNHNsCuQ4tjvuHkHlATpXPYeVT28oG6pVnhjTYDELQWW+2q4Eo218AkMefFztn+lsJPo4/D3QIbPjRcqanFQHj/PsS7uZD55FvGVa8x7/VksefCLn5OogPVWOoak7Wlsy4z4uDG8FWZVmhk166endHLZyq9PXxukXUccGY7gUXKvhpGBKnFvewvWI1y38WgmiHmJZ4EzKINd04sqq2f9bFpW+kCpRfYMxS9Jd/vxiW5yWxgxZtEr8PPrD/9UegfkQOEMYdGxiVq0YWVGUPO1+AGPuit2Z3blU7Er8cKGFxYJcDEfqK9z/nv9D/AR5f/qs=", + "view": { + "recursion": "fail", + "enum": "number", + "yt_mode": true + } +} +@@; + +$udfParse = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig); + +$data = @@ +{ + "ColorYtIntField": 1, + "ColorYtStringField": "RED", + "ColorField": 0 +} +@@; + +SELECT + $data, + $udfParse($data), + $udfSerialize($udfParse($data)), + Ensure("Success", StablePickle($udfParse($data)) == StablePickle($udfParse($udfSerialize($udfParse($data)))), "Fail") +; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in new file mode 100644 index 00000000000..db7b187bc30 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in @@ -0,0 +1 @@ +{"TestField"="{\"dict1\":[{\"key\":\"k1\",\"value\":{\"a\":\"1\"}}],\"dict2\":[{\"key\":\"k2\",\"value\":{\"a\":\"2\"}}],\"dict3\":[{\"key\":\"k3\",\"value\":{\"a\":\"3\"}}],\"dict4\":[{\"key\":\"k4\",\"value\":{\"a\":\"4\"}}],\"dict5\":[{\"key\":\"k5\",\"value\":\"v5\"}],\"dict6\":[{\"key\":\"k6\",\"value\":\"v6\"}],\"dict7\":[{\"key\":\"k7\",\"value\":\"v7\"}],\"dict8\":[{\"key\":\"k8\",\"value\":\"v8\"}]}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr 
new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql new file mode 100644 index 00000000000..b3c7377390d --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_map.sql @@ -0,0 +1,47 @@ +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_YT; + + message Inner { + string a = 1; + } + map<string, Inner> dict1 = 1 [(NYT.flags) = MAP_AS_DICT]; + map<string, Inner> dict2 = 2 [(NYT.flags) = MAP_AS_OPTIONAL_DICT]; + map<string, Inner> dict3 = 3 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY]; + map<string, Inner> dict4 = 4 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS]; + map<string, string> dict5 = 5 [(NYT.flags) = MAP_AS_DICT]; + map<string, string> dict6 = 6 [(NYT.flags) = MAP_AS_OPTIONAL_DICT]; + map<string, string> dict7 = 7 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS_LEGACY]; + map<string, string> dict8 = 8 [(NYT.flags) = MAP_AS_LIST_OF_STRUCTS]; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": 
"eNrFWktvG9cVLp8iDyVqNJJtRo7rhHnYcWIqlV+q3KahyJFCl68OqSQyEAxGwyuKNjnDzAxty+iiQFddFVkVCIqi6CZFf0CBopsW3RcI0GZTBCjaAu1P6LLnPmY4fFm0m0cWDufc7zzuuefec869gj/dhhfaltXuko2+bbnW4eBoo0Ucw+70XcvOMZq8zBE5D5GtwMpup0uKPrBBXHkLokdIzIReiFxObb6cG2PKjXLUKVllHNl/RWF1yqgsQ9TUe1Ri6HJSZb/lDCz0deO+3iaZMCN7n/I3AVqkT8wWMY2TTAStSKoBivw6rPQHh92OoQVggLCYKvGB4hB8CZYfEv1+EJpi0DQlB4AFWOwRx0EDNPekTzJRNvsXJmY/PvOU4Goik5yHJDEHPS4hNsN/CiLGpSQomxCx4BD7QccgmTgTcGlCQIOPj8vw+HAqSfLIJabTsczMAhPyypRVJN3WuIghn3wTFqy+i7+cTALXJ7X5/NRAqHGM6oHlEkiONbANohlWi2gd88jKJJmAi5MTYcAC4koIU9POyLd8FuLOienqjzKLLELEV/Z3cVieJ8RuQ+yIzhID7Cl8wHlGnRh/RifmIWUSxyUtHhGROWMKONNkSEWfKaTeh2XfJM3WzbYXmxunWZJTPD6VsqlpMvItFwEsk1hHuL2MLsbJdC/VKGTCSxanGl3528NQW5gRKRW+ySaibR/SNqFxjy7mM0syI3KnzkwVbHxiS3bwU34JfILGwgrYKbToEatIW38M6VH3yGsQc1zddlkUxlT+IUsQwUOGnXIxlf6U3x5OOMIm/Orkio5IHp/3+i1YGpnAvKqzP4QzU0VjkKwNzI7pErtvExqxXFXm3wszYm4/iOZS1NXBJPFKMvGfBelH+F84+8c4rE3bM1O3L25/jOBDYjMnxVTxhTsi1tUPSRd3Q+hyevP1uXZlrkxZVM4pvwVRcURTCVfmk0D3ksr45POQpP/nsRFnNicogcaFvA4Jtk1axEtt/jcNrBY50gddV3ugdweEBTwGliC+S2nyRUjxXdVBnkfs9IypfKOVKIWqv+fgXhahyVRQAlN/a/zgvjB9ehN7CVMlQ1wTS693MysoIKGmObkmqNnfhiHKDpZlSDUP6opWrO3vlBUpJKcBGGG3XMs3pbD/Xao2b16XIj7DPidEg4Brm1IMA3aRCyi9rxQRER+lIGZBXoIko+zUamUp4ctsNNVSdU9K+jL31Np+XQJfQkVpNPJ7ipTyETsHTaUhLY6YhSqWfBVKdb8ipeUVWOIqPCOWx0hoqTQ0hEtZGSEgQs4WIMbCEMM9Xc7vKGWtVm+WatV8GX3n01TlB/slVSmi/wK0upJvIi2SNWBt2oE6dQsFYiE8IxaYrPFYyP4zDKtTkspUJd+DGI9lnmZfm5qdWGRPpFrGFyw1IjNKDSpiImA/mDj8eX68OU9+ZLSnSwKxKUngNqxMCJr7MP5xCDKznHPKkRgeORJvj3vwxdmLMLHWn4Tg7PSScqoNb0G8R9xjyyurXp2SrOnw+GILrmC2j8yqC7k1E5b+JAxnpgqfaugFgI7ZH7i8dOIncZJR2OFFT9mB649H2DhwEgNsDQ2NMkO/OWOmE4H5JkhGt0NMV3Ncm+i9jtlmqSaxHTvSuw5Rl/lwwxulHCyA7ABHfISDD/sc2Y+SkAoU4PKLsHhPf6BrXlPFPZGitLporN6ENQbBOaIio6s7DnNagkFlOlajQwVvRL4Bq4yjh7mp0+8SjbZ5Dks5vmUrFFERAGqRg2XhBcbWJiaxdZdo5MMBYjXdbGnHunOcWaMCdsKZkPocBe4JnMJgebP1DoLkbTjLpKBHcMKacUyM+9rAPdrKnA/qZxY2GKZAIfuIkBuwSBej13mMNls2y6HpKUdTwIO5mmCoYP+xHWvUFaWopjwpu5ZNA6pt+Q5O8YBqW5570VmGweeMvaloxpyMNOIsw9jjABHjDu6HM0NnBRlXJmY5zooa+yeTjPKIxv7JONs
tWOsf9yf5rgT5ZISMM77COnObGLhWrcy5IDwwIOcw/A2NmPohRoxu4w8nc5GBo649wC7CMBQ2mGdj8hVYsQ7vGTwiNRRz1HmUeZm5d5kOsHisM7L8Gsp2jnW7z45kBxeDZF7hUE6vemS6I5yHnSPXk3iJ7whGE9Iug0Q9MaL4MoOlkR7Ui8mAIodKX+OFGxKHGq/DWQrCg05v6a4eQL/B0NTtFTE4Yqc9ODzxA+sqt5PSvND60orz7DYsBuNeTgKPfCxIsAgq1Iq0fLmrYC2CZVS51FQ0db/aLFUUKRIo7O9EE69Kl7KfhiE92qnJ34Fz3rWKQ1ztYcdmG7Kn8+Tox8+aQDWI+x5idhlELsNF08IDAA8O3W5pwwstTTcwIB2LJ0JfyvOm1RDgYYbIC+hY+EZmhS9W1z29j/Hr2iesPk+oCSQo9PsraZPQmwkpif8mJcj+IwKLwXqdtj8Gy1ghdqa99MTqPlegqWw7zotjlXPSMoIGG+HFSEIVX/IexO85THacyX75ybLvNJjw5J2GVq2plXxZFezycxDt6o9PRpMeI827CCiBXtCNphpG+hI3wwbEmL9kAOEx6RtyAqKFmko3BO4ATtXqJaWAeyJ7A+LcCXSz+G5AJv4pZIS80f3KjqJK4dGljkqxrIO7MFCHfzXN+B9CkArU1bQg0rtd66Gmdzu6I0IDGClPKfMu3Ve0RWJSPPuLEEjjhe2YmaGv08zsz0OQHq1mx8x78Ws17+9hWBqpYee17kNY6bRIr2+59PJc65IHpJvJskNj48lVcq405CtTtu3VUlGp1GtNpVo40Par36/W3quqUmcM9iVu+zpI40bJ52CaWbizV2G5WsOciIlR2d1VCs0Gv/fw0c2RDZ79OAKrUyzBY5x3LLyJujqP9TlaM9SxlRQNDtZC6CXT7Rx1sJ7nPThvY5aHdH6l9AbIfcvpuJ0H9Ereu3yibU1UlbyRkun6aJO09TE0PcwjquSN+GisX1rWgNZ6HEdzR0hNcZoPEVX88NZrEUsxRuOQS7Cst9s2Fe4J4n1J2icz4PodSHh+oKmaegJLJ9Zsh+lFmOkNotKOow0v8cM4nlBTHce/AM1+ggXL6CME9i6JroVBTjn4C9jlU94tcmWBV33O9T+HIOGRMd1G+7p7zMTFdsJSSGXflI4VoMlCQNDpN13XLtFbrOmxej1cScdbV0EvCDJ9C3NtvdMdwUYZVvIGfPA2POfJbWEVig1Va8gUZ5cb5wSgKMY93uynIVjx2rSW76wKgG6alht012QoT/Dl8j6TGhCw3gMYjsx0G+Yp8cLEnil5Yw+cRPs5ev1ySNodU9wb8w/v+iXqX7/s/DSEHZvVG7d3Rxq7XXDeCd19q91xjweHOcRvtK2ubraH76zsh3EV+6mrbSvw6np7+PO/odCvwpG9+s5vwut7XF3dc49KjrrEoFOGj5fgKpaaNmkNDLJx4m6wM+AIuwSuxNnww1k86UaqB831U19+aaCvKO/Zer9PbFbD7Xb1dvajMERp+pQXIJKvHuDptgJLteY7iqoVauX9SpWebYuQoNeg9IoWu4BlrBnolyhtIuhsqaGopXy5dDdP7zK1g6YUldfh7Ci1rtaatZ39XSlGdXhXnFq51GhKcaZW3IRy0gLu3guVfF3LNxhBq+1Slft43GplZS9fOJASVMl0iJSkdoqxYqnQlEDOwJog+JrYSIpNEAuzYhG7nsXsfVj1HCVaGeaqpvDU63CpqNRVpUBvY7VGTW1quyWlXGxQySUxTySVFfTmRTgfROwc8F9eIRjKfne4Kqz+Y6ouC1WYaRpKPa+iJiFBWpRTsPBuHh2Li7G07UKGbgHNe1LgrwZHKMSRn/hcm/nL7+lbZHrzfA7jJzdhA78cPXPE3vKZbH/ImdDK3kvn0vrXqVr9eJzU6g85249hfUSr11XOo/czoffCiN7A8nLN5wKaA4POtgoxrubJTykzvDo2Py5q+21IGVZ30OOvOKdJpp5jxxz
noQlwew+W75MT7SmkfCakLCFfYSjIhNVpEXTaK+x8QbTSmgiggL5g7Jyqb67w8fQFQofA0mi0nKppzoDx/mKEq/nAe7abFSvBFm8+5/FHPy6+AvID3e7opue1GSs+osWLG0mwMq/QRd95825Oz53o9D3xqkv0Xs4ebDwx9cDfFiBx4nJF60+XprI/W4BokzgulpaxVsdwvyWKBSlHqbkiJbELlp3oLz8/DyoHeehNUSUH0JtDdIqjNz30NfF3FgH0tSE6wdHXPPR18WgUQF8fopMcfd1D3xB/ORFA35iw+4aHvin+cCSAvjlh900PfUv8rU4AfWvC7lseekv8uUUAvTVh99b6GYiVTCy/MMuFdPEoENLXFYChz2lphCeCGKU/Mfv6b3r05SPFlTBJ4tVuO7wV8sRsfjFirn0xYq7/n2K2uJgbs8SsBcUkp3DefGbOW8/MufXUnNvRX39+PnoY54/7/wMXV6Lr", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +$udfParse = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSerialize = Udf(Protobuf::Serialize, $config as TypeConfig); + +SELECT + TestField, + $udfParse(TestField), + $udfSerialize($udfParse(TestField)), + Ensure("Success", StablePickle($udfParse(TestField)) == StablePickle($udfParse($udfSerialize($udfParse(TestField)))), "Fail"), +FROM plato.Input; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in new file mode 100644 index 00000000000..b6dd409ad4e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql new file mode 100644 index 00000000000..d75aa4af2fe --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_no_ser.sql @@ -0,0 +1,36 @@ +/* 
+syntax='proto3'; + +message Test { + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; + Test test = 2; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrjWsjIxV2UmlxapFdQlF+Sr9TJyMUSklpcIqTIxZqZl5daJMGowKjBbcStBxLV8wQJBUFkhCS5WEqAghJMYBWsYBVBYCEpKS4usFIwIcTDxZgINocziDFRSp2LFSIsx8WYCTVeAMl4iB2MmUlsYCcZAwC/Qiqb", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +}@@; + +$udfPar = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig); + +SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail") +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in new file mode 100644 index 00000000000..f397ecd77cf --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql new file mode 100644 index 00000000000..87e9fc6bd26 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_plain.sql @@ -0,0 +1,113 @@ +/* +syntax='proto3'; + +message Test { + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; +} +*/ + +$configNO = @@{ + "name": "Test", + "format": "json", + "skip": 0, + 
"lists": { + "optional": false + }, + "meta": "eNrjamXk4i7ISczM0ysoyi/JV8rjYglJLS4RUuRizczLSy2SYFRg1OA24tYDiep5goSCIDJSUlxcYD6YEOLhYkwEK+YMYkyUUudihQjLcTFmQs0QQDIDYhBjZhIb2FpjAIm1I9Q=", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_YT; + message InnerInner { + option (NYT.default_field_flags) = SERIALIZATION_YT; + string a = 1; + } + message Inner { + option (NYT.default_field_flags) = SERIALIZATION_YT; + InnerInner i = 1; + } + Inner inner = 1; +} +*/ + +$configYT = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrFWs2P28YVrz5XetJqKe7aVtZxbCsf3jixNnA+u27TaCXuRq6+SmmTrIGA4EojmTZFKiRle42iCNBTj7kGRQ+9pOgfUKDopUUvPRUo0OZWoGgLtH9Cj30zQ1KkPrxygCQ5bMQ372ve/GbeezOGP96CK0PTHOpkd2yZjnkyGez2id2ztLFjWiVGEzc4R8njKDYgf6DppOozdogjvgPxARILkSuxnczNF0ozQqWwRJuSZSZR/HccNheMiiLEDXVENUZ20jL7LRZgbaz27qtDUogysvcpPgfQJ2Ni9InROy3E0Iu0HKCIr0B+PDnRtZ4SYANkS8gCH6hOma/BxkOi3g+yZhhrjpIDjBXIjohtowOKczomhTib/ZW52c/OPONKdVFILEOaGJMR15BYEj8JOWa1pKiYq2LNJtYDrUcKSabg2pyCDh+f1eHJ4VTS5JFDDFszjcIaU/LiglUken9WxVROfAvWzLGDv+xCCtcnc/PZhUBocR7ZYxZrINjmxOoRpWf2iaIZA7OQZgouz0+EMVaQr4Zscs4OfYvnIWmfGo76qJBlCHG/ir9NwsYqELsFiQGdJQLsKWLAZcJBTH7FIJYhYxDbIX2OiNiKmAIuNA+p+FeC1Eew4bukWKox9LC5e5YnJcmTk6mYnCOhb7EKYBrEHOD26umIk8VRalGWuSiZnNrTxe9Ooba2BCkNvsnm0HYEOYtQ3GOI+czSzInSmTOTXTE+sXUr+Ck+Dz5BYbACdgplPWITaduPIRcOj7gFCdtRLYehMCHzD1GAGB4y7JRLyPSn+N50wjE24ZfmVzSkeXbe22/DemgCq5ou/hjOLVSNINmaGJrhEGtsEYpYbqrwn7UlmDsKcnMt8uZknng9nfrvmvAp/hct/iEJW4v2zMLti9sfEXxCLBakhOx+4Y5I6OoJ0XE3RHZyN19ZaVeW6lRE5pLiuxB3j2iq4fpqGuhekpmceBHS9P8cG0nmc4oSKC7EbUixbdInXmrzvymw+mSgTnRHeaDqE8IAj8ByiR9QmngZMnxXaSjziJ2eCZlvtBqlUPP3bNzLLjSZCUpg5t+ePbgvLZ7e3F7CVMk4XneXXtULeVSQknOc3HKpxd9EIc4Olg3IdI/bklJtHe3XJSEi5gAY4aDeKneFqP9da3bfekOI+QJHnBAPMrx+U0ggYLNcQe0jqYocyTAFedbEdUgzyn6rVRdSvs5OV641D4W0r/NQbh21BfA1NKROp3w
oCRmfY/+4K3WEbMgtNLHum5CaRw0hJ+ZhnZvwnNiYIaGnwtQRriUfIiCHWKxAgsEQ4Z6rl/elutJqd2utZrmOsfNpsvSjo5osVTF+AVpbKneRFiv2YGvRgbpwCwWwEF2CBaZrFgvFf0Vhc0FSWWjkB5DgWOZp9uWF2Ykhey7VMrlgqRFbUmpQFXOA/Xju8Of58a1V8iOjPV0SSCxIArcgP6do5cP4pxEoLAvOGUdiNHQk3pqN4NXlizC31l9E4PziknKhD+9CckScu6ZXVr20IFnT4dnFdqWC2T62rC7k3sx5+rMonFuofKGjlwA0YzxxeOnET+I0o7DDi56yE8cfj7Fx4CTG8M7U0Thz9LklM50D5msg9HSNGI5iOxZRR5oxZKkmtZcYqLpN5A0+3PFGqQQDkBWQSIYk+LAvUfwsDZlAAS5ehew99YGqeE0Vj0SG0tpuY/UabDEWnCMa6umqbbOgpRirSMdadKjijYhvwiaTGGFu0sY6UWibZ7OU43uWpxwNl4F6ZGNZeImJDYlBLNUhCvlkgryKavSVu6p9t7BFFexHCxH5Gcp46PJJjK1s9N9HJnEPzjMtGBGcsNK7S3r3lYkzeKdwMWifedhhPBXKcoQcYgeydDFG2mP02bRYDs0tOJoCESy1XIEG9h97iU5bkqpyxtNyYFoUUEPTD3CGA2poeuHFYPV6fM7Ym7rNmF0QQsHq9Q45g4txG/fDuWmwgoL5uVnOiqLF8em8oBiyOD6dFXsbtsZ3x/Ny14NyIrLMCr7IOnOL9HCt+oULQfbAgFhC+PcUYqgniBjVwh924TJjjjvWBLuIXk9ig2U2Jl6HvHlyr8cRqaCagfao8AIL7wYdYHhsM7L4Muq276rWmB3JNi4GKbzIWTm96ZHpjrAfagPH03iN7whGc7XtgEAjETK8w9hySA/axWRAOadGX+aFGxKnFt+A85QJDzq1rzpqgPtVxk3D3nAHQ35ak5NTH1g3uJ+U5kHrayvOi3uQDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNTs1hqSEAsU9rfjqZeEa7RqyIU7NfF7cMG7VrGJozzULLYhRypPjj5+tlyuDnE+RJ4DxiLW4bJh4gGAB4dq9ZXphZai9hCQtskToa/lWcPsuMzTDFF2WWfgG1sGX6yuR+oY8etYp6w+T8kpJEj0+xtpkzCacSGBfxNCEv8mhTX8mxLS+DctQPGfMcgGK3jaEPVYDouwU+75J9b7pQpNbntJXi7LXJIWFhR+hJcnKdn9Eg8hec9mupNM9wtP1n27w5Snb3eUZktulOuyKy4+A3FdfXwaToOMtOqyoAZ6ZRdOPoz0NW6PXUiweIkAbsSE74gpiFdaMt0iuCc4VWnXpArukuKbkORBoNvHDwMK8U9XR8QbPWrsS7IQnVv8oo37MlCZfzPt+e8jkAlU2rREUnXdfKiouqbaLjSAkcqUsurSfUObBrdL8RcREGZL3Rk3I9+mm8WfRyAXrm9n3Lv6rbr3jyish6raVb37BPJan4zGpkOv0xWdPCB6ocgOjd0n182l2lSuTsX2NmtVqdFudaVm5Vg5av6w2fqwKQvaDNvXuO3bIMw6JV6ARW7hzt6EjWYLsySmSungQKp0O/wmxOfuhjZ48fMYbC7wBI9x3sPwturGKt6XaBXRxubSbXmwOsIoGY420LDC5105b2w2pnR+yfQqiGPT1hztAb2k966jaKMTlwVvpGY4PrdBhuoMNz3MY7LgjfjcWNH0zQmt/jgfzR0ROcNpPotb10/vwbJYnDEaZ7kGG+pwaFHlniLeqeR8MmPcvg0pLw40edNIYDHF2u8ovRozvEE0qtnK9Fo/iuMpOaPZ/pVo8QssYcLPEtjNpHQTQU4l+JvYzhkvGaW6yy/7ktt/jkDKI2O6jY9V5y5Tl9iPChGZfVM61oQGg4BLp990XXWi9lkbZI5GuJK2t64uveKS6euYY6maHuKNM17BG/CZ9+AZT28f61J
ssfpToSS77rjgMlTdcU+2+JcI5L3Gre8HqwGgGobpBMM1D+U5uVLZF5IDCrZHANORpWHDPOW+ObGHS97qAyfRDo9eyJyQoWa4N8n8w7uQifsXMvs/wRbOHM26uy/MXDfY70fu3HCZhqauGsOSaQ2nD6+04rEDz6/jk/9FIr+Mxg7b+7+Obh9ywbYXDJkMdNKjE4TP1+EGlpoW6U96ZPfU2WU7foBdAldu7/rgdZ90Y83j7vaZL78U1nnpQ0sdj4nFKrYDXR0WP4tCnCZLcQ1i5eYxnmV5WG9135dkpdKqHzWa9CTLQopeg9IrWuwCNrBCoF9uIRPD0AodSa6V67U7ZXqXqRx3hbi4DefD1Lbc6rb2jw6EBLXhXXEq9VqnKySZWfcmlJPWcK9eapTbSrnDCErrgJo8wsNVqUuH5cqxkKJGFrMIaeqnO1atVboCiAXYcgm+JTaSYRPEMqxaxa4nW7wPm16g3FaGharrRuoVuFaV2rJUobexSqcld5WDmlSvdqjmmjtPJNUljOZluBjk2D/mv7yyL1L8/nRVWLXHTO24pjCvdKR2WUZLrgYhK2Zg7YMyBhYXY33PgQIFvOI9KfBXgwEqscUnPtcW/vo7+haZu3mxhPgpzfnAL0fPDdhbPtPtD9lzVtl76UpW/7bQqo/Heav+kL33GLZDVr2uchW7X7p2L4XsBpaXW74QsBwYtPdkSHAzT35KWRLVmflxVXvvQaZn6pMRf8U5SzONHDvUuAxNd3uHsHGfnCpPoeVLV8s6ylWmigzYXISgs15hVwNRvj8HoIC9IHbOtLcSfDx7AegQWA+j5UxLKwLG+xcj3MzH3rPdMqwEG7rVgscf/bj6BogPVEtTDS9qS1Y8ZMXDjeCKsqjQRd9/7U5JLZ2q9D3xhkPUUcma7D4x9cCfIpAd66pmnHJj20+XqoqfRiDeJTYtwxKagQUA68QyNzMlSi3VKEnmI9tXsHymP9gfPKEjqnuhHVH34r/6+8X49g1I8MHnIKK5moSAJq4uonF2/vckyV8y/w/fjdbu", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; + message InnerInner { + option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; + string a = 1; + } + message Inner { + option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; + InnerInner i = 1; + } + Inner inner = 1; +} +*/ + +$configPB = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": 
"eNrFWs2P28YVrz5XetJqKe7aVtZxbCsf3jixNnA+u27TaCXuRq6+SmmTrIGA4EojmTZFKiRle42iCNBTj7kGRQ+9pOgfUKDopUUvPRUo0OZWoGgLtH9Cj30zQ1KkPrxygCQ5bMQ372ve/GbeezOGP96CK0PTHOpkd2yZjnkyGez2id2ztLFjWiVGEzc4R8njKDYgf6DppOozdogjvgPxARILkSuxnczNF0ozQqWwRJuSZSZR/HccNheMiiLEDXVENUZ20jL7LRZgbaz27qtDUogysvcpPgfQJ2Ni9InROy3E0Iu0HKCIr0B+PDnRtZ4SYANkS8gCH6hOma/BxkOi3g+yZhhrjpIDjBXIjohtowOKczomhTib/ZW52c/OPONKdVFILEOaGJMR15BYEj8JOWa1pKiYq2LNJtYDrUcKSabg2pyCDh+f1eHJ4VTS5JFDDFszjcIaU/LiglUken9WxVROfAvWzLGDv+xCCtcnc/PZhUBocR7ZYxZrINjmxOoRpWf2iaIZA7OQZgouz0+EMVaQr4Zscs4OfYvnIWmfGo76qJBlCHG/ir9NwsYqELsFiQGdJQLsKWLAZcJBTH7FIJYhYxDbIX2OiNiKmAIuNA+p+FeC1Eew4bukWKox9LC5e5YnJcmTk6mYnCOhb7EKYBrEHOD26umIk8VRalGWuSiZnNrTxe9Ooba2BCkNvsnm0HYEOYtQ3GOI+czSzInSmTOTXTE+sXUr+Ck+Dz5BYbACdgplPWITaduPIRcOj7gFCdtRLYehMCHzD1GAGB4y7JRLyPSn+N50wjE24ZfmVzSkeXbe22/DemgCq5ou/hjOLVSNINmaGJrhEGtsEYpYbqrwn7UlmDsKcnMt8uZknng9nfrvmvAp/hct/iEJW4v2zMLti9sfEXxCLBakhOx+4Y5I6OoJ0XE3RHZyN19ZaVeW6lRE5pLiuxB3j2iq4fpqGuhekpmceBHS9P8cG0nmc4oSKC7EbUixbdInXmrzvymw+mSgTnRHeaDqE8IAj8ByiR9QmngZMnxXaSjziJ2eCZlvtBqlUPP3bNzLLjSZCUpg5t+ePbgvLZ7e3F7CVMk4XneXXtULeVSQknOc3HKpxd9EIc4Olg3IdI/bklJtHe3XJSEi5gAY4aDeKneFqP9da3bfekOI+QJHnBAPMrx+U0ggYLNcQe0jqYocyTAFedbEdUgzyn6rVRdSvs5OV641D4W0r/NQbh21BfA1NKROp3woCRmfY/+4K3WEbMgtNLHum5CaRw0hJ+ZhnZvwnNiYIaGnwtQRriUfIiCHWKxAgsEQ4Z6rl/elutJqd2utZrmOsfNpsvSjo5osVTF+AVpbKneRFiv2YGvRgbpwCwWwEF2CBaZrFgvFf0Vhc0FSWWjkB5DgWOZp9uWF2Ykhey7VMrlgqRFbUmpQFXOA/Xju8Of58a1V8iOjPV0SSCxIArcgP6do5cP4pxEoLAvOGUdiNHQk3pqN4NXlizC31l9E4PziknKhD+9CckScu6ZXVr20IFnT4dnFdqWC2T62rC7k3sx5+rMonFuofKGjlwA0YzxxeOnET+I0o7DDi56yE8cfj7Fx4CTG8M7U0Thz9LklM50D5msg9HSNGI5iOxZRR5oxZKkmtZcYqLpN5A0+3PFGqQQDkBWQSIYk+LAvUfwsDZlAAS5ehew99YGqeE0Vj0SG0tpuY/UabDEWnCMa6umqbbOgpRirSMdadKjijYhvwiaTGGFu0sY6UWibZ7OU43uWpxwNl4F6ZGNZeImJDYlBLNUhCvlkgryKavSVu6p9t7BFFexHCxH5Gcp46PJJjK1s9N9HJnEPzjMtGBGcsNK7S3r3lYkzeKdwMWifedhhPBXKcoQcYgeydDFG2mP02bRYDs0tOJoCESy1XIEG9h97iU5bkqpyxtNyYFoUUEPTD3CGA2poeuHFYPV6fM7Ym7rNmF0QQsHq9Q45g4txG/fDuWmwgoL5uVnOiqLF8em8oBiyOD6dFXsbtsZ
3x/Ny14NyIrLMCr7IOnOL9HCt+oULQfbAgFhC+PcUYqgniBjVwh924TJjjjvWBLuIXk9ig2U2Jl6HvHlyr8cRqaCagfao8AIL7wYdYHhsM7L4Muq276rWmB3JNi4GKbzIWTm96ZHpjrAfagPH03iN7whGc7XtgEAjETK8w9hySA/axWRAOadGX+aFGxKnFt+A85QJDzq1rzpqgPtVxk3D3nAHQ35ak5NTH1g3uJ+U5kHrayvOi3uQDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNTs1hqSEAsU9rfjqZeEa7RqyIU7NfF7cMG7VrGJozzULLYhRypPjj5+tlyuDnE+RJ4DxiLW4bJh4gGAB4dq9ZXphZai9hCQtskToa/lWcPsuMzTDFF2WWfgG1sGX6yuR+oY8etYp6w+T8kpJEj0+xtpkzCacSGBfxNCEv8mhTX8mxLS+DctQPGfMcgGK3jaEPVYDouwU+75J9b7pQpNbntJXi7LXJIWFhR+hJcnKdn9Eg8hec9mupNM9wtP1n27w5Snb3eUZktulOuyKy4+A3FdfXwaToOMtOqyoAZ6ZRdOPoz0NW6PXUiweIkAbsSE74gpiFdaMt0iuCc4VWnXpArukuKbkORBoNvHDwMK8U9XR8QbPWrsS7IQnVv8oo37MlCZfzPt+e8jkAlU2rREUnXdfKiouqbaLjSAkcqUsurSfUObBrdL8RcREGZL3Rk3I9+mm8WfRyAXrm9n3Lv6rbr3jyish6raVb37BPJan4zGpkOv0xWdPCB6ocgOjd0n182l2lSuTsX2NmtVqdFudaVm5Vg5av6w2fqwKQvaDNvXuO3bIMw6JV6ARW7hzt6EjWYLsySmSungQKp0O/wmxOfuhjZ48fMYbC7wBI9x3sPwturGKt6XaBXRxubSbXmwOsIoGY420LDC5105b2w2pnR+yfQqiGPT1hztAb2k966jaKMTlwVvpGY4PrdBhuoMNz3MY7LgjfjcWNH0zQmt/jgfzR0ROcNpPotb10/vwbJYnDEaZ7kGG+pwaFHlniLeqeR8MmPcvg0pLw40edNIYDHF2u8ovRozvEE0qtnK9Fo/iuMpOaPZ/pVo8QssYcLPEtjNpHQTQU4l+JvYzhkvGaW6yy/7ktt/jkDKI2O6jY9V5y5Tl9iPChGZfVM61oQGg4BLp990XXWi9lkbZI5GuJK2t64uveKS6euYY6maHuKNM17BG/CZ9+AZT28f61JssfpToSS77rjgMlTdcU+2+JcI5L3Gre8HqwGgGobpBMM1D+U5uVLZF5IDCrZHANORpWHDPOW+ObGHS97qAyfRDo9eyJyQoWa4N8n8w7uQifsXMvs/wRbOHM26uy/MXDfY70fu3HCZhqauGsOSaQ2nD6+04rEDz6/jk/9FIr+Mxg7b+7+Obh9ywbYXDJkMdNKjE4TP1+EGlpoW6U96ZPfU2WU7foBdAldu7/rgdZ90Y83j7vaZL78U1nnpQ0sdj4nFKrYDXR0WP4tCnCZLcQ1i5eYxnmV5WG9135dkpdKqHzWa9CTLQopeg9IrWuwCNrBCoF9uIRPD0AodSa6V67U7ZXqXqRx3hbi4DefD1Lbc6rb2jw6EBLXhXXEq9VqnKySZWfcmlJPWcK9eapTbSrnDCErrgJo8wsNVqUuH5cqxkKJGFrMIaeqnO1atVboCiAXYcgm+JTaSYRPEMqxaxa4nW7wPm16g3FaGharrRuoVuFaV2rJUobexSqcld5WDmlSvdqjmmjtPJNUljOZluBjk2D/mv7yyL1L8/nRVWLXHTO24pjCvdKR2WUZLrgYhK2Zg7YMyBhYXY33PgQIFvOI9KfBXgwEqscUnPtcW/vo7+haZu3mxhPgpzfnAL0fPDdhbPtPtD9lzVtl76UpW/7bQqo/Heav+kL33GLZDVr2uchW7X7p2L4XsBpaXW74QsBwYtPdkSHAzT35KWRLVmflxVXvvQaZn6pMRf8U5SzONHDvUuAxNd3uHsHG
fnCpPoeVLV8s6ylWmigzYXISgs15hVwNRvj8HoIC9IHbOtLcSfDx7AegQWA+j5UxLKwLG+xcj3MzH3rPdMqwEG7rVgscf/bj6BogPVEtTDS9qS1Y8ZMXDjeCKsqjQRd9/7U5JLZ2q9D3xhkPUUcma7D4x9cCfIpAd66pmjLmx7adLVcVPIxDvEpuWYQnNwAKAdWKZm5kSpZZqlCTzke0rWD7TH+wPntAR1b3Qjqh78V/9/WJi+wYk+OBzENFcTUJAE1cX0Tg7/3uS5C+Z/wfZGtbo", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +$udfParNO = Udf(Protobuf::Parse, $configNO as TypeConfig); +$udfSerNO = Udf(Protobuf::Serialize, $configNO as TypeConfig); +$udfParYT = Udf(Protobuf::Parse, $configYT as TypeConfig); +$udfSerYT = Udf(Protobuf::Serialize, $configYT as TypeConfig); +$udfParPB = Udf(Protobuf::Parse, $configPB as TypeConfig); +$udfSerPB = Udf(Protobuf::Serialize, $configPB as TypeConfig); + +SELECT TestField, + Ensure("Success", $udfParNO(TestField) == $udfParNO($udfSerNO($udfParNO(TestField))), "Fail"), + Ensure("Success", $udfParYT(TestField) == $udfParYT($udfSerYT($udfParYT(TestField))), "Fail"), + Ensure("Success", $udfParPB(TestField) == $udfParPB($udfSerPB($udfParPB(TestField))), "Fail") +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in new file mode 100644 index 00000000000..b6dd409ad4e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql new file mode 100644 index 
00000000000..35945ffe7a2 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_pb.sql @@ -0,0 +1,40 @@ +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_PROTOBUF; + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; + Test test = 2; +} +*/ + + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrFWkuPG8cRDp9LFrnc4exKoinLsunHrmWLa8jPrBLHXHJ2TYWvDEnbK8AYzJJNaiRyhp4ZSlohCALklFPgqxHkkIuD/IAAQS4Jcg8QIHFOAYIkQPITckx198xwhg8tZcC2D2tO9VdV3dVfd1d1C/54E54dGsZwRPYnpmEbp9PBfp9YPVOb2IZZZDJxiyOKLqJQh+yRNiIVD9gmtvgORAcozIWejeylbrxQnFMqBjVaVCwzjcK/o7C9pFUUIaqrY2oxtJeU2W8xBxsTtXdPHZJcmIndT/EZgD6ZEL1P9N5ZLoK9SMo+ifgKZCfT05HWU3wwQFhMFnhDZQbeha0HRL3nh6YYNEPFPmAZ0mNiWdgBxT6bkFyUjf7ZhdHPjzzlaHVQSSxBkujTMbcQWxE/CRHzVhJUzTGxYRHzvtYjuTgzsLtgoM3b5224ejiUJHloE93SDD23wYy8uGQWyag/b2KmJ74FG8bExl9WLoHzk7rx9FIiNDlGdsFiFQTLmJo9ovSMPlE0fWDkkszA1cWBMGAZcVWEyRkr8C1ehLh1ptvqw1yaMcT5Kvw2DlvrUOwmxAZ0lEiwJ4gB1wkGMf4lg1iClE4sm/Q5IyJrcgq40iKlol+KUh/BltclxVT1ocvN/fN6UpRcPZmqyRkS+BYrAIZOjAEur94IebI8Sk0KWYiSwaW9kfjtGdU2VjClzhfZAtu6kDEJ5T2GmI8syTpRPHdksqPGB7Zp+j/F58ETKIxWwHahtCtsoCz/CDLB8Ig7ELNs1bQZC2My/xAFiOAmw3a5mEx/iu/NBhxhA35pcUYDlufHnX8bNgMDWNd14YdwYalpJMnOVNd0m5gTk1DGcle5/2ys4FzXj+ZW5O3povBaMvHfDeHH+F+48Ic47CxbM0uXLy5/ZPApMVmQYrLzhSsiNlJPyQhXQ2gvc+OVtVZlsUZVZK4pvgtRZ4umFq6tZ4GuJZnpiZchSf/PuRFnfU5QAeWFmIcEWyZ94h5t3jclVp8M1OnIVu6roylhhEdiOcIPqEy8Cim+qjTUech2z5jMF1qVSqj7uxauZYeazAUVMPdvz2/cV5YPb2Et4VHJEK87U6+Oclk0kJAzXNx0pIXfhCHKNpYtSHVOWpJSaXYPa5IQEjMATHBUa5Y6Qtj7rjY6b70hRDyFLhdE/YDXbwgxJGyaG6h+JFUQEQ9KELMhbkKSSQ6bzZqQ8Gy2O3K1cSwkPZvHcrPbEsCzUJfa7dKxJKQ8xOFJR2oL6UC30MWm50JqdOtCRszCJnfhdmJrToQ9FWYd4VayAQEixEIZYoyGSPdMrXQo1ZRmq1NtNko1jJ0nk6UfdKuyVMH4+WQtqdRBWaTQg51lG+rSJeTjQngFF5iteS4U/hWG7SWHylIn34MY5zI/Zl9eejoxZi8ctUzPn2pEVqQa1MQCYT9e2Pz5+fjWOucjkz3ZIRBbcgjchOyCobU345+EILcqOOds
ieHAlnhzPoLPrZ6Ehbn+PAQXl6eUS/vwLsTHxL5juGnVS0sOa9o8P9mOlv+0j6zKC3lvFnr60zBcWGp8aUevAGj6ZGrz1InvxEkmYZsX3WWnttceYe3ARQzwzqyjUdbRZ1aMdIGYr4HQG2lEtxXLNok61vQhO2oSB7GBOrKIvMWb224r1WAEMn0a8YAGb/Y0Cp8mIeVLwMXnIH1Xva8qblHFI5GispZTWL0GOwyCY0RHvZFqWSxoCQYVaVuTNpXdFvFN2GYaYzybtMmIKLTMs9iR4/UsSxF1B0B7ZGFaeIWpDYlOTNUmCvlkilhF1fvKHdW6k9uhBg7DuZD8FAUeOziJwUp6/30EiQdwkVnBiOCAld4d0runTO3BO7nLfv+sh22GKVNIFxFiG9J0MsbaI+yzYbIzNLNka/JFsNh0FOpYfxzE2i1Jqsgp18qRYVJCDQ0vwClOqKHhhheD1evxMWNt6hRjVk4IBKvXO+YAh+MWrocLs2D5FbMLo5xXRY+Ts0VFMeBxcjav9jbsTO5MFvWu+fVEhMwrvsgqc5P0cK76uUt+uK9BLCL9ewrR1VNkjGriDyt3lYGjtjnFKqLXk1hjibWJ1yBrnN7tcUYqaGagPcy9wMK7RRsYH1tMLL6Mtq07qjlhW7KFk0FyL3IolzdcMV0R1gNtYLsWd/mKYDLH2h4INBIBx3sMlkG53y8eBhQ5c/oyT9xQOPP4BlykINzo1L5qqz70qwxNw153GgP9NKenZx6xrvN+UplLra8sOS8cQNrPezEJnPmYkGASVG5WaPpyW8JcBNOoWrUjKXK30anWJSHiS+xvRRMvCbs0a8gEKzXxO3DJvVaxiK080Ey2IMcqPxw9/uw4qDaxP0TMEYOINbiqG7gB4Mahmn1ldqGlqD0kpGXwg9Cz8rRutB3w7IQoOdA5+kZW0Rez67E6Qf7a5hnLzxNyAgUS/f5ayiSMZlSI4d+YEMe/cWED/yaEJP5NClD4ZwTS/gyeFkQ9doaF2C73/GPz/WKZHm4HcZ4uy1yTJhaUfoSnJwnZ+RKPIX7XYrbjzPYLj7d9q82MJ2+1lUZTrpdqsqMuPgXRkfroLHgMMtG604IW6JVd8PBhoq9weexDjMVLBHAiJnxLTEC03JTpEsE1waVKqyqVcZUU3oQ4DwJdPl4YUIl/OjZCbmu3fijJQnhh8gsWrktfZv71lOe/D0HKl2nTFEkdjYwHijrSVMuhBjBRiUrWnbqvadHgcin8IgTCfKo7183QN9nNws9DkAnmt3Pde+4b7d4/wrAZyGrX7d0nkNX6ZDwxbHqdrozIfTLKFdimsf/4vLlYnenVqNrBdrUi1VvNjtQonyjdxvcbzQ8bsqDNwb7CZd8CYb5T4iVY1i1c2duw1WjiKYlHpXR0JJU7bX4T4qE7gQVe+CwC20t6gts4r2F4WXV9nd4XaRbRwuLSKXkwO8Io6bY20DDD51U5L2y2ZnJ+yfQqiBPD0mztPr2kd6+jaKETlQW3parbHlonQ3UOTTfziCy4LR4aM5q+MaXZH8fRsyMkp7jMgzh5/eweLI3JGZNxyC5sqcOhSY27hnilkvHEDJi/BQk3DvTwppHAZIqV32F6Naa7jehUs5TZtX4Y2xNySrO8K9HC55jCBJ8lsJpJjAwkOdXgb2J757xkFGsOXvY0838KQcIV43Ebnaj2HWYudhgWQjL7pnLMCXVGAUdOv+m8jojaZ2WQMR7jTFruvDrysiOmr2O2qWqjADbKsILb4IEP4CnXbh/zUiyx+jOlOLvuuOQAKk67q1v4cwiybuHW94JVB1B13bD94Vqk8oJeseQpyT4D+THArGVl2PCcct6c2MMlL/WBi2iFRy9kTslQ052bZP7hXshEvQuZwx9hCWeM57t7KMxdN1jvh25fd0BDY6Tqw6JhDmcPrzTjsXzPr5PT/4VCvwxHjluHvw7nj7liyw2GTAYj0qMDhM824TqmmibpT3tk/8zeZyt+gFUCN27te+R1
nnQjjZNO/tyXX0rrrPShqU4mxGQZ29FIHRY+DUOUHpbiBkRKjRPcy7Kw2ey8L8lKuVnr1ht0J0tDgl6D0itarAK2MEOgX04iE8HQCm1JrpZq1dslepepnHSEqJiHi0FpS252mofdIyFGfbhXnEqt2u4IcebWuQnlog1cq1fqpZZSajOB0jyiLru4uSo16bhUPhES1MlyiJCk/XTaKtVyRwAxBzuOwPPEWlJsgJiGVSpY9aQL92DbDZRTyrBQdZxIvQK7FaklS2V6G6u0m3JHOapKtUqbWq4640RRTcJoXoXLfsThCf/lpn2hwndns8KyPeZqz3GF50pbapVk9ORYENJiCjY+KGFgcTI2D2zIUcIr7pMCfzUYoBFLfOxzbe4vv6NvkZkbl4vIn+JCH/jl6IUBe8tntr0ma8Erey9dy+tfl3r1+Ljo1WuyDh5BPuDVrSrX8fuF4/dKwK9vernnSz7PvkbrQIYYd/P4p5QVUZ0bHzd18B6kesZoOuavOOdZppFjmxrXocfdwTFs3SNnyhNY+cKxsol65ZkhHbaXMei8V9j1SJTtLxDI58/PnXP9rUUf15+POgQ2g2w519OahHH/xQh387H7bLeKK/6Cbr3g8Uc/br4O4n3V1FTdjdqKGQ94cXkjOKosKnTSD1+7XVSLZyp9T7xuE3VcNKf7jz164G8hSGO2PzUn3Fn+yY6qws9CEO0Qi6ZhMU3HBIBVYqkbqSKVFqtUJPMWWuzbKHRerWIMITNRPo+ZNcWwP7h5h1Tnrjuk5nchxsXPQEhzzAs+89xHSDuI/urvl2Oncf6w+X9iYdnD", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +}@@; + +$udfPar = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig); + +SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail") +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in new file mode 100644 index 00000000000..b6dd409ad4e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in @@ -0,0 +1 @@ +{"TestField"="{\"inner\":{\"i\":{\"a\":\"hello\"}},\"test\":{\"inner\":{\"i\":{\"a\":\"bye\"}}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr new file mode 100644 index 00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql 
b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql new file mode 100644 index 00000000000..42312716e68 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_ser_yt.sql @@ -0,0 +1,39 @@ +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_YT; + message InnerInner { + string a = 1; + } + message Inner { + InnerInner i = 1; + } + Inner inner = 1; + Test test = 2; +} +*/ + +$config = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrFWkuPG8cRDp9LFrnc4exKoinLsunHrmWLa8jPrBLHXHJ2TYWvDEnbK8AYzJJNaiRyhp4ZSlohCALklFPgqxHkkIuD/IAAQS4Jcg8QIHFOAYIkQPITckx198xwhg8tZcC2D2tO9VdV3dVfd1d1C/54E54dGsZwRPYnpmEbp9PBfp9YPVOb2IZZZDJxiyOKLqJQh+yRNiIVD9gmtvgORAcozIWejeylbrxQnFMqBjVaVCwzjcK/o7C9pFUUIaqrY2oxtJeU2W8xBxsTtXdPHZJcmIndT/EZgD6ZEL1P9N5ZLoK9SMo+ifgKZCfT05HWU3wwQFhMFnhDZQbeha0HRL3nh6YYNEPFPmAZ0mNiWdgBxT6bkFyUjf7ZhdHPjzzlaHVQSSxBkujTMbcQWxE/CRHzVhJUzTGxYRHzvtYjuTgzsLtgoM3b5224ejiUJHloE93SDD23wYy8uGQWyag/b2KmJ74FG8bExl9WLoHzk7rx9FIiNDlGdsFiFQTLmJo9ovSMPlE0fWDkkszA1cWBMGAZcVWEyRkr8C1ehLh1ptvqw1yaMcT5Kvw2DlvrUOwmxAZ0lEiwJ4gB1wkGMf4lg1iClE4sm/Q5IyJrcgq40iKlol+KUh/BltclxVT1ocvN/fN6UpRcPZmqyRkS+BYrAIZOjAEur94IebI8Sk0KWYiSwaW9kfjtGdU2VjClzhfZAtu6kDEJ5T2GmI8syTpRPHdksqPGB7Zp+j/F58ETKIxWwHahtCtsoCz/CDLB8Ig7ELNs1bQZC2My/xAFiOAmw3a5mEx/iu/NBhxhA35pcUYDlufHnX8bNgMDWNd14YdwYalpJMnOVNd0m5gTk1DGcle5/2ys4FzXj+ZW5O3povBaMvHfDeHH+F+48Ic47CxbM0uXLy5/ZPApMVmQYrLzhSsiNlJPyQhXQ2gvc+OVtVZlsUZVZK4pvgtRZ4umFq6tZ4GuJZnpiZchSf/PuRFnfU5QAeWFmIcEWyZ94h5t3jclVp8M1OnIVu6roylhhEdiOcIPqEy8Cim+qjTUech2z5jMF1qVSqj7uxauZYeazAUVMPdvz2/cV5YPb2Et4VHJEK87U6+Oclk0kJAzXNx0pIXfhCHKNpYtSHVOWpJSaXYPa5IQEjMATHBUa5Y6Qtj7rjY6b70hRDyFLhdE/YDXbwgxJGyaG6h+JFUQEQ9KELMhbkKSSQ6bzZqQ8Gy2O3K1cSwkPZvHcrPbEsCzUJfa7dKxJKQ8xOFJR2oL6UC30MWm50JqdOtCRszCJnfhdmJrToQ9FWYd4VayAQEixEIZYoyGSPdMrXQo1ZRmq1NtNko1jJ0nk6UfdKuyVMH4+WQtqdRBWaTQg51lG+rSJeTjQngFF5iteS4U/hWG7SWHy
lIn34MY5zI/Zl9eejoxZi8ctUzPn2pEVqQa1MQCYT9e2Pz5+fjWOucjkz3ZIRBbcgjchOyCobU345+EILcqOOdsieHAlnhzPoLPrZ6Ehbn+PAQXl6eUS/vwLsTHxL5juGnVS0sOa9o8P9mOlv+0j6zKC3lvFnr60zBcWGp8aUevAGj6ZGrz1InvxEkmYZsX3WWnttceYe3ARQzwzqyjUdbRZ1aMdIGYr4HQG2lEtxXLNok61vQhO2oSB7GBOrKIvMWb224r1WAEMn0a8YAGb/Y0Cp8mIeVLwMXnIH1Xva8qblHFI5GispZTWL0GOwyCY0RHvZFqWSxoCQYVaVuTNpXdFvFN2GYaYzybtMmIKLTMs9iR4/UsSxF1B0B7ZGFaeIWpDYlOTNUmCvlkilhF1fvKHdW6k9uhBg7DuZD8FAUeOziJwUp6/30EiQdwkVnBiOCAld4d0runTO3BO7nLfv+sh22GKVNIFxFiG9J0MsbaI+yzYbIzNLNka/JFsNh0FOpYfxzE2i1Jqsgp18qRYVJCDQ0vwClOqKHhhheD1evxMWNt6hRjVk4IBKvXO+YAh+MWrocLs2D5FbMLo5xXRY+Ts0VFMeBxcjav9jbsTO5MFvWu+fVEhMwrvsgqc5P0cK76uUt+uK9BLCL9ewrR1VNkjGriDyt3lYGjtjnFKqLXk1hjibWJ1yBrnN7tcUYqaGagPcy9wMK7RRsYH1tMLL6Mtq07qjlhW7KFk0FyL3IolzdcMV0R1gNtYLsWd/mKYDLH2h4INBIBx3sMlkG53y8eBhQ5c/oyT9xQOPP4BlykINzo1L5qqz70qwxNw153GgP9NKenZx6xrvN+UplLra8sOS8cQNrPezEJnPmYkGASVG5WaPpyW8JcBNOoWrUjKXK30anWJSHiS+xvRRMvCbs0a8gEKzXxO3DJvVaxiK080Ey2IMcqPxw9/uw4qDaxP0TMEYOINbiqG7gB4Mahmn1ldqGlqD0kpGXwg9Cz8rRutB3w7IQoOdA5+kZW0Rez67E6Qf7a5hnLzxNyAgUS/f5ayiSMZlSI4d+YEMe/cWED/yaEJP5NClD4ZwTS/gyeFkQ9doaF2C73/GPz/WKZHm4HcZ4uy1yTJhaUfoSnJwnZ+RKPIX7XYrbjzPYLj7d9q82MJ2+1lUZTrpdqsqMuPgXRkfroLHgMMtG604IW6JVd8PBhoq9weexDjMVLBHAiJnxLTEC03JTpEsE1waVKqyqVcZUU3oQ4DwJdPl4YUIl/OjZCbmu3fijJQnhh8gsWrktfZv71lOe/D0HKl2nTFEkdjYwHijrSVMuhBjBRiUrWnbqvadHgcin8IgTCfKo7183QN9nNws9DkAnmt3Pde+4b7d4/wrAZyGrX7d0nkNX6ZDwxbHqdrozIfTLKFdimsf/4vLlYnenVqNrBdrUi1VvNjtQonyjdxvcbzQ8bsqDNwb7CZd8CYb5T4iVY1i1c2duw1WjiKYlHpXR0JJU7bX4T4qE7gQVe+CwC20t6gts4r2F4WXV9nd4XaRbRwuLSKXkwO8Io6bY20DDD51U5L2y2ZnJ+yfQqiBPD0mztPr2kd6+jaKETlQW3parbHlonQ3UOTTfziCy4LR4aM5q+MaXZH8fRsyMkp7jMgzh5/eweLI3JGZNxyC5sqcOhSY27hnilkvHEDJi/BQk3DvTwppHAZIqV32F6Naa7jehUs5TZtX4Y2xNySrO8K9HC55jCBJ8lsJpJjAwkOdXgb2J757xkFGsOXvY0838KQcIV43Ebnaj2HWYudhgWQjL7pnLMCXVGAUdOv+m8jojaZ2WQMR7jTFruvDrysiOmr2O2qWqjADbKsILb4IEP4CnXbh/zUiyx+jOlOLvuuOQAKk67q1v4cwiybuHW94JVB1B13bD94Vqk8oJeseQpyT4D+THArGVl2PCcct6c2MMlL/WBi2iFRy9kTslQ052bZP7hXshEvQuZwx9hCWeM57t7KMxdN1jvh25fd0BDY6Tqw
6JhDmcPrzTjsXzPr5PT/4VCvwxHjluHvw7nj7liyw2GTAYj0qMDhM824TqmmibpT3tk/8zeZyt+gFUCN27te+R1nnQjjZNO/tyXX0rrrPShqU4mxGQZ29FIHRY+DUOUHpbiBkRKjRPcy7Kw2ey8L8lKuVnr1ht0J0tDgl6D0itarAK2MEOgX04iE8HQCm1JrpZq1dslepepnHSEqJiHi0FpS252mofdIyFGfbhXnEqt2u4IcebWuQnlog1cq1fqpZZSajOB0jyiLru4uSo16bhUPhES1MlyiJCk/XTaKtVyRwAxBzuOwPPEWlJsgJiGVSpY9aQL92DbDZRTyrBQdZxIvQK7FaklS2V6G6u0m3JHOapKtUqbWq4640RRTcJoXoXLfsThCf/lpn2hwndns8KyPeZqz3GF50pbapVk9ORYENJiCjY+KGFgcTI2D2zIUcIr7pMCfzUYoBFLfOxzbe4vv6NvkZkbl4vIn+JCH/jl6IUBe8tntr0ma8Erey9dy+tfl3r1+Ljo1WuyDh5BPuDVrSrX8fuF4/dKwK9vernnSz7PvkbrQIYYd/P4p5QVUZ0bHzd18B6kesZoOuavOOdZppFjmxrXocfdwTFs3SNnyhNY+cKxsol65ZkhHbaXMei8V9j1SJTtLxDI58/PnXP9rUUf15+POgQ2g2w519OahHH/xQh387H7bLeKK/6Cbr3g8Uc/br4O4n3V1FTdjdqKGQ94cXkjOKosKnTSD1+7XVSLZyp9T7xuE3VcNKf7jz164G8hSGO2PzXPuLP8kx1VhZ+FINohFk3DYpqOCQCrxFI3UkUqLVapSOYttNi3Uei8WsUYQmaifB4za4phf3DzDqnOXXdIze9CjIufgZDmmBd85rmPkHYQ/dXfL0dP4/xh8/9pqNnL", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +}@@; + +$udfPar = Udf(Protobuf::Parse, $config as TypeConfig); +$udfSer = Udf(Protobuf::Serialize, $config as TypeConfig); + +SELECT TestField, Ensure("Success", $udfPar(TestField) == $udfPar($udfSer($udfPar(TestField))), "Fail") +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in new file mode 100644 index 00000000000..6ab446801f2 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in @@ -0,0 +1,4 @@ +{"TestField"="{\"Name\":\"n1\"}"}; +{"TestField"="{\"Name\":\"n1\",\"a\":\"a1\"}"}; +{"TestField"="{\"Name\":\"n1\",\"test\":{\"Name\":\"n2\",\"a\":\"a2\"}}"}; +{"TestField"="{\"Name\":\"n1\",\"test\":{\"Name\":\"n2\",\"test\":{\"Name\":\"n3\"}}}"}; diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr new file mode 100644 index 
00000000000..f10d440a236 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.in.attr @@ -0,0 +1 @@ +{schema=[{name=TestField;type=string}]} diff --git a/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql new file mode 100644 index 00000000000..7c75b1cea5e --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/cases/yt_mode_variant.sql @@ -0,0 +1,82 @@ +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_YT; + option (NYT.default_oneof_flags) = VARIANT; + + oneof Var { + Test test = 1 [(NYT.column_name) = "_test"]; + string a = 2; + } + string Name = 3 [(NYT.column_name) = "name"];; +} +*/ + +$configVar = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": "eNrFWs2P28YVjz5XetrVUty1razjOFESe+PE2sD57LpNow/uWq6+SmmTrIGA4FIjLW2JVEjK9ho9FOippyLXoCiKXlLk1FOBopcWvRcI0OYWoGgLtH9Cj30zQ1KkpPXKAZL4YItv3te8+c2892YMf7kJzw1MczAkO2PLdMyjSX+nR2zN0seOaRUZTVznHEWPo9CA3J4+JFWfsUMc8R2I95GYjzwX287ceLE4I1QMS7QpWWYShX/HYWPBqChC3FBHVGNkOy2z32IeVsaqdk8dkHyUkb1P8VmAHhkTo0cM7SQfQy/ScoAivgK58eRoqGtKgA2QLSELfKA6Zb4K6w+Iei/ImmGsWUoOMFZgdURsGx1QnJMxycfZ7J+bm/3szDOuVBeFxBKkiTEZcQ2JU+InIceslhQVc1Ws2MS6r2skn2QKrs4p6PDxWR2eHE4lTR46xLB108ivMCUvLVhFMuzNqpjKiW/Bijl28JedT+H6ZG48sxAILc4je8xiDQTbnFgaUTSzRxTd6Jv5NFNweX4ijLGCfDVkk7N26Fs8D0n7xHDUh/lVhhD3q/CHJKwvA7GbkOjTWSLAniAGXCYcxOTXDGIJMgaxHdLjiIgtiSngQvOQin8tSH0I675LiqUaAw+bO2d5UpQ8OZmKyVkS+harAKZBzD5uL22IOFkcpRZlmYuSyanaUPzeFGorpyClwTfZHNoOIGsRinsMMZ9ZmjlRPHNmsivGJ7ZmBT/FF8AnKAxWwE6hVY/YRNrWI8iGwyNuQsJ2VMthKEzI/EMUIIaHDDvlEjL9Kb43nXCMTfjK/IqGNM/Oe+ttWAtNYFnThZ/AuYWqESSbE0M3HGKNLUIRy03l/7NyCuYOgtxci7wxmSdeS6f+uyL8FP9EC39OwuaiPbNw++L2RwQfEYsFKSG7X7gjEkP1iAxxN0S2szdeWWpXFutUROaS4rsQd49oquHachroXpKZnHgR0vRfjo0k8zlFCRQX4hak2DbpES+1+d8UWD3SVydDR7mvDieEAR6B5RLfpzTxMmT4rtJR5iE7PRMy32g1SqHm79q4l11oMhOUwMy/
PXtwX1o8vbm9hKmScbzuLr06zOdQQUrOcnLLpRZ+H4U4O1jWIdM9bEtKtXVQrktCRMwCMMJevVXqClH/u9bsvvWGEPMFDjghHmR4/YaQQMCucgW1D6UqciTDFORZEdcgzSjlVqsupHydna5ca+4LaV/nvtw6aAvga2hInU5pXxIyPkf5sCt1hNWQW2hizTchNQ8aQlbMwRo34TmxPkNCT4WpI1xLLkRADrFQgQSDIcI9Wy+VpbrSandrrWapjrHzabL044OaLFUxfgFaWyp1kRYraLC56EBduIUCWIieggWmaxYLhX9FYWNBUllo5IeQ4FjmafblhdmJIXsu1TK5YKkRO6XUoCrmAPvR3OHP8+Nby+RHRnuyJJBYkARuQm5O0dKH8c8ikD8tOGccidHQkXhzNoLPn74Ic2v9WQTOLy4pF/rwLiRHxDk2vbLqyoJkTYdnF9uVCmb72Gl1IfdmztOfR+HcQuULHb0EoBvjicNLJ34SpxmFHV70lJ04/niMjQMnMYZ3po7GmaPPnjLTOWC+BoI21InhKLZjEXWkGwOWalK7ib46tIm8zoc73iiVYACyAhLJkAQf9iUKn6QhEyjAxedh9a56X1W8popHIkNpbbexeg02GQvOEQ1pQ9W2WdBSjFWkYy06VPFGxDdhg0mMMDfp4yFRaJtns5Tje5ajHA2XgXpkY1l4iYkNiEEs1SEK+XiCvIpq9JRj1T7Ob1IF5Wg+Ij9NGfddPomxlYzeLWQSd+E804IRwQkr2jHR7ikTp/9O/mLQPvOww3gqlOUAOcQOrNLFGOmP0GfTYjk0u+BoCkSw2HIFGth/7CY6bUmqyhlPy55pUUANTD/AGQ6ogemFF4OlaXzO2Ju6zZidF0LB0rR9zuBi3Mb9cG4arKBgbm6Ws6JocXwyLyiGLI5PZsXehs3x8Xhe7lpQTkSWWcGXWGduEQ3Xqpe/EGQPDIhFhL+mEEM9QsSoFv6w85cZc9yxJthFaJrEBktsTLwGOfPorsYRqaCavv4w/yIL7zodYHhsM7L4Muq2j1VrzI5kGxeD5F/irJze9Mh0R9gP9L7jabzKdwSjudq2QaCRCBneZmxZpAftYjKgnFOjL/PCDYlTi2/AecqEB53aUx01wP0q46Zhb7iDIT+tydGJD6zr3E9K86D1jRXnhV1YDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNDs1hqSEAsU9rfjqSvC1cIXUciGOzXx+3DBu1axiaM80C22IUcqT44+fjZdrg5xPkCePcYi1uGyYeIBgAeHavWU6YWWomoISNvkidDX8oxhdlzmaYYouawz8I2dBl+srkfqGPHrWCesPk/JKSRI9PtbaZMwmikhjX+nBSj8MwarwXqdtj8ay1gRdqa98NjqvlihqWw3yYtjmUvSMoKCjfBiJCW7X+I+JO/aTHeS6X7x8bpvd5jy9O2O0mzJjVJddsXFpyE+VB+dhJMeIy27CKiBXtCFUw0jfYObYQcSLF4igBsx4SkxBfFKS6YbAncApyrtmlTBPVF4E5I8CHSz+GFAIf7p6oh4oweNsiQL0fBSx4VEwcZdGKjDv51m/E8RyATqaloQqcOh+UBRh7pqu9AARipRyrJL9y1tkYSQLPwqAsJsYTvjZuS7dLPwywhkw9XsjHvPf6fu/SMKa6EadlnvPoac3iOjsenQy3NlSO6TYb7ADo2dx1fJxdpUrk7FdjdqVanRbnWlZuVQOWj+qNn6oCkL+gzbN7jt2yDMOiVegEVu4c7egPVmC3MiJkZpb0+qdDv83sPn7oY2eOHTGGws8ASPcd6x8Cbq+jLeF2nN0MZW0m1wsBbCKBmO3texnuc9OG9j1qd0fqX0Kohj09Yd/T69kvcun2hbE5cFb6RmOD63QQbqDDc9zGOy4I343Fi/9MwJrfU4H80dETnDaT6LW8VPb71WsRRjNM5yFdbVwcCiyj1FvC/J+mTGuHUbUl4caKqmkcDSiTXbUXoRZniD
aFS3leklfhTHU3JGt/0L0MJnWLCEHyGwd0kNTQQ5leAvYNtnvFsU6y6/7Etu/TUCKY+M6TY+Vp1jpi5RjgoRmX1TOlaABoOAS6ffdF2HRO2xpsccjXAlbW9dXXrFJdO3MMdS9WGIN854BW/AZ96Fpz29PaxCsaHqTYWS7HLjgstQdcc92cIXEch5bVrPD1YDQDUM0wmGax7Kc3LFki8kBxRsjQCmI6eGDfOU+8LEnil5Yw+cRPs5ev1yRAa64d4b8w/v+iXuX7+UfxHBjs0czfpbFmZuF+xbkTvvDnTneHJURP6dgTlUjcH0nZX90K5jP3V9YAZeXW9Of/4vEvlNNLbfLn8e3drn5tpeeGTSHxKNThk+XYPrWGpapDfRyM6Js8POgD52CdyIvePD2X3SjTUPu1tnvvxSoOekDyx1PCYWq+H2huqg8EkU4jR9iisQKzUP8XTLwVqre0uSlUqrftBo0rNtFVL0GpRe0WIXsI41A/1yS5sYBlvoSHKtVK/dKdG7TOWwK8TFLTgfprblVrdVPtgTEtSGd8Wp1GudrpBkZt2bUE5awd17qVFqK6UOIyitPWryAI9bpS7tlyqHQooaWcwipKmf7li1VukKIOZh0yX4lthIhk0QC7NqFbue1cI92PAC5bYyLFRdN1KvwNWq1JalCr2NVTotuavs1aR6tUM119x5IqkuYTQvw8UgR/mQ//IKwUjhB9NVYfUfM7XtmsJM05HaJRktuRqEVTEDK++XMLC4GGu7DuTpFlC8JwX+atBHJbb42Ofa/N/+SN8iszcuFhE/xTkf+OXouT57y2e6/SF7zip7L13K6t8XWvXxOG/VH7J3H8FWyKrXVS5j90vX7qWQ3cDycssXApYDg/auDAlu5vFPKadEdWZ+XNXue5DRzOFkxF9xztJMI8eOOS5DE+DuPqzfIyfKE2j50tWyhnKVqSIDNhYh6KxX2OVAlOvNAShgL4idM+0tBR/PXgA6BNbCaDnT0pKA8f7HCDfzkfdsdxpWgi3ecsHjj35cfQPE+6qlq4YXtVNWPGTFw43girKo0EUvv3anqBZPVPqeeN0h6qhoTXYem3rgd1jTnDjc0NaTpanCCOJdYjviFYg7+C/ryjI3EkVKLKc//+piQqH0W0/JbBwL6ojKMzuSIqr4DMSp17wMKqdQgJXAMqPupn6L37/+6uJaOQGx91XrKMlfLP8PjbXUZg==", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +/* +syntax='proto3'; + +import "yt/yt_proto/yt/formats/extension.proto"; + +message Test { + option (NYT.default_field_flags) = SERIALIZATION_YT; + option (NYT.default_oneof_flags) = SEPARATE_FIELDS; + + oneof Var { + Test test = 1 [(NYT.column_name) = "_test"]; + string a = 2; + } + string Name = 3 [(NYT.column_name) = "name"];; +} +*/ + +$configSeparate = @@{ + "name": "Test", + "format": "json", + "skip": 0, + "lists": { + "optional": false + }, + "meta": 
"eNrFWs2P28YVjz5XetrVUty1razjOFESe+PE2sD57LpNow/uWq6+SmmTrIGA4FIjLW2JVEjK9ho9FOippyLXoCiKXlLk1FOBopcWvRcI0OYWoGgLtH9Cj30zQ1KkpPXKAZL4YItv3te8+c2892YMf7kJzw1MczAkO2PLdMyjSX+nR2zN0seOaRUZTVznHEWPo9CA3J4+JFWfsUMc8R2I95GYjzwX287ceLE4I1QMS7QpWWYShX/HYWPBqChC3FBHVGNkOy2z32IeVsaqdk8dkHyUkb1P8VmAHhkTo0cM7SQfQy/ScoAivgK58eRoqGtKgA2QLSELfKA6Zb4K6w+Iei/ImmGsWUoOMFZgdURsGx1QnJMxycfZ7J+bm/3szDOuVBeFxBKkiTEZcQ2JU+InIceslhQVc1Ws2MS6r2skn2QKrs4p6PDxWR2eHE4lTR46xLB108ivMCUvLVhFMuzNqpjKiW/Bijl28JedT+H6ZG48sxAILc4je8xiDQTbnFgaUTSzRxTd6Jv5NFNweX4ijLGCfDVkk7N26Fs8D0n7xHDUh/lVhhD3q/CHJKwvA7GbkOjTWSLAniAGXCYcxOTXDGIJMgaxHdLjiIgtiSngQvOQin8tSH0I675LiqUaAw+bO2d5UpQ8OZmKyVkS+harAKZBzD5uL22IOFkcpRZlmYuSyanaUPzeFGorpyClwTfZHNoOIGsRinsMMZ9ZmjlRPHNmsivGJ7ZmBT/FF8AnKAxWwE6hVY/YRNrWI8iGwyNuQsJ2VMthKEzI/EMUIIaHDDvlEjL9Kb43nXCMTfjK/IqGNM/Oe+ttWAtNYFnThZ/AuYWqESSbE0M3HGKNLUIRy03l/7NyCuYOgtxci7wxmSdeS6f+uyL8FP9EC39OwuaiPbNw++L2RwQfEYsFKSG7X7gjEkP1iAxxN0S2szdeWWpXFutUROaS4rsQd49oquHachroXpKZnHgR0vRfjo0k8zlFCRQX4hak2DbpES+1+d8UWD3SVydDR7mvDieEAR6B5RLfpzTxMmT4rtJR5iE7PRMy32g1SqHm79q4l11oMhOUwMy/PXtwX1o8vbm9hKmScbzuLr06zOdQQUrOcnLLpRZ+H4U4O1jWIdM9bEtKtXVQrktCRMwCMMJevVXqClH/u9bsvvWGEPMFDjghHmR4/YaQQMCucgW1D6UqciTDFORZEdcgzSjlVqsupHydna5ca+4LaV/nvtw6aAvga2hInU5pXxIyPkf5sCt1hNWQW2hizTchNQ8aQlbMwRo34TmxPkNCT4WpI1xLLkRADrFQgQSDIcI9Wy+VpbrSandrrWapjrHzabL044OaLFUxfgFaWyp1kRYraLC56EBduIUCWIieggWmaxYLhX9FYWNBUllo5IeQ4FjmafblhdmJIXsu1TK5YKkRO6XUoCrmAPvR3OHP8+Nby+RHRnuyJJBYkARuQm5O0dKH8c8ikD8tOGccidHQkXhzNoLPn74Ic2v9WQTOLy4pF/rwLiRHxDk2vbLqyoJkTYdnF9uVCmb72Gl1IfdmztOfR+HcQuULHb0EoBvjicNLJ34SpxmFHV70lJ04/niMjQMnMYZ3po7GmaPPnjLTOWC+BoI21InhKLZjEXWkGwOWalK7ib46tIm8zoc73iiVYACyAhLJkAQf9iUKn6QhEyjAxedh9a56X1W8popHIkNpbbexeg02GQvOEQ1pQ9W2WdBSjFWkYy06VPFGxDdhg0mMMDfp4yFRaJtns5Tje5ajHA2XgXpkY1l4iYkNiEEs1SEK+XiCvIpq9JRj1T7Ob1IF5Wg+Ij9NGfddPomxlYzeLWQSd+E804IRwQkr2jHR7ikTp/9O/mLQPvOww3gqlOUAOcQOrNLFGOmP0GfTYjk0u+BoCkSw2HIFGth/7CY6bUmqyhlPy55pUUANTD/AGQ6ogemFF4OlaXzO2Ju6zZidF0LB0rR9zuBi3Mb9cG4arKBgbm6Ws6JocXwyLyiGLI5PZsXehs3
x8Xhe7lpQTkSWWcGXWGduEQ3Xqpe/EGQPDIhFhL+mEEM9QsSoFv6w85cZc9yxJthFaJrEBktsTLwGOfPorsYRqaCavv4w/yIL7zodYHhsM7L4Muq2j1VrzI5kGxeD5F/irJze9Mh0R9gP9L7jabzKdwSjudq2QaCRCBneZmxZpAftYjKgnFOjL/PCDYlTi2/AecqEB53aUx01wP0q46Zhb7iDIT+tydGJD6zr3E9K86D1jRXnhV1YDeJeTANHPhYkWARVWlVavtyRsBbBMqpe60qKfNDs1hqSEAsU9rfjqSvC1cIXUciGOzXx+3DBu1axiaM80C22IUcqT44+fjZdrg5xPkCePcYi1uGyYeIBgAeHavWU6YWWomoISNvkidDX8oxhdlzmaYYouawz8I2dBl+srkfqGPHrWCesPk/JKSRI9PtbaZMwmikhjX+nBSj8MwarwXqdtj8ay1gRdqa98NjqvlihqWw3yYtjmUvSMoKCjfBiJCW7X+I+JO/aTHeS6X7x8bpvd5jy9O2O0mzJjVJddsXFpyE+VB+dhJMeIy27CKiBXtCFUw0jfYObYQcSLF4igBsx4SkxBfFKS6YbAncApyrtmlTBPVF4E5I8CHSz+GFAIf7p6oh4oweNsiQL0fBSx4VEwcZdGKjDv51m/E8RyATqaloQqcOh+UBRh7pqu9AARipRyrJL9y1tkYSQLPwqAsJsYTvjZuS7dLPwywhkw9XsjHvPf6fu/SMKa6EadlnvPoac3iOjsenQy3NlSO6TYb7ADo2dx1fJxdpUrk7FdjdqVanRbnWlZuVQOWj+qNn6oCkL+gzbN7jt2yDMOiVegEVu4c7egPVmC3MiJkZpb0+qdDv83sPn7oY2eOHTGGws8ASPcd6x8Cbq+jLeF2nN0MZW0m1wsBbCKBmO3texnuc9OG9j1qd0fqX0Kohj09Yd/T69kvcun2hbE5cFb6RmOD63QQbqDDc9zGOy4I343Fi/9MwJrfU4H80dETnDaT6LW8VPb71WsRRjNM5yFdbVwcCiyj1FvC/J+mTGuHUbUl4caKqmkcDSiTXbUXoRZniDaFS3leklfhTHU3JGt/0L0MJnWLCEHyGwd0kNTQQ5leAvYNtnvFsU6y6/7Etu/TUCKY+M6TY+Vp1jpi5RjgoRmX1TOlaABoOAS6ffdF2HRO2xpsccjXAlbW9dXXrFJdO3MMdS9WGIN854BW/AZ96Fpz29PaxCsaHqTYWS7HLjgstQdcc92cIXEch5bVrPD1YDQDUM0wmGax7Kc3LFki8kBxRsjQCmI6eGDfOU+8LEnil5Yw+cRPs5ev1yRAa64d4b8w/v+iXuX7+UfxHBjs0czfpbFmZuF+xbkTvvDnTneHJURP6dgTlUjcH0nZX90K5jP3V9YAZeXW9Of/4vEvlNNLbfLn8e3drn5tpeeGTSHxKNThk+XYPrWGpapDfRyM6Js8POgD52CdyIvePD2X3SjTUPu1tnvvxSoOekDyx1PCYWq+H2huqg8EkU4jR9iisQKzUP8XTLwVqre0uSlUqrftBo0rNtFVL0GpRe0WIXsI41A/1yS5sYBlvoSHKtVK/dKdG7TOWwK8TFLTgfprblVrdVPtgTEtSGd8Wp1GudrpBkZt2bUE5awd17qVFqK6UOIyitPWryAI9bpS7tlyqHQooaWcwipKmf7li1VukKIOZh0yX4lthIhk0QC7NqFbue1cI92PAC5bYyLFRdN1KvwNWq1JalCr2NVTotuavs1aR6tUM119x5IqkuYTQvw8UgR/mQ//IKwUjhB9NVYfUfM7XtmsJM05HaJRktuRqEVTEDK++XMLC4GGu7DuTpFlC8JwX+atBHJbb42Ofa/N/+SN8iszcuFhE/xTkf+OXouT57y2e6/SF7zip7L13K6t8XWvXxOG/VH7J3H8FWyKrXVS5j90vX7qWQ3cDycssXApYDg/auDAlu5vFPKadEdWZ+XNXue5DRzOFkxF9xztJMI8eOOS5DE+D
uPqzfIyfKE2j50tWyhnKVqSIDNhYh6KxX2OVAlOvNAShgL4idM+0tBR/PXgA6BNbCaDnT0pKA8f7HCDfzkfdsdxpWgi3ecsHjj35cfQPE+6qlq4YXtVNWPGTFw43girKo0EUvv3anqBZPVPqeeN0h6qhoTXYem3rgd1jTnDjc0NaTpanCCOJdYjviFYg7+C/ryjI3EkVKLKc//+piQqH0W0/JbBwL6ojKMzuSIqr4DMSp17wMKqdQgJXAMqPupn6L37/+6uJqOQGx91XrKMlfLP8PjaXUZQ==", + "view": { + "recursion": "bytes", + "enum": "number", + "yt_mode": true + } +} +@@; + +$udfParseVar = Udf(Protobuf::Parse, $configVar as TypeConfig); +$udfSerializeVar = Udf(Protobuf::Serialize, $configVar as TypeConfig); + +$udfParseSep = Udf(Protobuf::Parse, $configSeparate as TypeConfig); +$udfSerializeSep = Udf(Protobuf::Serialize, $configSeparate as TypeConfig); + +SELECT + TestField, + $udfParseVar(TestField), + $udfSerializeVar($udfParseVar(TestField)), + Ensure("Success", StablePickle($udfParseVar(TestField)) == StablePickle($udfParseVar($udfSerializeVar($udfParseVar(TestField)))), "Fail"), + $udfParseSep(TestField), + $udfSerializeSep($udfParseSep(TestField)), + Ensure("Success", StablePickle($udfParseSep(TestField)) == StablePickle($udfParseSep($udfSerializeSep($udfParseSep(TestField)))), "Fail"), +FROM plato.Input; + diff --git a/yql/essentials/udfs/common/protobuf/test/ya.make b/yql/essentials/udfs/common/protobuf/test/ya.make new file mode 100644 index 00000000000..e44cb5458c9 --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/protobuf) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/protobuf/ya.make b/yql/essentials/udfs/common/protobuf/ya.make new file mode 100644 index 00000000000..714ad77137f --- /dev/null +++ b/yql/essentials/udfs/common/protobuf/ya.make @@ -0,0 +1,23 @@ +YQL_UDF_CONTRIB(protobuf_udf) + +YQL_ABI_VERSION( + 2 + 9 + 0 +) + +SRCS( + protobuf_udf.cpp +) + +PEERDIR( + library/cpp/protobuf/yql + yql/essentials/minikql/protobuf_udf + 
yql/essentials/public/udf +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.c b/yql/essentials/udfs/common/python/bindings/py27_backports.c new file mode 100644 index 00000000000..cf21a97cef0 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.c @@ -0,0 +1,91 @@ +#include "py27_backports.h" + + +// Provide implementations from python 2.7.15 as backports + +int +_PySlice_Unpack(PyObject *_r, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) +{ + PySliceObject *r = (PySliceObject *)_r; + /* this is harder to get right than you might think */ + + assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX); + + if (r->step == Py_None) { + *step = 1; + } + else { + if (!_PyEval_SliceIndex(r->step, step)) return -1; + if (*step == 0) { + PyErr_SetString(PyExc_ValueError, + "slice step cannot be zero"); + return -1; + } + /* Here *step might be -PY_SSIZE_T_MAX-1; in this case we replace it + * with -PY_SSIZE_T_MAX. This doesn't affect the semantics, and it + * guards against later undefined behaviour resulting from code that + * does "step = -step" as part of a slice reversal. + */ + if (*step < -PY_SSIZE_T_MAX) + *step = -PY_SSIZE_T_MAX; + } + + if (r->start == Py_None) { + *start = *step < 0 ? PY_SSIZE_T_MAX : 0; + } + else { + if (!_PyEval_SliceIndex(r->start, start)) return -1; + } + + if (r->stop == Py_None) { + *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX; + } + else { + if (!_PyEval_SliceIndex(r->stop, stop)) return -1; + } + + return 0; +} + +Py_ssize_t +_PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step) +{ + /* this is harder to get right than you might think */ + + assert(step != 0); + assert(step >= -PY_SSIZE_T_MAX); + + if (*start < 0) { + *start += length; + if (*start < 0) { + *start = (step < 0) ? -1 : 0; + } + } + else if (*start >= length) { + *start = (step < 0) ? 
length - 1 : length; + } + + if (*stop < 0) { + *stop += length; + if (*stop < 0) { + *stop = (step < 0) ? -1 : 0; + } + } + else if (*stop >= length) { + *stop = (step < 0) ? length - 1 : length; + } + + if (step < 0) { + if (*stop < *start) { + return (*start - *stop - 1) / (-step) + 1; + } + } + else { + if (*start < *stop) { + return (*stop - *start - 1) / step + 1; + } + } + return 0; +} diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.h b/yql/essentials/udfs/common/python/bindings/py27_backports.h new file mode 100644 index 00000000000..766af6a76fa --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.h @@ -0,0 +1,26 @@ +#pragma once + +#include "Python.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Declare functions which are to be backported +// (see details about need for backports in ya.make) + +int _PySlice_Unpack(PyObject *slice, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); + +Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t step); + +// Declare py23 compatible names + +#define PySlice_Unpack _PySlice_Unpack +#define PySlice_AdjustIndices _PySlice_AdjustIndices + +#ifdef __cplusplus +} +#endif diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.cpp b/yql/essentials/udfs/common/python/bindings/py_callable.cpp new file mode 100644 index 00000000000..c60403bdca2 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_callable.cpp @@ -0,0 +1,423 @@ +#include "py_callable.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_stream.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/string/builder.h> + +using 
namespace NKikimr; + +namespace NPython { +namespace { + +////////////////////////////////////////////////////////////////////////////// +// TPyCallableObject +////////////////////////////////////////////////////////////////////////////// +struct TPyCallableObject +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* Type; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + NUdf::TCallableTypeInspector Inspector; + + TPyCallableObject(const TPyCastContext::TPtr& castCtx, const NUdf::TType* type) + : CastCtx(castCtx) + , Type(type) + , Inspector(*castCtx->PyCtx->TypeInfoHelper, type) + {} +}; + +inline TPyCallableObject* CastToCallable(PyObject* o) +{ + return reinterpret_cast<TPyCallableObject*>(o); +} + +void CallableDealloc(PyObject* self) +{ + delete CastToCallable(self); +} + +PyObject* CallableRepr(PyObject*) +{ + // TODO: print callable signature + return PyRepr("<yql.TCallable>").Release(); +} + +PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) +{ + Y_UNUSED(kwargs); + + PY_TRY { + TPyCallableObject* callable = CastToCallable(self); + auto callableType = callable->Type; + auto valueBuilder = callable->CastCtx->ValueBuilder; + const auto& inspector = callable->Inspector; + + TSmallVec<NUdf::TUnboxedValue> cArgs; + cArgs.resize(inspector.GetArgsCount()); + FromPyArgs(callable->CastCtx, callableType, args, cArgs.data(), inspector); + + NUdf::TUnboxedValue result; + { + TPyGilUnlocker unlock; + result = NUdf::TBoxedValueAccessor::Run(*callable->Value.Get(), valueBuilder, cArgs.data()); + } + + return ToPyObject(callable->CastCtx, inspector.GetReturnType(), result).Release(); + } PY_CATCH(nullptr) +} + +} + +PyTypeObject PyCallableType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TCallable"), + INIT_MEMBER(tp_basicsize , sizeof(TPyCallableObject)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , CallableDealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), 
+#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , CallableRepr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , CallableCall), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , 0), + INIT_MEMBER(tp_doc , "yql.TCallable object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , nullptr), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// 
TPyCallable +////////////////////////////////////////////////////////////////////////////// +class TPyCallable: public NUdf::TBoxedValue +{ +public: + TPyCallable( + PyObject* function, + const NUdf::TType* functionType, + const TPyCastContext::TPtr& castCtx) + : Function_(function, TPyObjectPtr::ADD_REF) + , FunctionType_(functionType) + , CastCtx_(castCtx) + , Inspector_(*castCtx->PyCtx->TypeInfoHelper, functionType) + { + // keep ownership of function closure if any + if (PyFunction_Check(function)) { + PyObject* closure = PyFunction_GetClosure(function); + if (closure) { + Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF); + } + } + } + + ~TPyCallable() { + TPyGilLocker lock; + Closure_.Reset(); + Function_.Reset(); + CastCtx_.Reset(); + } + +private: + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder*, + const NUdf::TUnboxedValuePod* args) const final + { + TPyGilLocker lock; + try { + TPyObjectPtr pyArgs = ToPyArgs(CastCtx_, FunctionType_, args, Inspector_); + TPyObjectPtr resultObj = + PyObject_CallObject(Function_.Get(), pyArgs.Get()); + if (!resultObj) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + + auto returnType = Inspector_.GetReturnType(); + if (CastCtx_->PyCtx->TypeInfoHelper->GetTypeKind(returnType) == NUdf::ETypeKind::Stream) { + return FromPyStream(CastCtx_, returnType, resultObj, Function_, Closure_, pyArgs); + } + + return FromPyObject(CastCtx_, returnType, resultObj.Get()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" << e.what()).data()); + } + } + + TPyObjectPtr Function_; + TPyObjectPtr Closure_; + const NUdf::TType* FunctionType_; + TPyCastContext::TPtr CastCtx_; + NUdf::TCallableTypeInspector Inspector_; +}; + + +TPyObjectPtr ToPyCallable( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + 
TPyCallableObject* callable = new TPyCallableObject(castCtx, type); + PyObject_INIT(callable, &PyCallableType); + + callable->Value.Set(castCtx->PyCtx, value.AsBoxed()); + + return reinterpret_cast<PyObject*>(callable); +} + +NUdf::TUnboxedValue FromPyCallable( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) +{ + return NUdf::TUnboxedValuePod(new TPyCallable(value, type, castCtx)); +} + +TMaybe<TPyObjectPtr> GetOptionalAttribute(PyObject* value, const char* attrName) { + if (TPyObjectPtr attr = PyObject_GetAttrString(value, attrName)) { + return attr; + } else { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + return Nothing(); + } else { + throw yexception() << "Cannot get attribute '" << attrName << "', error: " << GetLastErrorAsString(); + } + } +} + + +struct TPySecureParam +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + + TPySecureParam(const TPyCastContext::TPtr& castCtx) : CastCtx(castCtx) {} +}; + +inline TPySecureParam* CastToSecureParam(PyObject* o) +{ + return reinterpret_cast<TPySecureParam*>(o); +} + +void SecureParamDealloc(PyObject* self) +{ + delete CastToSecureParam(self); +} + +PyObject* SecureParamRepr(PyObject*) +{ + return PyRepr("<yql.TSecureParam>").Release(); +} + +PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) +{ + Y_UNUSED(kwargs); + + struct PyBufDeleter { + void operator() (Py_buffer* view) { PyBuffer_Release(view); } + }; + Py_buffer input; + if (!PyArg_ParseTuple(args, "s*", &input)) { + return nullptr; + } + std::unique_ptr<Py_buffer, PyBufDeleter> bufPtr(&input); + auto valueBuilder = CastToSecureParam(self)->CastCtx->ValueBuilder; + NUdf::TStringRef key(static_cast<const char*>(input.buf), input.len); + PY_TRY { + if (!valueBuilder->GetSecureParam(key, key)) { + throw yexception() << "Cannot get secure parameter for key: " << key; + } + return PyRepr(TStringBuf(key.Data(), key.Size())).Release(); + } PY_CATCH(nullptr) +} + +static 
PyTypeObject PySecureParamType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TSecureParam"), + INIT_MEMBER(tp_basicsize , sizeof(TPySecureParam)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , SecureParamDealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , SecureParamRepr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , SecureParamCall), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , 0), + INIT_MEMBER(tp_doc , "yql.TSecureParam object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , nullptr), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + 
INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +TPyObjectPtr ToPySecureParam(const TPyCastContext::TPtr& castCtx) +{ + TPySecureParam* ret = new TPySecureParam(castCtx); + PyObject_INIT(ret, &PySecureParamType); + return reinterpret_cast<PyObject*>(ret); +} + + +void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value) { + if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) try { + castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get()); + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what(); + } + + if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) try { + Py_ssize_t itemsCount = PyTuple_GET_SIZE(convertYson->Get()); + if (itemsCount != 2) { + throw yexception() << "Expected tuple of 2 callables"; + } + + castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 0)); + castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 1)); + if (!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) { + throw yexception() << "Expected tuple of 2 callables"; + } + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what(); + } + + if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) try { + PyObject* bytesValue = nullptr; + if (PyBytes_Check(bytesDecodeMode->Get())) { + bytesValue = PyObject_Bytes(bytesDecodeMode->Get()); + } else if (PyUnicode_Check(bytesDecodeMode->Get())) { + bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get()); + } else { + throw yexception() << "Expected bytes or unicode"; + } + if (!bytesValue) { + PyErr_Clear(); + throw yexception() 
<< "Failed to convert to bytes"; + } + + TStringBuf view(PyBytes_AS_STRING(bytesValue)); + if (view == "never") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Never; + } else if (view == "strict") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Strict; + } else { + Py_DECREF(bytesValue); + throw yexception() << "Expected values 'never' or 'strict'"; + } + Py_DECREF(bytesValue); + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what(); + } + + if (PyObject_SetAttrString(value, "_yql_secure_param", ToPySecureParam(castCtx).Get()) != 0) { + throw yexception() << "Cannot set attribute '_yql_secure_param'"; + } +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.h b/yql/essentials/udfs/common/python/bindings/py_callable.h new file mode 100644 index 00000000000..4ce79e1d7f4 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_callable.h @@ -0,0 +1,22 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyCallableType; + +TPyObjectPtr ToPyCallable( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyCallable( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp new file mode 100644 index 00000000000..1c58d7b3714 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp @@ -0,0 +1,87 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyCallableTest) { + struct TTestCallable: public 
NUdf::TBoxedValue { + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override + { + Y_UNUSED(valueBuilder); + return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); + } + }; + + Y_UNIT_TEST(FromPyFunction) { + TPythonTestEngine engine; + const NUdf::IValueBuilder* vb = &engine.GetValueBuilder(); + + engine.ToMiniKQL<char* (*)(char*, ui32)>( + "def Test():\n" + " def test(str, count):\n" + " return str * count\n" + " return test", + [vb](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue args[2]; + args[0] = vb->NewString("j"); + args[1] = NUdf::TUnboxedValuePod((ui32) 5); + auto result = value.Run(vb, args); + + UNIT_ASSERT(result); + UNIT_ASSERT(5 == result.AsStringRef().Size()); + UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj"); + }); + } + + Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value):\n" + " assert type(value).__name__ == 'TCallable'\n" + " assert value.__call__ != None\n" + " assert value(-2) == 40\n" + " assert value(-1) == 41\n" + " assert value(0) == 42\n" + " assert value(1) == 43\n" + " assert value(2) == 44\n"); + } + + Y_UNIT_TEST(ToPythonAndBack) { + struct TTestCallable: public NUdf::TBoxedValue { + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override + { + Y_UNUSED(valueBuilder); + return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); + } + }; + + TPythonTestEngine engine; + engine.ToPythonAndBack<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + 
UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32) 5); + const auto result = value.Run(nullptr, &arg); + + UNIT_ASSERT(result); + UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>()); + }); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp new file mode 100644 index 00000000000..3aa5537b21b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp @@ -0,0 +1,955 @@ +#include "py_cast.h" +#include "py_ptr.h" +#include "py_errors.h" +#include "py_callable.h" +#include "py_dict.h" +#include "py_list.h" +#include "py_gil.h" +#include "py_utils.h" +#include "py_void.h" +#include "py_resource.h" +#include "py_stream.h" +#include "py_struct.h" +#include "py_tuple.h" +#include "py_variant.h" +#include "py_decimal.h" + +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_type_printer.h> +#include <yql/essentials/public/udf/udf_terminator.h> +#include <yql/essentials/utils/utf8.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/string/join.h> +#include <util/string/builder.h> + +#ifdef HAVE_LONG_LONG +# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask +# define YQL_PyLong_Asi64 PyLong_AsLongLong +# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong +#else +# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask +# define YQL_PyLong_Asi64 PyLong_AsLong +# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong +#endif + +#define TO_PYTHON(Format, Type) \ + template <> \ + ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \ + return Py_BuildValue(Format, value); \ + } + +#define TO_PYTHON_BYTES(Type) \ + template <> \ + ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ + const Py_ssize_t size 
/*
 * FROM_PYTHON_LONG(Type, BigType) defines the PyCast<Type>(PyObject*)
 * specialization for an integral Type.
 *
 * The Python int is first read through YQL_PyLong_As##BigType (BigType is
 * i64 or ui64) and then range-checked against Type before narrowing.
 *
 * Throws yexception when:
 *   - the object is not a Python int (ThrowCastTypeException);
 *   - the wide conversion itself fails (ThrowCastException);
 *   - the value does not fit into Type.
 */
#define FROM_PYTHON_LONG(Type, BigType)                                        \
    template <>                                                                \
    Type PyCast<Type>(PyObject* value) {                                       \
        if (PyLong_Check(value)) {                                             \
            auto result = YQL_PyLong_As##BigType(value);                       \
            /* The C API signals failure with -1 in the WIDE return type.   */ \
            /* Comparing against static_cast<Type>(-1) never matches for    */ \
            /* ui8/ui16/ui32 (0xFF != 0xFFFF'FFFF'FFFF'FFFF), which used to */ \
            /* report the failure as "out of range" and leave the Python    */ \
            /* error pending. Compare in the type of the result instead.    */ \
            if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
                PyErr_Clear();                                                 \
                ThrowCastException(value, "Long");                             \
            }                                                                  \
            if (result < Min<Type>() || result > Max<Type>()) {                \
                throw yexception() << "Python object " << PyObjectRepr(value)  \
                                   << " is out of range for " << #Type;        \
            }                                                                  \
            return static_cast<Type>(result);                                  \
        }                                                                      \
        ThrowCastTypeException(value, "Long");                                 \
    }
/*
 * FROM_PYTHON_BYTES(Type) defines PyCast<Type>(PyObject*) for string-like
 * Type constructed from (const char*, size_t).
 *
 * Only Python bytes objects are accepted: PY_ENSURE_TYPE throws yexception
 * for anything else. The returned Type is built directly from the buffer
 * reported by PyBytes_AsStringAndSize.
 */
#define FROM_PYTHON_BYTES(Type)                                                \
    template <>                                                                \
    Type PyCast<Type>(PyObject* value) {                                       \
        PY_ENSURE_TYPE(Bytes, value, "Expected");                              \
        Py_ssize_t length = 0;                                                 \
        char* data = nullptr;                                                  \
        const bool failed =                                                    \
            PyBytes_AsStringAndSize(value, &data, &length) == -1;              \
        if (failed || length < 0) {                                            \
            ThrowCastTypeException(value, "String");                           \
        }                                                                      \
        return Type(data, static_cast<size_t>(length));                        \
    }
/*
 * TRY_FROM_PYTHON_LONG(Type, BigType) defines the non-throwing
 * TryPyCast<Type>(PyObject*, Type&) specialization for an integral Type.
 *
 * Returns true and stores the narrowed value in `res` when `value` is a
 * Python int that fits into Type. Returns false, leaving `res` untouched,
 * when the object is not an int, the wide conversion fails, or the value
 * is out of range for Type.
 */
#define TRY_FROM_PYTHON_LONG(Type, BigType)                                    \
    template <>                                                                \
    bool TryPyCast<Type>(PyObject* value, Type& res) {                         \
        if (!PyLong_Check(value)) {                                            \
            return false;                                                      \
        }                                                                      \
        auto result = YQL_PyLong_As##BigType(value);                           \
        /* The failure sentinel is -1 in the WIDE return type. Comparing    */ \
        /* with static_cast<Type>(-1) misses it for ui8/ui16/ui32, so the   */ \
        /* function returned false with a Python exception still pending.   */ \
        if (result == static_cast<decltype(result)>(-1) && PyErr_Occurred()) { \
            PyErr_Clear();                                                     \
            return false;                                                      \
        }                                                                      \
        if (result < Min<Type>() || result > Max<Type>()) {                    \
            return false;                                                      \
        }                                                                      \
        res = static_cast<Type>(result);                                       \
        return true;                                                           \
    }
return false; \ + } + +#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \ + template <> \ + bool TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + int rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else { \ + return false; \ + } \ + } + +namespace NPython { + +using namespace NKikimr; + +inline void ThrowCastTypeException(PyObject* value, TStringBuf toType) { + throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to " << toType + << "; Object repr: " << PyObjectRepr(value); +} + +inline void ThrowCastException(PyObject* value, TStringBuf toType) { + throw yexception() << "Cast error object " << PyObjectRepr(value) << " to " << toType << ": " + << GetLastErrorAsString(); +} + + +template <> +bool TryPyCast<bool>(PyObject* value, bool& result) +{ + int isTrue = PyObject_IsTrue(value); + if (isTrue == -1) { + return false; + } + result = (isTrue == 1); + return true; +} + +#if PY_MAJOR_VERSION >= 3 +TRY_FROM_PYTHON_LONG(i8, i64) +TRY_FROM_PYTHON_LONG(ui8, ui64) +TRY_FROM_PYTHON_LONG(i16, i64) +TRY_FROM_PYTHON_LONG(ui16, ui64) +TRY_FROM_PYTHON_LONG(i32, i64) +TRY_FROM_PYTHON_LONG(ui32, ui64) +TRY_FROM_PYTHON_LONG(i64, i64) +TRY_FROM_PYTHON_LONG(ui64, ui64) +TRY_FROM_PYTHON_BYTES_OR_UTF(TString) +TRY_FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef) +#else +TRY_FROM_PYTHON_INT_OR_LONG(i8, i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui8, ui64) +TRY_FROM_PYTHON_INT_OR_LONG(i16, i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui16, ui64) +TRY_FROM_PYTHON_INT_OR_LONG(i32, 
i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui32, ui64) +TRY_FROM_PYTHON_INT_OR_LONG(i64, i64) +TRY_FROM_PYTHON_INT_OR_LONG(ui64, ui64) +TRY_FROM_PYTHON_STR_OR_UTF(TString) +TRY_FROM_PYTHON_STR_OR_UTF(NUdf::TStringRef) +#endif + +TRY_FROM_PYTHON_FLOAT(float) +TRY_FROM_PYTHON_FLOAT(double) + +template <> +bool PyCast<bool>(PyObject* value) +{ + int res = PyObject_IsTrue(value); + if (res == -1) { + throw yexception() << "Can't cast object '" << Py_TYPE(value)->tp_name << "' to bool. " + << GetLastErrorAsString(); + } + return res == 1; +} + +#if PY_MAJOR_VERSION >= 3 +FROM_PYTHON_LONG(i8, i64) +FROM_PYTHON_LONG(ui8, ui64) +FROM_PYTHON_LONG(i16, i64) +FROM_PYTHON_LONG(ui16, ui64) +FROM_PYTHON_LONG(i32, i64) +FROM_PYTHON_LONG(ui32, ui64) +FROM_PYTHON_LONG(i64, i64) +FROM_PYTHON_LONG(ui64, ui64) +FROM_PYTHON_BYTES_OR_UTF(TString) +FROM_PYTHON_BYTES_OR_UTF(TStringBuf) +FROM_PYTHON_BYTES_OR_UTF(NUdf::TStringRef) +#else +FROM_PYTHON_INT_OR_LONG(i8, i64) +FROM_PYTHON_INT_OR_LONG(ui8, ui64) +FROM_PYTHON_INT_OR_LONG(i16, i64) +FROM_PYTHON_INT_OR_LONG(ui16, ui64) +FROM_PYTHON_INT_OR_LONG(i32, i64) +FROM_PYTHON_INT_OR_LONG(ui32, ui64) +FROM_PYTHON_INT_OR_LONG(i64, i64) +FROM_PYTHON_INT_OR_LONG(ui64, ui64) +FROM_PYTHON_BYTES(TString) +FROM_PYTHON_BYTES(TStringBuf) +FROM_PYTHON_BYTES(NUdf::TStringRef) +#endif + +FROM_PYTHON_FLOAT(float) +FROM_PYTHON_FLOAT(double) + +template <> +TPyObjectPtr PyCast<bool>(bool value) +{ + PyObject* res = value ? 
Py_True : Py_False; + return TPyObjectPtr(res, TPyObjectPtr::ADD_REF); +} + +TO_PYTHON("b", i8) +TO_PYTHON("B", ui8) +TO_PYTHON("h", i16) +TO_PYTHON("H", ui16) +TO_PYTHON("i", i32) +TO_PYTHON("I", ui32) +#ifdef HAVE_LONG_LONG +TO_PYTHON("L", i64) +TO_PYTHON("K", ui64) +#else +TO_PYTHON("l", i64) +TO_PYTHON("k", ui64) +#endif + +TO_PYTHON_BYTES(TString) +TO_PYTHON_BYTES(TStringBuf) +TO_PYTHON_BYTES(NUdf::TStringRef) +TO_PYTHON_UNICODE(TString) +TO_PYTHON_UNICODE(TStringBuf) +TO_PYTHON_UNICODE(NUdf::TStringRef) + +template <typename T> +NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) { + PY_ENSURE(PyTuple_Check(value), + "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); + + Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); + PY_ENSURE(tupleSize == 2, + "Expected to get Tuple with 2 elements, but got " + << tupleSize << " elements"); + + PyObject* el0 = PyTuple_GET_ITEM(value, 0); + PyObject* el1 = PyTuple_GET_ITEM(value, 1); + auto num = PyCast<T>(el0); + if (num >= limit) { + throw yexception() << "Python object " << PyObjectRepr(el0) \ + << " is out of range for " << typeName; + } + + auto name = PyCast<NUdf::TStringRef>(el1); + auto ret = NUdf::TUnboxedValuePod(num); + ui32 tzId; + if (!ctx->ValueBuilder->GetDateBuilder().FindTimezoneId(name, tzId)) { + throw yexception() << "Unknown timezone: " << TStringBuf(name); + } + + ret.SetTimezoneId(tzId); + return ret; +} + +TO_PYTHON("f", float) +TO_PYTHON("d", double) + +namespace { + +TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto typeId = inspector.GetTypeId(); + + switch (typeId) { + case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>()); + case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>()); + case NUdf::TDataType<i16>::Id: return 
PyCast<i16>(value.Get<i16>()); + case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>()); + case NUdf::TDataType<i32>::Id: return PyCast<i32>(value.Get<i32>()); + case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>()); + case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>()); + case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>()); + case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case NUdf::TDataType<const char*>::Id: { + if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) { + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + } else { + auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << + "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" << + GetLastErrorAsString()).data() + ); + } + return pyObj; + } + } + case NUdf::TDataType<NUdf::TYson>::Id: { + auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef()); + if (ctx->YsonConverterIn) { + TPyObjectPtr pyArgs(PyTuple_New(1)); + PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release()); + pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + } + + return pyObj; + } + case NUdf::TDataType<NUdf::TUuid>::Id: + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: + return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>()); + case 
NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<NUdf::TTzDate>::Id: { + TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + } + + throw yexception() + << "Unsupported type " << typeId; +} + +NUdf::TUnboxedValue FromPyData( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto typeId = inspector.GetTypeId(); + + switch (typeId) { + case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value)); + case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value)); + case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value)); + case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value)); + case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value)); + case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value)); + case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value)); + case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value)); 
+ case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value)); + case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value)); + case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value)); + case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case NUdf::TDataType<NUdf::TYson>::Id: { + if (ctx->YsonConverterOut) { + TPyObjectPtr input(value, TPyObjectPtr::ADD_REF); + TPyObjectPtr pyArgs(PyTuple_New(1)); + // PyTuple_SET_ITEM steals reference, so pass ownership to it + PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release()); + input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get())); + if (!input) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get())); + } + } +#if PY_MAJOR_VERSION >= 3 + case NUdf::TDataType<const char*>::Id: + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + if (PyUnicode_Check(value)) { + const TPyObjectPtr uif8(PyUnicode_AsUTF8String(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); + } + throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; +#else + case NUdf::TDataType<const char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: { + if (PyUnicode_Check(value)) { + const TPyObjectPtr utf8(PyUnicode_AsUTF8String(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); + } + + if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) && + PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), 
static_cast<size_t>(PyBytes_GET_SIZE(value))))) { + throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json"; + } + + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + } +#endif + case NUdf::TDataType<NUdf::TUuid>::Id: { + const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + if (ret.AsStringRef().Size() != 16) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " has invalid value for Uuid"; + } + + return ret; + } + case NUdf::TDataType<NUdf::TDate>::Id: { + auto num = PyCast<ui16>(value); + if (num >= NUdf::MAX_DATE) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Date"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TDatetime>::Id: { + auto num = PyCast<ui32>(value); + if (num >= NUdf::MAX_DATETIME) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Datetime"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TTimestamp>::Id: { + auto num = PyCast<ui64>(value); + if (num >= NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Timestamp"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TInterval>::Id: { + auto num = PyCast<i64>(value); + if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for Interval"; + } + + return NUdf::TUnboxedValuePod(num); + } + + case NUdf::TDataType<NUdf::TTzDate>::Id: + return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx); + case NUdf::TDataType<NUdf::TTzDatetime>::Id: + return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx); + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, 
TStringBuf("TzTimestamp"), ctx); + } + + throw yexception() + << "Unsupported type " << typeId; +} + +TPyObjectPtr ToPyList( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto itemType = inspector.GetItemType(); + + if (ctx->LazyInputObjects) { + return ToPyLazyList(ctx, itemType, value); + } + + TPyObjectPtr list(PyList_New(0)); + const auto iterator = value.GetListIterator(); + for (NUdf::TUnboxedValue item; iterator.Next(item);) { + auto pyItem = ToPyObject(ctx, itemType, item); + if (PyList_Append(list.Get(), pyItem.Get()) < 0) { + throw yexception() << "Can't append item to list" + << GetLastErrorAsString(); + } + } + + return list; +} + +NUdf::TUnboxedValue FromPyList( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + + if (PyList_Check(value)) { + // eager list to list conversion + auto itemType = inspector.GetItemType(); + Py_ssize_t cnt = PyList_GET_SIZE(value); + NUdf::TUnboxedValue *items = nullptr; + const auto list = ctx->ValueBuilder->NewArray(cnt, items); + for (Py_ssize_t i = 0; i < cnt; ++i) { + PyObject *item = PyList_GET_ITEM(value, i); + *items++ = FromPyObject(ctx, itemType, item); + } + return list; + } + + if (PyTuple_Check(value)) { + // eager tuple to list conversion + auto itemType = inspector.GetItemType(); + Py_ssize_t cnt = PyTuple_GET_SIZE(value); + NUdf::TUnboxedValue *items = nullptr; + const auto list = ctx->ValueBuilder->NewArray(cnt, items); + for (Py_ssize_t i = 0; i < cnt; ++i) { + PyObject *item = PyTuple_GET_ITEM(value, i); + *items++ = FromPyObject(ctx, itemType, item); + } + return list; + } + + if (PyGen_Check(value)) { + TPyObjectPtr valuePtr(PyObject_GetIter(value)); + return FromPyLazyIterator(ctx, type, std::move(valuePtr)); + } + + if (PyIter_Check(value) +#if 
PY_MAJOR_VERSION < 3 + // python 2 iterators must also implement "next" method + && 1 == PyObject_HasAttrString(value, "next") +#endif + ) { + TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF); + return FromPyLazyIterator(ctx, type, std::move(valuePtr)); + } + + // assume that this function will returns generator + if (PyCallable_Check(value)) { + TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF); + return FromPyLazyGenerator(ctx, type, std::move(valuePtr)); + } + + if (PySequence_Check(value) || PyObject_HasAttrString(value, "__iter__")) { + TPyObjectPtr valuePtr(value, TPyObjectPtr::ADD_REF); + return FromPyLazyIterable(ctx, type, std::move(valuePtr)); + } + + throw yexception() << "Expected list, tuple, generator, generator factory, " + "iterator or iterable object, but got: " << PyObjectRepr(value); +} + +TPyObjectPtr ToPyOptional( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + if (!value) { + return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF); + } + + const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + return ToPyObject(ctx, inspector.GetItemType(), value); +} + +NUdf::TUnboxedValue FromPyOptional( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + if (value == Py_None) { + return NUdf::TUnboxedValue(); + } + + const NUdf::TOptionalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + return FromPyObject(ctx, inspector.GetItemType(), value).Release().MakeOptional(); +} + +TPyObjectPtr ToPyDict( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto keyType = inspector.GetKeyType(); + const auto valueType = inspector.GetValueType(); + + if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) { + if (ctx->LazyInputObjects) { // TODO + return ToPyLazySet(ctx, 
keyType, value); + } + + const TPyObjectPtr set(PyFrozenSet_New(nullptr)); + const auto iterator = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; iterator.Next(key);) { + auto pyKey = ToPyObject(ctx, keyType, key); + if (PySet_Add(set.Get(), pyKey.Get()) < 0) { + throw yexception() << "Can't add item to set" << GetLastErrorAsString(); + } + } + + return set; + } else { + if (ctx->LazyInputObjects) { + return ToPyLazyDict(ctx, keyType, valueType, value); + } + + const TPyObjectPtr dict(PyDict_New()); + const auto iterator = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, valueObj; iterator.NextPair(key, valueObj);) { + auto pyKey = ToPyObject(ctx, keyType, key); + auto pyValue = ToPyObject(ctx, valueType, valueObj); + if (PyDict_SetItem(dict.Get(), pyKey.Get(), pyValue.Get()) < 0) { + throw yexception() << "Can't add item to dict" << GetLastErrorAsString(); + } + } + + return dict; + } +} + +NUdf::TUnboxedValue FromPyDict( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto keyType = inspector.GetKeyType(); + const auto valueType = inspector.GetValueType(); + + if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType) + && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) { + const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType); + if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) { + return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value); + } + } else if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) { + if (PyAnySet_Check(value)) { + return FromPySet(ctx, keyType, value); + } else if (value->ob_type->tp_as_sequence && value->ob_type->tp_as_sequence->sq_contains) { + return FromPySequence(ctx, keyType, value); + 
} + } else if (PyDict_Check(value)) { + return FromPyDict(ctx, keyType, valueType, value); + } else if (PyMapping_Check(value)) { + return FromPyMapping(ctx, keyType, valueType, value); + } + + throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict."; +} + +} // namespace + +TPyObjectPtr ToPyObject( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { + case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value); + case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: return ToPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value); + case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: return ToPyStream(ctx, type, value); + case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value); + default: { + ::TStringBuilder sb; + sb << "Failed to export: "; + NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out); + throw yexception() << sb; + } + } +} + +NUdf::TUnboxedValue FromPyObject( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { + case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value); + case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: return FromPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, 
value); + case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); + case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value); + default: { + ::TStringBuilder sb; + sb << "Failed to import: "; + NUdf::TTypePrinter(*ctx->PyCtx->TypeInfoHelper, type).Out(sb.Out); + throw yexception() << sb; + } + } +} + +TPyObjectPtr ToPyArgs( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod* args, + const NUdf::TCallableTypeInspector& inspector) +{ + const auto argsCount = inspector.GetArgsCount(); + TPyObjectPtr tuple(PyTuple_New(argsCount)); + + for (ui32 i = 0; i < argsCount; i++) { + auto arg = ToPyObject(ctx, inspector.GetArgType(i), args[i]); + PyTuple_SET_ITEM(tuple.Get(), i, arg.Release()); + } + + return tuple; +} + +void FromPyArgs( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* pyArgs, + NUdf::TUnboxedValue* cArgs, + const NUdf::TCallableTypeInspector& inspector) +{ + PY_ENSURE_TYPE(Tuple, pyArgs, "Expected"); + + const auto argsCount = inspector.GetArgsCount(); + const auto optArgsCount = inspector.GetOptionalArgsCount(); + + ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs)); + PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount, + "arguments count missmatch: " + "min " << (argsCount - optArgsCount) << ", max " << argsCount + << ", got " << pyArgsCount); + + for (ui32 i = 0; i < pyArgsCount; i++) { + PyObject* item = PyTuple_GET_ITEM(pyArgs, i); + cArgs[i] = FromPyObject(ctx, inspector.GetArgType(i), item); + } + + for (ui32 i = pyArgsCount; i < argsCount; i++) { + cArgs[i] = NUdf::TUnboxedValuePod(); + } +} + +class TDummyMemoryLock : public IMemoryLock { 
+public: + void Acquire() override {} + void Release() override {} +}; + +TPyCastContext::TPyCastContext( + const NKikimr::NUdf::IValueBuilder* builder, + TPyContext::TPtr pyCtx, + THolder<IMemoryLock> memoryLock) + : ValueBuilder(builder) + , PyCtx(std::move(pyCtx)) + , MemoryLock(std::move(memoryLock)) +{ + if (!MemoryLock) { + MemoryLock = MakeHolder<TDummyMemoryLock>(); + } +} + +TPyCastContext::~TPyCastContext() { + TPyGilLocker locker; + StructTypes.clear(); + YsonConverterIn.Reset(); + YsonConverterOut.Reset(); + TimezoneNames.clear(); +} + +const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) { + auto& x = TimezoneNames[id]; + if (!x) { + NKikimr::NUdf::TStringRef ref; + if (!ValueBuilder->GetDateBuilder().FindTimezoneName(id, ref)) { + throw yexception() << "Unknown timezone id: " << id; + } + + x = PyRepr(ref); + } + + return x; +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.h b/yql/essentials/udfs/common/python/bindings/py_cast.h new file mode 100644 index 00000000000..e6850c74040 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_cast.h @@ -0,0 +1,45 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +#include <util/generic/typetraits.h> + +namespace NPython { + +template <typename T> +TPyObjectPtr PyCast(typename TTypeTraits<T>::TFuncParam value); + +template <typename T> +T PyCast(PyObject* value); + +template <typename T> +bool TryPyCast(PyObject* value, T& result); + +template <typename T> +TPyObjectPtr ToPyUnicode(const T& value); + +TPyObjectPtr ToPyObject( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyObject( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +TPyObjectPtr ToPyArgs( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod* args, + const 
NKikimr::NUdf::TCallableTypeInspector& inspector); + +void FromPyArgs( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* pyArgs, + NKikimr::NUdf::TUnboxedValue* cArgs, + const NKikimr::NUdf::TCallableTypeInspector& inspector); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp new file mode 100644 index 00000000000..47f65ab6fab --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp @@ -0,0 +1,90 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyCastTest) { + Y_UNIT_TEST(FromPyStrToInt) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<i32>( + "def Test():\n" + " return '123a'", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "str"); + } + + Y_UNIT_TEST(FromPyTupleToLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return 1, 1", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "tuple"); + } + + Y_UNIT_TEST(FromPyFuncToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def f():\n" + " return 42\n" + "def Test():\n" + " return f", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "function"); + } + + Y_UNIT_TEST(FromPyNoneToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def Test():\n" + " return None", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "None"); + } + + Y_UNIT_TEST(BadFromPythonFloat) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<float>( + "def Test():\n" + " return '3 <dot> 1415926'", + [](const NUdf::TUnboxedValuePod& value) { + 
Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object '3 <dot> 1415926' to Float"); + } + +#if PY_MAJOR_VERSION >= 3 +# define RETVAL "-1" +#else +# define RETVAL "-18446744073709551616L" +#endif + + Y_UNIT_TEST(BadFromPythonLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return " RETVAL, + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object " RETVAL " to Long"); + } + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_ctx.h b/yql/essentials/udfs/common/python/bindings/py_ctx.h new file mode 100644 index 00000000000..9e86042908f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_ctx.h @@ -0,0 +1,120 @@ +#pragma once + +#include "py_ptr.h" + +#include <yql/essentials/public/udf/udf_types.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_string.h> + +#include <util/generic/ptr.h> +#include <util/generic/intrlist.h> + +#include <unordered_map> + +namespace NPython { + +enum class EBytesDecodeMode { + Never, + Strict, +}; + +class IMemoryLock { +public: + virtual ~IMemoryLock() = default; + virtual void Acquire() = 0; + virtual void Release() = 0; +}; + +struct TPyCleanupListItemBase: public TIntrusiveListItem<TPyCleanupListItemBase> { + virtual ~TPyCleanupListItemBase() = default; + virtual void Cleanup() = 0; +}; + +template <typename TValueType> +class TPyCleanupListItem: public TPyCleanupListItemBase { +public: + TPyCleanupListItem() = default; + virtual ~TPyCleanupListItem() { + Unlink(); + } + + void Cleanup() override { + Value = {}; + } + + template <typename TCtx> + void Set(const TIntrusivePtr<TCtx>& ctx, TValueType val) { + Value = std::move(val); + ctx->CleanupList.PushBack(this); + } + + bool IsSet() const { + 
return !!Value; + } + + const TValueType& Get() const { + if (!Value) { + throw yexception() << "Trying to use python wrap object with destroyed yql value"; + } + return Value; + } + +private: + TValueType Value; +}; + +struct TPyContext: public TSimpleRefCount<TPyContext> { + const NKikimr::NUdf::ITypeInfoHelper::TPtr TypeInfoHelper; + const NKikimr::NUdf::TStringRef ResourceTag; + const NKikimr::NUdf::TSourcePosition Pos; + TIntrusiveList<TPyCleanupListItemBase> CleanupList; + + TPyContext(NKikimr::NUdf::ITypeInfoHelper::TPtr helper, const NKikimr::NUdf::TStringRef& tag, const NKikimr::NUdf::TSourcePosition& pos) + : TypeInfoHelper(std::move(helper)) + , ResourceTag(tag) + , Pos(pos) + { + } + + void Cleanup() { + for (auto& o: CleanupList) { + o.Cleanup(); + } + CleanupList.Clear(); + } + + ~TPyContext() = default; + + using TPtr = TIntrusivePtr<TPyContext>; +}; + +struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { + const NKikimr::NUdf::IValueBuilder *const ValueBuilder; + const TPyContext::TPtr PyCtx; + std::unordered_map<const NKikimr::NUdf::TType*, TPyObjectPtr> StructTypes; + bool LazyInputObjects = true; + TPyObjectPtr YsonConverterIn; + TPyObjectPtr YsonConverterOut; + EBytesDecodeMode BytesDecodeMode = EBytesDecodeMode::Never; + TPyObjectPtr Decimal; + std::unordered_map<ui32, TPyObjectPtr> TimezoneNames; + THolder<IMemoryLock> MemoryLock; + + TPyCastContext( + const NKikimr::NUdf::IValueBuilder* builder, + TPyContext::TPtr pyCtx, + THolder<IMemoryLock> memoryLock = {}); + + ~TPyCastContext(); + + const TPyObjectPtr& GetTimezoneName(ui32 id); + const TPyObjectPtr& GetDecimal(); + + using TPtr = TIntrusivePtr<TPyCastContext>; +}; + +using TPyCastContextPtr = TPyCastContext::TPtr; + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp new file mode 100644 index 00000000000..0070e3420f1 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/bindings/py_decimal.cpp @@ -0,0 +1,59 @@ +#include "py_decimal.h" +#include "py_errors.h" +#include "py_utils.h" +#include "py_cast.h" + +#include <util/stream/str.h> + +#include <yql/essentials/public/udf/udf_value.h> + +using namespace NKikimr; + +namespace NPython { + +TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale) +{ + const auto str = NYql::NDecimal::ToString(value.GetInt128(), precision, scale); + PY_ENSURE(str, "Bad decimal value."); + + const TPyObjectPtr pyStr(PyRepr(str)); + + const TPyObjectPtr args(PyTuple_Pack(1, pyStr.Get())); + PY_ENSURE(args, "Can't pack args."); + + const TPyObjectPtr dec(PyObject_CallObject(ctx->GetDecimal().Get(), args.Get())); + PY_ENSURE(dec, "Can't create Decimal."); + return dec; +} + +NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& ctx, PyObject* value, ui8 precision, ui8 scale) +{ + const TPyObjectPtr print(PyObject_Str(value)); + PY_ENSURE(print, "Can't print decimal."); + + TString str; + PY_ENSURE(TryPyCast<TString>(print.Get(), str), "Can't get decimal string."); + + if (str.EndsWith("Infinity")) { + str.resize(str.size() - 5U); + } + + const auto dec = NYql::NDecimal::FromStringEx(str.c_str(), precision, scale); + PY_ENSURE(!NYql::NDecimal::IsError(dec), "Can't make Decimal from string."); + + return NKikimr::NUdf::TUnboxedValuePod(dec); +} + +const TPyObjectPtr& TPyCastContext::GetDecimal() { + if (!Decimal) { + const TPyObjectPtr module(PyImport_ImportModule("decimal")); + PY_ENSURE(module, "Can't import decimal."); + + Decimal.ResetSteal(PyObject_GetAttrString(module.Get(), "Decimal")); + PY_ENSURE(Decimal, "Can't get Decimal."); + } + + return Decimal; +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal.h b/yql/essentials/udfs/common/python/bindings/py_decimal.h new file mode 100644 index 00000000000..5764fe4fa85 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/bindings/py_decimal.h @@ -0,0 +1,12 @@
+#pragma once
+
+#include "py_ptr.h"
+#include "py_ctx.h"
+
+namespace NPython {
+
+// Converts a YQL decimal value of the given precision and scale into a
+// Python object (a decimal.Decimal, per the implementation in py_decimal.cpp).
+TPyObjectPtr ToPyDecimal(const TPyCastContext::TPtr& castCtx, const NKikimr::NUdf::TUnboxedValuePod& value, ui8 precision, ui8 scale);
+
+// Converts a Python object into a YQL decimal value of the given precision
+// and scale.
+NKikimr::NUdf::TUnboxedValue FromPyDecimal(const TPyCastContext::TPtr& castCtx, PyObject* value, ui8 precision, ui8 scale);
+
+} // namespace NPython
diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp new file mode 100644 index 00000000000..8388c110f32 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp @@ -0,0 +1,122 @@
+#include "ut3/py_test_engine.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NPython;
+
+// Round-trip tests for decimal conversion between Python and YQL values.
+// "FromPy*" cases run a Python `Test()` function and inspect the resulting
+// unboxed value; "ToPy*" cases build an unboxed value and let a Python
+// `Test(value)` function assert on what it receives.
+Y_UNIT_TEST_SUITE(TPyDecimalTest) {
+    // Decimal() must arrive as a present value whose 128-bit payload is zero.
+    Y_UNIT_TEST(FromPyZero) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<NUdf::TDecimalDataType<12,5>>(
+            R"(
+from decimal import Decimal
+def Test(): return Decimal()
+            )",
+            [](const NUdf::TUnboxedValuePod& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(!value.GetInt128());
+            });
+    }
+
+    // With scale 18, '3.141592653589793238' is expected as the integer
+    // 3141592653589793238.
+    Y_UNIT_TEST(FromPyPi) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<NUdf::TDecimalDataType<28,18>>(
+            R"(
+from decimal import Decimal
+def Test(): return Decimal('3.141592653589793238')
+            )",
+            [](const NUdf::TUnboxedValuePod& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL);
+            });
+    }
+
+    // The smallest negative value at scale 35 is expected as the integer -1.
+    Y_UNIT_TEST(FromPyTini) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<NUdf::TDecimalDataType<35,35>>(
+            R"(
+from decimal import Decimal
+def Test(): return Decimal('-.00000000000000000000000000000000001')
+            )",
+            [](const NUdf::TUnboxedValuePod& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(value.GetInt128() == -1);
+            });
+    }
+
+    // Decimal('NaN') must map to the NYql::NDecimal::Nan() sentinel.
+    Y_UNIT_TEST(FromPyNan) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>(
+            R"(
+from decimal import Decimal
+def Test(): return Decimal('NaN')
+            )",
+            [](const NUdf::TUnboxedValuePod& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan());
+            });
+    }
+
+    // Decimal('-inf') must map to the negated NYql::NDecimal::Inf() sentinel.
+    Y_UNIT_TEST(FromPyInf) {
+        TPythonTestEngine engine;
+        engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>(
+            R"(
+from decimal import Decimal
+def Test(): return Decimal('-inf')
+            )",
+            [](const NUdf::TUnboxedValuePod& value) {
+                UNIT_ASSERT(value);
+                UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf());
+            });
+    }
+
+    // A zero unboxed value must be seen by Python as a zero Decimal.
+    Y_UNIT_TEST(ToPyZero) {
+        TPythonTestEngine engine;
+        engine.ToPython<NUdf::TDecimalDataType<7,7>>(
+            [](const TType*, const NUdf::IValueBuilder&) {
+                return NUdf::TUnboxedValuePod::Zero();
+            },
+            "def Test(value): assert value.is_zero()"
+        );
+    }
+
+    // The integer 3141592653589793238 at scale 18 must print as
+    // '3.141592653589793238' on the Python side.
+    Y_UNIT_TEST(ToPyPi) {
+        TPythonTestEngine engine;
+        engine.ToPython<NUdf::TDecimalDataType<20,18>>(
+            [](const TType*, const NUdf::IValueBuilder&) {
+                return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL));
+            },
+            "def Test(value): assert str(value) == '3.141592653589793238'"
+        );
+    }
+
+    // The integer -1 at scale 35 must format as the smallest negative value.
+    Y_UNIT_TEST(ToPyTini) {
+        TPythonTestEngine engine;
+        engine.ToPython<NUdf::TDecimalDataType<35,35>>(
+            [](const TType*, const NUdf::IValueBuilder&) {
+                return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1));
+            },
+            "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'"
+        );
+    }
+
+    // The NaN sentinel must be seen by Python as a NaN Decimal.
+    Y_UNIT_TEST(ToPyNan) {
+        TPythonTestEngine engine;
+        engine.ToPython<NUdf::TDecimalDataType<2,2>>(
+            [](const TType*, const NUdf::IValueBuilder&) {
+                return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan());
+            },
+            "def Test(value): assert value.is_nan()"
+        );
+    }
+
+    // The negated Inf sentinel must be seen by Python as a signed infinity.
+    Y_UNIT_TEST(ToPyInf) {
+        TPythonTestEngine engine;
+        engine.ToPython<NUdf::TDecimalDataType<30,0>>(
+            [](const TType*, const NUdf::IValueBuilder&) {
+                return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf());
+            },
+            "def Test(value): assert value.is_infinite() and value.is_signed()"
+        );
+    }
+}
diff --git 
a/yql/essentials/udfs/common/python/bindings/py_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_dict.cpp new file mode 100644 index 00000000000..f2bd0669eda --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_dict.cpp @@ -0,0 +1,683 @@
+#include "py_dict.h"
+#include "py_iterator.h"
+#include "py_cast.h"
+#include "py_errors.h"
+#include "py_utils.h"
+
+#include <yql/essentials/public/udf/udf_value.h>
+#include <yql/essentials/public/udf/udf_value_builder.h>
+#include <yql/essentials/public/udf/udf_type_inspection.h>
+
+
+using namespace NKikimr;
+
+namespace NPython {
+
+//////////////////////////////////////////////////////////////////////////////
+// TPyLazyDict interface
+//
+// Python object ("yql.TDict") that wraps a boxed YQL dictionary value and
+// converts keys/payloads on access instead of copying the whole dictionary.
+//////////////////////////////////////////////////////////////////////////////
+struct TPyLazyDict
+{
+    using TPtr = NUdf::TRefCountedPtr<TPyLazyDict, TPyPtrOps<TPyLazyDict>>;
+
+    PyObject_HEAD;
+    TPyCastContext::TPtr CastCtx;       // context used for every key/payload conversion
+    const NUdf::TType* KeyType;         // when null, the accessors treat keys as integer indices
+    const NUdf::TType* PayloadType;     // type used to convert looked-up payloads to Python
+    TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; // wrapped value, registered in the context's cleanup list
+
+    // Reinterprets a generic Python object pointer as this wrapper type.
+    inline static TPyLazyDict* Cast(PyObject* o) {
+        return reinterpret_cast<TPyLazyDict*>(o);
+    }
+
+    // tp_dealloc slot: destroys the wrapper with delete.
+    inline static void Dealloc(PyObject* self) {
+        delete Cast(self);
+    }
+
+    // Creates a new wrapper object around `value`.
+    static PyObject* New(
+            const TPyCastContext::TPtr& castCtx,
+            const NUdf::TType* keyType,
+            const NUdf::TType* payloadType,
+            NUdf::IBoxedValuePtr&& value);
+
+    // Slot implementations; each takes the wrapper as `self`.
+    static int Bool(PyObject* self);
+    static PyObject* Repr(PyObject* self);
+    static Py_ssize_t Len(PyObject* self);
+    static PyObject* Subscript(PyObject* self, PyObject* key);
+    static int Contains(PyObject* self, PyObject* key);
+    static PyObject* Get(PyObject* self, PyObject* args);
+
+    // Iterating the object itself iterates its keys.
+    static PyObject* Iter(PyObject* self) { return Keys(self, nullptr); }
+    static PyObject* Keys(PyObject* self, PyObject* /* args */);
+    static PyObject* Items(PyObject* self, PyObject* /* args */);
+    static PyObject* Values(PyObject* self, PyObject* /* args */);
+};
+
+// Mapping protocol: len(d) and d[key]; item assignment is not provided.
+PyMappingMethods LazyDictMapping = {
+    INIT_MEMBER(mp_length, TPyLazyDict::Len),
+    INIT_MEMBER(mp_subscript, TPyLazyDict::Subscript),
+    INIT_MEMBER(mp_ass_subscript, nullptr),
+};
+
+// Sequence protocol: only the length and `key in d` slots are filled in.
+PySequenceMethods LazyDictSequence = {
+    INIT_MEMBER(sq_length             , TPyLazyDict::Len),
+    INIT_MEMBER(sq_concat             , nullptr),
+    INIT_MEMBER(sq_repeat             , nullptr),
+    INIT_MEMBER(sq_item               , nullptr),
+#if PY_MAJOR_VERSION >= 3
+    INIT_MEMBER(was_sq_slice          , nullptr),
+#else
+    INIT_MEMBER(sq_slice              , nullptr),
+#endif
+    INIT_MEMBER(sq_ass_item           , nullptr),
+#if PY_MAJOR_VERSION >= 3
+    INIT_MEMBER(was_sq_ass_slice      , nullptr),
+#else
+    INIT_MEMBER(sq_ass_slice          , nullptr),
+#endif
+    INIT_MEMBER(sq_contains           , TPyLazyDict::Contains),
+    INIT_MEMBER(sq_inplace_concat     , nullptr),
+    INIT_MEMBER(sq_inplace_repeat     , nullptr),
+};
+
+// Number protocol: only the truth-value slot (nb_bool on Python 3,
+// nb_nonzero on Python 2) is filled in; every other slot is null.
+PyNumberMethods LazyDictNumbering = {
+     INIT_MEMBER(nb_add, nullptr),
+     INIT_MEMBER(nb_subtract, nullptr),
+     INIT_MEMBER(nb_multiply, nullptr),
+#if PY_MAJOR_VERSION < 3
+     INIT_MEMBER(nb_divide, nullptr),
+#endif
+     INIT_MEMBER(nb_remainder, nullptr),
+     INIT_MEMBER(nb_divmod, nullptr),
+     INIT_MEMBER(nb_power, nullptr),
+     INIT_MEMBER(nb_negative, nullptr),
+     INIT_MEMBER(nb_positive, nullptr),
+     INIT_MEMBER(nb_absolute, nullptr),
+#if PY_MAJOR_VERSION >= 3
+     INIT_MEMBER(nb_bool, TPyLazyDict::Bool),
+#else
+     INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool),
+#endif
+     INIT_MEMBER(nb_invert, nullptr),
+     INIT_MEMBER(nb_lshift, nullptr),
+     INIT_MEMBER(nb_rshift, nullptr),
+     INIT_MEMBER(nb_and, nullptr),
+     INIT_MEMBER(nb_xor, nullptr),
+     INIT_MEMBER(nb_or, nullptr),
+#if PY_MAJOR_VERSION < 3
+     INIT_MEMBER(nb_coerce, nullptr),
+#endif
+     INIT_MEMBER(nb_int, nullptr),
+#if PY_MAJOR_VERSION >= 3
+     INIT_MEMBER(nb_reserved, nullptr),
+#else
+     INIT_MEMBER(nb_long, nullptr),
+#endif
+     INIT_MEMBER(nb_float, nullptr),
+#if PY_MAJOR_VERSION < 3
+     INIT_MEMBER(nb_oct, nullptr),
+     INIT_MEMBER(nb_hex, nullptr),
+#endif
+
+     INIT_MEMBER(nb_inplace_add, nullptr),
+     INIT_MEMBER(nb_inplace_subtract, nullptr),
+     INIT_MEMBER(nb_inplace_multiply, nullptr),
+     INIT_MEMBER(nb_inplace_remainder, nullptr),
+     INIT_MEMBER(nb_inplace_power, nullptr),
+     INIT_MEMBER(nb_inplace_lshift, nullptr),
+     INIT_MEMBER(nb_inplace_rshift, nullptr),
+     INIT_MEMBER(nb_inplace_and, nullptr),
+     INIT_MEMBER(nb_inplace_xor, nullptr),
+     INIT_MEMBER(nb_inplace_or, nullptr),
+
+     INIT_MEMBER(nb_floor_divide, nullptr),
+     INIT_MEMBER(nb_true_divide, nullptr),
+     INIT_MEMBER(nb_inplace_floor_divide, nullptr),
+     INIT_MEMBER(nb_inplace_true_divide, nullptr),
+
+     INIT_MEMBER(nb_index, nullptr),
+#if PY_MAJOR_VERSION >= 3
+     INIT_MEMBER(nb_matrix_multiply, nullptr),
+     INIT_MEMBER(nb_inplace_matrix_multiply, nullptr),
+#endif
+};
+
+
+// On Python 3 these flag names are defined here as 0 so that the tp_flags
+// expressions of the type objects in this file compile unchanged.
+#if PY_MAJOR_VERSION >= 3
+#define Py_TPFLAGS_HAVE_ITER 0
+#define Py_TPFLAGS_HAVE_SEQUENCE_IN 0
+#endif
+
+// Doc strings attached to the entries of LazyDictMethods below.
+PyDoc_STRVAR(get__doc__,
+    "D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.");
+PyDoc_STRVAR(keys__doc__,
+    "D.keys() -> an iterator over the keys of D");
+PyDoc_STRVAR(values__doc__,
+    "D.values() -> an iterator over the values of D");
+PyDoc_STRVAR(items__doc__,
+    "D.items() -> an iterator over the (key, value) items of D");
+#if PY_MAJOR_VERSION < 3
+PyDoc_STRVAR(iterkeys__doc__,
+    "D.iterkeys() -> an iterator over the keys of D");
+PyDoc_STRVAR(itervalues__doc__,
+    "D.itervalues() -> an iterator over the values of D");
+PyDoc_STRVAR(iteritems__doc__,
+    "D.iteritems() -> an iterator over the (key, value) items of D");
+#endif
+
+// Named methods of yql.TDict. On Python 2 the iter* names are bound to the
+// same functions as keys/items/values.
+static PyMethodDef LazyDictMethods[] = {
+    { "get", TPyLazyDict::Get, METH_VARARGS, get__doc__ },
+    { "keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__ },
+    { "items", TPyLazyDict::Items, METH_NOARGS, items__doc__ },
+    { "values", TPyLazyDict::Values, METH_NOARGS, values__doc__ },
+#if PY_MAJOR_VERSION < 3
+    { "iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__ },
+    { "iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__ },
+    { "itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__ },
+#endif
+    { nullptr, nullptr, 0, nullptr }    /* sentinel */
+};
+
+PyTypeObject PyLazyDictType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TDict"), + INIT_MEMBER(tp_basicsize , sizeof(TPyLazyDict)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazyDict::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazyDict::Repr), + INIT_MEMBER(tp_as_number , &LazyDictNumbering), + INIT_MEMBER(tp_as_sequence , &LazyDictSequence), + INIT_MEMBER(tp_as_mapping , &LazyDictMapping), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc , "yql.TDict object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , &TPyLazyDict::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , LazyDictMethods), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + 
INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazySet interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazySet +{ + using TPtr = NUdf::TRefCountedPtr<TPyLazySet, TPyPtrOps<TPyLazySet>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + + inline static TPyLazySet* Cast(PyObject* o) { + return reinterpret_cast<TPyLazySet*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static int Contains(PyObject* self, PyObject* key); + static PyObject* Get(PyObject* self, PyObject* args); + + static PyObject* Iter(PyObject* self); +}; + +PySequenceMethods LazySetSequence = { + INIT_MEMBER(sq_length , TPyLazySet::Len), + INIT_MEMBER(sq_concat , nullptr), + INIT_MEMBER(sq_repeat , nullptr), + INIT_MEMBER(sq_item , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(was_sq_slice , nullptr), +#else + INIT_MEMBER(sq_slice , nullptr), +#endif + INIT_MEMBER(sq_ass_item , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(was_sq_ass_slice , nullptr), +#else + INIT_MEMBER(sq_ass_slice , nullptr), +#endif + INIT_MEMBER(sq_contains , TPyLazySet::Contains), + INIT_MEMBER(sq_inplace_concat , nullptr), + INIT_MEMBER(sq_inplace_repeat , nullptr), +}; + +PyNumberMethods LazySetNumbering = { + INIT_MEMBER(nb_add, 
nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyLazySet::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyLazySet::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), +#endif + + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), +#endif +}; + +PyTypeObject PyLazySetType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TSet"), + INIT_MEMBER(tp_basicsize , sizeof(TPyLazySet)), + 
INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazySet::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazySet::Repr), + INIT_MEMBER(tp_as_number , &LazySetNumbering), + INIT_MEMBER(tp_as_sequence , &LazySetSequence), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc , "yql.TSet object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , &TPyLazySet::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + 
INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyDict implementation +////////////////////////////////////////////////////////////////////////////// +int TPyLazyDict::Bool(PyObject* self) +{ + PY_TRY { + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazyDict::Repr(PyObject*) +{ + return PyRepr("<yql.TDict>").Release(); +} + +Py_ssize_t TPyLazyDict::Len(PyObject* self) +{ + PY_TRY { + return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); + } PY_CATCH(-1) +} + +PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key) +{ + PY_TRY { + TPyLazyDict* dict = Cast(self); + + if (dict->KeyType) { + const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + + const TPyObjectPtr repr = PyObject_Repr(key); + PyErr_SetObject(PyExc_KeyError, repr.Get()); + return nullptr; + } else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } + + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return nullptr; + } + + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + + const TPyObjectPtr repr = PyObject_Repr(key); + 
PyErr_SetObject(PyExc_IndexError, repr.Get()); + return nullptr; + } + + } PY_CATCH(nullptr) +} + +// -1 error +// 0 not found +// 1 found +int TPyLazyDict::Contains(PyObject* self, PyObject* key) +{ + PY_TRY { + TPyLazyDict* dict = Cast(self); + NUdf::TUnboxedValue mkqlKey; + + if (dict->KeyType) { + mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + } else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return -1; + } + + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return 0; + } + mkqlKey = NUdf::TUnboxedValuePod(ui64(index)); + } + + return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazyDict::Get(PyObject* self, PyObject* args) +{ + PY_TRY { + PyObject* key = nullptr; + PyObject* failobj = Py_None; + + if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) + return nullptr; + + TPyLazyDict* dict = Cast(self); + if (dict->KeyType) { + const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + } else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } + + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return nullptr; + } + + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), 
NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } + } + + Py_INCREF(failobj); + return failobj; + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::Keys(PyObject* self, PyObject* /* args */) +{ + PY_TRY { + const auto dict = Cast(self); + return ToPyIterator(dict->CastCtx, dict->KeyType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*dict->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::Items(PyObject* self, PyObject* /* args */) +{ + PY_TRY { + const auto dict = Cast(self); + return ToPyIterator(dict->CastCtx, dict->KeyType, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetDictIterator(*dict->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::Values(PyObject* self, PyObject* /* args */) +{ + PY_TRY { + const auto dict = Cast(self); + return ToPyIterator(dict->CastCtx, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetPayloadsIterator(*dict->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyDict::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value) +{ + TPyLazyDict* dict = new TPyLazyDict; + PyObject_INIT(dict, &PyLazyDictType); + + dict->CastCtx = castCtx; + dict->KeyType = keyType; + dict->PayloadType = payloadType; + dict->Value.Set(castCtx->PyCtx, value); + return reinterpret_cast<PyObject*>(dict); +} + +////////////////////////////////////////////////////////////////////////////// +// TPyLazySet implementation +////////////////////////////////////////////////////////////////////////////// +int TPyLazySet::Bool(PyObject* self) +{ + PY_TRY { + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 
1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazySet::Repr(PyObject*) +{ + return PyRepr("<yql.TSet>").Release(); +} + +Py_ssize_t TPyLazySet::Len(PyObject* self) +{ + PY_TRY { + return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); + } PY_CATCH(-1) +} + +// -1 error +// 0 not found +// 1 found +int TPyLazySet::Contains(PyObject* self, PyObject* key) +{ + PY_TRY { + const auto set = Cast(self); + const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key); + return NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey) ? 1 : 0; + } PY_CATCH(-1) +} + +PyObject* TPyLazySet::Iter(PyObject* self) +{ + PY_TRY { + const auto set = Cast(self); + return ToPyIterator(set->CastCtx, set->ItemType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*set->Value.Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazySet::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value) +{ + TPyLazySet* dict = new TPyLazySet; + PyObject_INIT(dict, &PyLazySetType); + + dict->CastCtx = castCtx; + dict->ItemType = itemType; + dict->Value.Set(castCtx->PyCtx, value); + return reinterpret_cast<PyObject*>(dict); +} + +////////////////////////////////////////////////////////////////////////////// + +TPyObjectPtr ToPyLazyDict( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyLazyDict::New(castCtx, keyType, payloadType, value.AsBoxed()); +} + +TPyObjectPtr ToPyLazySet( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyLazySet::New(castCtx, itemType, value.AsBoxed()); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.h b/yql/essentials/udfs/common/python/bindings/py_dict.h new file mode 100644 index 00000000000..538ca69a127 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/bindings/py_dict.h @@ -0,0 +1,50 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyLazyDictType; +extern PyTypeObject PyLazySetType; + +TPyObjectPtr ToPyLazyDict( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +TPyObjectPtr ToPyLazySet( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyMapping( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* map); + +NKikimr::NUdf::TUnboxedValue FromPyDict( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* dict); + +NKikimr::NUdf::TUnboxedValue FromPySet( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + PyObject* set); + +NKikimr::NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + PyObject* sequence); + +NKikimr::NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TDataTypeId keyType, + PyObject* sequence); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp new file mode 100644 index 00000000000..9ac9627ebba --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp @@ -0,0 +1,722 @@ +#include "ut3/py_test_engine.h" + +#include <yql/essentials/public/udf/udf_ut_helpers.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyDictTest) { + Y_UNIT_TEST(FromPyEmptyDict) { + TPythonTestEngine 
engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); + } + + Y_UNIT_TEST(FromPyDict_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); + } + + Y_UNIT_TEST(FromPyDict_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4)))); + }); + } + + Y_UNIT_TEST(FromPyDict_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4)))); + }); + } + + Y_UNIT_TEST(FromPyDict_Items) { + TPythonTestEngine engine; + 
engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::map<ui32, TString> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<ui32>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[1], "one"); + UNIT_ASSERT_EQUAL(items[2], "two"); + UNIT_ASSERT_EQUAL(items[3], "three"); + }); + } + + Y_UNIT_TEST(FromPyDict_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<ui32> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<ui32>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], 1U); + UNIT_ASSERT_EQUAL(items[1], 2U); + UNIT_ASSERT_EQUAL(items[2], 3U); + }); + } + + Y_UNIT_TEST(FromPyDict_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "three"); + UNIT_ASSERT_EQUAL(items[2], "two"); + }); + } + + Y_UNIT_TEST(FromPyList_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + 
UNIT_ASSERT(value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); + } + + Y_UNIT_TEST(FromPyTuple_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i32, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(i32(2))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); + UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three"); + const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2))); + UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two"); + const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3))); + UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one"); + + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4)))); + }); + } + + Y_UNIT_TEST(FromPyList_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i16, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4)))); + }); + } + + Y_UNIT_TEST(FromPyTuple_Items) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui16, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const 
NUdf::TUnboxedValuePod& value) { + std::vector<std::pair<ui16, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui16>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3U); + UNIT_ASSERT_EQUAL(items[0].first, 0); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 2); + UNIT_ASSERT_EQUAL(items[0].second, "one"); + UNIT_ASSERT_EQUAL(items[1].second, "two"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); + }); + } + + Y_UNIT_TEST(FromPyList_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i64, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<i64> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<i64>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], 0); + UNIT_ASSERT_EQUAL(items[1], 1); + UNIT_ASSERT_EQUAL(items[2], 2); + }); + } + + Y_UNIT_TEST(FromPyTuple_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui64, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "two"); + UNIT_ASSERT_EQUAL(items[2], "three"); + }); + } + + Y_UNIT_TEST(ToPyEmptyDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<ui8, ui32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n" + ); + } + + Y_UNIT_TEST(ToPyDict) { + 
TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value):\n" + " assert value\n" + " assert len(value) == 3\n" + " assert iter(value) is not None\n" + " assert 2 in value\n" + " assert 0 not in value\n" + " assert set(iter(value)) == set([1, 2, 3])\n" + " assert value[2] == 0.2\n" + " assert value.get(0, 0.7) == 0.7\n" + " assert value.get(3, 0.7) == 0.3\n" + " assert sorted(value.keys()) == [1, 2, 3]\n" + " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n" + " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" +#if PY_MAJOR_VERSION < 3 + " assert all(isinstance(k, int) for k in value.iterkeys())\n" + " assert all(isinstance(v, float) for v in value.itervalues())\n" + " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n" +#endif + ); + } + + Y_UNIT_TEST(ToPyDictWrongKey) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value):\n" + " try:\n" + " print(value[0])\n" + " except KeyError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(FromPyEmptySet) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<ui32, void>>( + "def Test(): return set([])", + [](const NUdf::TUnboxedValuePod& value) { + 
UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); + + } + + Y_UNIT_TEST(FromPySet) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return set(['one', 'two', 'three'])", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + + std::set<TString> set; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + set.emplace(key.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(set.size(), 3); + UNIT_ASSERT(set.count("one")); + UNIT_ASSERT(set.count("two")); + UNIT_ASSERT(set.count("three")); + }); + + } + + Y_UNIT_TEST(FromPySet_Contains) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return {b'one', b'two', b'three'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three"))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); + }); + + } + + Y_UNIT_TEST(ToPyEmptySet) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n" + ); + + } + + Y_UNIT_TEST(ToPySet) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((ui8) 1), NUdf::TUnboxedValuePod::Void()) + 
.Add(NUdf::TUnboxedValuePod((ui8) 2), NUdf::TUnboxedValuePod::Void()) + .Add(NUdf::TUnboxedValuePod((ui8) 3), NUdf::TUnboxedValuePod::Void()) + .Build(); + + }, + "def Test(value):\n" + " assert len(value) == 3\n" + " assert all(isinstance(k, int) for k in iter(value))\n" + " assert all(i in value for i in [1, 2, 3])\n"); + } + + Y_UNIT_TEST(FromPyMultiDict) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>( + "def Test(): return {1: ['one', 'two'], 3: ['three']}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 2); + + std::unordered_map<ui32, std::vector<TString>> map; + const auto dictIt = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) { + auto& val = map[key.Get<ui32>()]; + const auto listIt = payload.GetListIterator(); + for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) { + val.emplace_back(listItem.AsStringRef()); + } + } + + UNIT_ASSERT_EQUAL(map.size(), 2); + auto it = map.find(1); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 2); + UNIT_ASSERT_EQUAL(it->second[0], "one"); + UNIT_ASSERT_EQUAL(it->second[1], "two"); + it = map.find(3); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 1); + UNIT_ASSERT_EQUAL(it->second[0], "three"); + }); + + } + + Y_UNIT_TEST(ToPyMultiDict) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi; + return vb.NewDict(type, flags)-> + Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("one")) + .Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("two")) + .Add(NUdf::TUnboxedValuePod((ui8) 3), vb.NewString("three")) + .Build(); + + }, + "def Test(value):\n" + " assert len(value) == 2\n" + " assert 1 in value\n" + " assert 3 in 
value\n" + " assert len(value[1]) == 2\n" + " assert 'one' in value[1]\n" + " assert 'two' in value[1]\n" + " assert list(value[3]) == ['three']\n"); + } + + Y_UNIT_TEST(ToPyAndBackDictAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> + Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32) 0))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32) 3))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32) 2)).Get<double>(), 0.2); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32) 4))); + + std::vector<std::pair<i32, double>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i32>(), payload.Get<double>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 1); + UNIT_ASSERT_EQUAL(items[1].first, 2); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, 0.1); + UNIT_ASSERT_EQUAL(items[1].second, 0.2); + UNIT_ASSERT_EQUAL(items[2].second, 0.3); + + std::vector<i32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 1); + UNIT_ASSERT_EQUAL(keys[1], 2); + UNIT_ASSERT_EQUAL(keys[2], 3); + + std::vector<double> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue 
payload; pit.Next(payload);) { + values.emplace_back(payload.Get<double>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], 0.1); + UNIT_ASSERT_EQUAL(values[1], 0.2); + UNIT_ASSERT_EQUAL(values[2], 0.3); + } + ); + } + + Y_UNIT_TEST(PyInvertDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> + Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) + .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) + .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) + .Build(); + }, + "def Test(value): return { v: k for k, v in value.items() }", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double) 0.1))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double) 0.0))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double) 0.4))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double) 0.2)).Get<i32>(), 2); + + std::map<double, i32> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<double>(), payload.Get<i32>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0.1], 1); + UNIT_ASSERT_EQUAL(items[0.2], 2); + UNIT_ASSERT_EQUAL(items[0.3], 3); + } + ); + } + + Y_UNIT_TEST(FromPyOrderedDict) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "from collections import OrderedDict\n" + "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + 
UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v.AsStringRef(), "one"); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4))))); + +#if PY_MAJOR_VERSION >= 3 + std::vector<std::pair<ui32, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui32>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 2); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, "two"); + UNIT_ASSERT_EQUAL(items[1].second, "one"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); + + std::vector<ui32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<ui32>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 2); + UNIT_ASSERT_EQUAL(keys[1], 1); + UNIT_ASSERT_EQUAL(keys[2], 3); + + std::vector<TString> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], "two"); + UNIT_ASSERT_EQUAL(values[1], "one"); + UNIT_ASSERT_EQUAL(values[2], "three"); +#endif + }); + } + + Y_UNIT_TEST(ToPyAndBackSetAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<float, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> + Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void()) + .Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void()) + .Add(NUdf::TUnboxedValuePod(0.3f), 
NUdf::TUnboxedValuePod::Void()) + .Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f))); + UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f))); + + std::vector<float> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<float>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 0.1f); + UNIT_ASSERT_EQUAL(keys[1], 0.2f); + UNIT_ASSERT_EQUAL(keys[2], 0.3f); + } + ); + } + + Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue *items = nullptr; + const auto a = vb.NewArray(9U, items); + const float f[] = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f }; + std::transform(f, f + 9U, items, [](float v){ return NUdf::TUnboxedValuePod(v); }); + return a; + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13)))); + + std::vector<std::pair<i8, float>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i8>(), payload.Get<float>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + 
UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 0.1f); + UNIT_ASSERT_EQUAL(items.back().second, 0.9f); + + std::vector<i8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i8>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); + + std::vector<float> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<float>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 0.1f); + UNIT_ASSERT_EQUAL(values.back(), 0.9f); + } + ); + } + + Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13)))); + + std::vector<std::pair<ui8, i32>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui8>(), payload.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 1); + UNIT_ASSERT_EQUAL(items.back().second, 9); + + std::vector<ui8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; 
kit.Next(key);) { + keys.emplace_back(key.Get<ui8>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); + + std::vector<i32> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 1); + UNIT_ASSERT_EQUAL(values.back(), 9); + } + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.cpp b/yql/essentials/udfs/common/python/bindings/py_errors.cpp new file mode 100644 index 00000000000..5741978d543 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_errors.cpp @@ -0,0 +1,72 @@ +#include "py_errors.h" +#include "py_ptr.h" +#include "py_cast.h" +#include "py_utils.h" + +#include <util/generic/string.h> +#include <util/stream/output.h> + +namespace NPython { + +// this function in conjunction with code after Py_Initialize +// does approximately the following: +// +// sys.stderr = StderrProxy(sys.stderr) +// +// ... 
+// +// sys.stderr._toggle_real_mode() +// sys.excepthook( +// sys.last_type, +// sys.last_value, +// sys.last_traceback) +// sys.stderr._get_value() +// sys.stderr._toggle_real_mode() +// +// where _toggle_real_mode, _get_value & all calls to stderr not in real mode +// are handled in a thread-safe way +// +TString GetLastErrorAsString() +{ + PyObject* etype; + PyObject* evalue; + PyObject* etraceback; + + PyErr_Fetch(&etype, &evalue, &etraceback); + + if (!etype) { + return {}; + } + + TPyObjectPtr etypePtr {etype, TPyObjectPtr::ADD_REF}; + TPyObjectPtr evaluePtr {evalue, TPyObjectPtr::ADD_REF}; + TPyObjectPtr etracebackPtr {etraceback, TPyObjectPtr::ADD_REF}; + + TPyObjectPtr stderrObject {PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF}; + if (!stderrObject) { + return {}; + } + + TPyObjectPtr unused = PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr); + + PyErr_Restore(etypePtr.Get(), evaluePtr.Get(), etracebackPtr.Get()); + // in unusual situations there may be low-level write to stderr + // (by direct C FILE* write), but that's OK + PyErr_Print(); + + TPyObjectPtr error = PyObject_CallMethod(stderrObject.Get(), "_get_value", nullptr); + if (!error) { + return {}; + } + unused.ResetSteal( + PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr) + ); + + TString errorValue; + if (!TryPyCast(error.Get(), errorValue)) { + errorValue = TString("can't get error string from: ") += PyObjectRepr(error.Get()); + } + return errorValue; +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.h b/yql/essentials/udfs/common/python/bindings/py_errors.h new file mode 100644 index 00000000000..2306b47bb95 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_errors.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/generic/fwd.h> + +namespace NPython { + +TString GetLastErrorAsString(); + +#define PY_TRY try + +#define PY_CATCH(ErrorValue) \ + catch (const yexception& e) { \ + 
PyErr_SetString(PyExc_RuntimeError, e.what()); \ + return ErrorValue; \ + } + +#define PY_ENSURE(condition, message) \ + do { \ + if (Y_UNLIKELY(!(condition))) { \ + throw yexception() << message; \ + } \ + } while (0) + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_gil.h b/yql/essentials/udfs/common/python/bindings/py_gil.h new file mode 100644 index 00000000000..70e9bf3e91d --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_gil.h @@ -0,0 +1,37 @@ +#pragma once + +#include <Python.h> + + +namespace NPython { + +// Scope guard: calls PyGILState_Ensure() on construction and passes the +// saved state to PyGILState_Release() on destruction. +struct TPyGilLocker +{ + TPyGilLocker() + : Gil(PyGILState_Ensure()) + { + } + + // non-copyable: a copy would call PyGILState_Release() a second time + // with the same saved state + TPyGilLocker(const TPyGilLocker&) = delete; + TPyGilLocker& operator=(const TPyGilLocker&) = delete; + + ~TPyGilLocker() { + PyGILState_Release(Gil); + } + +private: + PyGILState_STATE Gil; +}; + +// Scope guard: calls PyEval_SaveThread() on construction and passes the +// returned thread state to PyEval_RestoreThread() on destruction. +struct TPyGilUnlocker { + TPyGilUnlocker() + : ThreadState(PyEval_SaveThread()) + { + } + + // non-copyable: a copy would call PyEval_RestoreThread() a second time + // with the same thread state + TPyGilUnlocker(const TPyGilUnlocker&) = delete; + TPyGilUnlocker& operator=(const TPyGilUnlocker&) = delete; + + ~TPyGilUnlocker() { + PyEval_RestoreThread(ThreadState); + } + +private: + PyThreadState* ThreadState; +}; + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp new file mode 100644 index 00000000000..090211be2c1 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp @@ -0,0 +1,280 @@ +#include "py_iterator.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +using namespace NKikimr; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// TPyIterator interface +////////////////////////////////////////////////////////////////////////////// +struct TPyIterator +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator; + + inline static TPyIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + 
delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TDictKeysIterator>").Release(); + } + + static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator); + static PyObject* Next(PyObject* self); +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TDictKeysIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , 
nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyPairIterator interface +////////////////////////////////////////////////////////////////////////////// +struct TPyPairIterator +{ + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* KeyType; + const NUdf::TType* PayloadType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Iterator; + + inline static TPyPairIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyPairIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TDictIterator>").Release(); + } + + static PyObject* New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator); + static PyObject* Next(PyObject* self); +}; + +PyTypeObject PyPairIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TDictIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyPairIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyPairIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + 
INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyPairIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TPairIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyPairIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyIterator implementation 
+////////////////////////////////////////////////////////////////////////////// +PyObject* TPyIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, NUdf::IBoxedValuePtr&& iterator) +{ + TPyIterator* dictIter = new TPyIterator; + PyObject_INIT(dictIter, &PyIteratorType); + dictIter->CastCtx = ctx; + dictIter->ItemType = itemType; + dictIter->Iterator.Set(ctx->PyCtx, iterator); + return reinterpret_cast<PyObject*>(dictIter); +} + +PyObject* TPyIterator::Next(PyObject* self) +{ + PY_TRY { + const auto iter = Cast(self); + NUdf::TUnboxedValue item; + if (NUdf::TBoxedValueAccessor::Next(*iter->Iterator.Get(), item)) { + return (iter->ItemType ? ToPyObject(iter->CastCtx, iter->ItemType, item) : PyCast<ui64>(item.Get<ui64>())).Release(); + } + return nullptr; + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// +// TPyPairIterator implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyPairIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payloadType, NUdf::IBoxedValuePtr&& iterator) +{ + TPyPairIterator* dictIter = new TPyPairIterator; + PyObject_INIT(dictIter, &PyPairIteratorType); + dictIter->CastCtx = ctx; + dictIter->KeyType = keyType; + dictIter->PayloadType = payloadType; + dictIter->Iterator.Set(ctx->PyCtx, iterator); + return reinterpret_cast<PyObject*>(dictIter); +} + +PyObject* TPyPairIterator::Next(PyObject* self) +{ + PY_TRY { + const auto iter = Cast(self); + NUdf::TUnboxedValue k, v; + if (NUdf::TBoxedValueAccessor::NextPair(*iter->Iterator.Get(), k, v)) { + const TPyObjectPtr key = iter->KeyType ? 
+ ToPyObject(iter->CastCtx, iter->KeyType, k): + PyCast<ui64>(k.Get<ui64>()); + const TPyObjectPtr value = ToPyObject(iter->CastCtx, iter->PayloadType, v); + return PyTuple_Pack(2, key.Get(), value.Get()); + } + return nullptr; + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyIterator::New(castCtx, itemType, value.AsBoxed()); +} + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) +{ + return TPyPairIterator::New(castCtx, keyType, payloadType, value.AsBoxed()); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.h b/yql/essentials/udfs/common/python/bindings/py_iterator.h new file mode 100644 index 00000000000..5c5de27b0bc --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.h @@ -0,0 +1,23 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyIteratorType; +extern PyTypeObject PyPairIteratorType; + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +TPyObjectPtr ToPyIterator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); + + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp new file mode 100644 index 00000000000..ffaa2fe4ec0 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp @@ -0,0 +1,705 @@ +#include "py_cast.h" +#include "py_errors.h" 
+#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/generic/maybe.h> +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { +namespace { +////////////////////////////////////////////////////////////////////////////// +// TLazyDictBase +////////////////////////////////////////////////////////////////////////////// +class TLazyDictBase: public NUdf::TBoxedValue +{ +protected: + class TIterator: public NUdf::TBoxedValue { + public: + TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter) + : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter)) + {} + + ~TIterator() { + const TPyGilLocker lock; + PyIter_.Reset(); + } + + private: + bool Skip() override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Next(NUdf::TUnboxedValue& value) override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + value = FromPyObject(CastCtx_, ItemType_, next.Get()); + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + payload = NUdf::TUnboxedValuePod::Void(); + return Next(key); + } + + private: 
+ const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PyIter_; + }; + + class TPairIterator: public NUdf::TBoxedValue { + public: + TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter) + : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter)) + {} + + ~TPairIterator() { + const TPyGilLocker lock; + PyIter_.Reset(); + } + + private: + bool Skip() override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + key = FromPyObject(CastCtx_, KeyType_, PyTuple_GET_ITEM(next.Get(), 0)); + pay = FromPyObject(CastCtx_, PayType_, PyTuple_GET_ITEM(next.Get(), 1)); + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + private: + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* KeyType_; + const NUdf::TType* PayType_; + TPyObjectPtr PyIter_; + }; + + TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject) + : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef()) + {} + + ~TLazyDictBase() { + const TPyGilLocker lock; + PyObject_.Reset(); + } + + bool HasDictItems() const override try { + const TPyGilLocker lock; + const auto has = 
PyObject_IsTrue(PyObject_.Get()); + if (has < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return bool(has); + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PyObject_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyMapping +////////////////////////////////////////////////////////////////////////////// +class TLazyMapping: public TLazyDictBase +{ +public: + TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) + : TLazyDictBase(ctx, keyType, dict), PayType_(payType) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PyMapping_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyMapping_Keys(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyMapping_Values(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) 
{ + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetDictIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyMapping_Items(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + if (const auto item = PyObject_GetItem(PyObject_.Get(), pyKey.Get())) { + return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional(); + } + + if (PyErr_Occurred()) { + PyErr_Clear(); + } + + return NUdf::TUnboxedValue(); + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto map = PyObject_.Get(); + const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ? 
+ (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) : + PyMapping_HasKey(map, pyKey.Get()); + + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + +private: + const NUdf::TType* PayType_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyDict +////////////////////////////////////////////////////////////////////////////// +class TLazyDict: public TLazyDictBase +{ +public: + TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) + : TLazyDictBase(ctx, keyType, dict), PayType_(payType) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PyDict_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyDict_Keys(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyDict_Values(PyObject_.Get())) { + if (TPyObjectPtr pyIter = 
PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetDictIterator() const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyList = PyDict_Items(PyObject_.Get())) { + if (TPyObjectPtr pyIter = PyObject_GetIter(pyList.Get())) { + return NUdf::TUnboxedValuePod(new TPairIterator(CastCtx_, ItemType_, PayType_, std::move(pyIter))); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + if (const auto item = PyDict_GetItem(PyObject_.Get(), pyKey.Get())) { + return FromPyObject(CastCtx_, PayType_, item).Release().MakeOptional(); + } else if (!PyErr_Occurred()) { + return NUdf::TUnboxedValue(); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto has = PyDict_Contains(PyObject_.Get(), pyKey.Get()); + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + 
+private: + const NUdf::TType* PayType_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazySet +////////////////////////////////////////////////////////////////////////////// +class TLazySet: public TLazyDictBase +{ +public: + TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set) + : TLazyDictBase(ctx, itemType, set) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PySet_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + return Contains(key) ? NUdf::TUnboxedValuePod::Void() : NUdf::TUnboxedValuePod(); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto has = PySet_Contains(PyObject_.Get(), pyKey.Get()); + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue 
GetPayloadsIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetDictIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetListIterator() const override { + return GetKeysIterator(); + } + + ui64 GetListLength() const override { + return GetDictLength(); + } + + bool HasListItems() const override { + return HasDictItems(); + } + + bool HasFastListLength() const override { + return true; + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazySequenceAsSet +////////////////////////////////////////////////////////////////////////////// +class TLazySequenceAsSet: public TLazyDictBase +{ +public: + TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence) + : TLazyDictBase(ctx, keyType, sequence) + {} + +private: + bool IsSortedDict() const override { return false; } + + ui64 GetDictLength() const override try { + const TPyGilLocker lock; + const auto len = PySequence_Size(PyObject_.Get()); + if (len < 0) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return ui64(len); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + return Contains(key) ? 
NUdf::TUnboxedValuePod::Void() : NUdf::TUnboxedValuePod(); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override try { + const TPyGilLocker lock; + if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { + const auto has = PySequence_Contains(PyObject_.Get(), pyKey.Get()); + if (has >= 0) { + return bool(has); + } + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetKeysIterator() const override try { + const TPyGilLocker lock; + if (TPyObjectPtr pyIter = PyObject_GetIter(PyObject_.Get())) { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, std::move(pyIter))); + } + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetDictIterator() const override { + return GetKeysIterator(); + } + + NUdf::TUnboxedValue GetListIterator() const override { + return GetKeysIterator(); + } + + ui64 GetListLength() const override { + return GetDictLength(); + } + + bool HasListItems() const override { + return HasDictItems(); + } + + bool HasFastListLength() const override { + return true; + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazySequenceAsDict +////////////////////////////////////////////////////////////////////////////// +template<typename KeyType> +class TLazySequenceAsDict: public NUdf::TBoxedValue +{ +private: + class TKeyIterator: public NUdf::TBoxedValue { + public: + TKeyIterator(Py_ssize_t size) + : Size(size), Index(0) + {} + + private: + bool Skip() override { + if (Index >= Size) + return false; + + ++Index; + 
return true; + } + + bool Next(NUdf::TUnboxedValue& value) override { + if (Index >= Size) + return false; + + value = NUdf::TUnboxedValuePod(KeyType(Index++)); + return true; + } + + private: + const Py_ssize_t Size; + Py_ssize_t Index; + }; + + class TIterator: public NUdf::TBoxedValue { + public: + TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq) + : CastCtx_(ctx), ItemType_(itemType), PySeq_(pySeq), Size(size), Index(0) + {} + + ~TIterator() { + const TPyGilLocker lock; + PySeq_.Reset(); + } + + private: + bool Skip() override { + if (Index >= Size) + return false; + + ++Index; + return true; + } + + bool Next(NUdf::TUnboxedValue& value) override try { + if (Index >= Size) + return false; + + const TPyGilLocker lock; + value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++)); + return true; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try { + if (Index >= Size) + return false; + + const TPyGilLocker lock; + key = NUdf::TUnboxedValuePod(KeyType(Index)); + pay = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index++)); + return true; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + private: + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PySeq_; + const Py_ssize_t Size; + Py_ssize_t Index; + }; + +public: + TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size) + : CastCtx_(ctx), ItemType_(itemType), Size(size), PySeq_(std::move(sequence)) + {} + + ~TLazySequenceAsDict() + { + const TPyGilLocker lock; + PySeq_.Reset(); + } + +private: + bool IsSortedDict() const override { return true; } + + bool HasDictItems() 
const override { + return Size > 0; + } + + ui64 GetDictLength() const override { + return Size; + } + + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + const Py_ssize_t index = key.Get<KeyType>(); + if (index >= -Size && index < Size) try { + const TPyGilLocker lock; + if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size + index)) { + return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional(); + } else if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + return NUdf::TUnboxedValue(); + } + + bool Contains(const NUdf::TUnboxedValuePod& key) const override { + const Py_ssize_t index = key.Get<KeyType>(); + return index >= -Size && index < Size; + } + + NUdf::TUnboxedValue GetKeysIterator() const override { + return NUdf::TUnboxedValuePod(new TKeyIterator(Size)); + } + + NUdf::TUnboxedValue GetPayloadsIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_)); + } + + NUdf::TUnboxedValue GetDictIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(CastCtx_, ItemType_, Size, PySeq_)); + } + + const TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + const Py_ssize_t Size; + TPyObjectPtr PySeq_; +}; + +} // namspace + +NUdf::TUnboxedValue FromPyDict( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* dict) +{ + return NUdf::TUnboxedValuePod(new TLazyDict(castCtx, keyType, payType, dict)); +} + +NUdf::TUnboxedValue FromPyMapping( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* map) +{ + return NUdf::TUnboxedValuePod(new TLazyMapping(castCtx, keyType, payType, map)); +} + +NUdf::TUnboxedValue FromPySet( + 
const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + PyObject* set) +{ + return NUdf::TUnboxedValuePod(new TLazySet(castCtx, itemType, set)); +} + +NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + PyObject* set) +{ + return NUdf::TUnboxedValuePod(new TLazySequenceAsSet(castCtx, keyType, set)); +} + +NUdf::TUnboxedValue FromPySequence( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TDataTypeId keyType, + PyObject* sequence) +{ + if (TPyObjectPtr fast = PySequence_Fast(sequence, "Can't get fast sequence.")) { + const auto size = PySequence_Fast_GET_SIZE(fast.Get()); + if (size >= 0) { + switch (keyType) { +#define MAKE_PRIMITIVE_TYPE_SIZE(type) \ + case NUdf::TDataType<type>::Id: \ + return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size)); + INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE) +#undef MAKE_PRIMITIVE_TYPE_SIZE + } + Y_ABORT("Invalid key type."); + } + } + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp new file mode 100644 index 00000000000..fe3b8892e66 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp @@ -0,0 +1,382 @@ +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/generic/maybe.h> +#include <util/string/builder.h> + + +using namespace NKikimr; + +namespace NPython { +namespace { + +static ui64 CalculateIteratorLength(PyObject* iter, const TPyCastContext::TPtr& castCtx) +{ 
+ PyObject* item; + + ui64 length = 0; + while ((item = PyIter_Next(iter))) { + length++; + Py_DECREF(item); + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return length; +} + +static bool IsIteratorHasItems(PyObject* iter, const TPyCastContext::TPtr& castCtx) +{ + if (const TPyObjectPtr item = PyIter_Next(iter)) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; +} + +////////////////////////////////////////////////////////////////////////////// +// TBaseLazyList +////////////////////////////////////////////////////////////////////////////// +template<typename TDerived> +class TBaseLazyList: public NUdf::TBoxedValue +{ + using TListSelf = TBaseLazyList<TDerived>; + + class TIterator: public NUdf::TBoxedValue { + public: + TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter) + : CastCtx_(ctx) + , PyIter_(std::move(pyIter)) + , ItemType_(type) + {} + + ~TIterator() { + const TPyGilLocker lock; + PyIter_.Reset(); + } + + private: + bool Skip() override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool Next(NUdf::TUnboxedValue& value) override try { + const TPyGilLocker lock; + const TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + value = FromPyObject(CastCtx_, ItemType_, next.Get()); + return true; + } + + if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return false; + } catch (const yexception& e) { + 
UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + private: + const TPyCastContext::TPtr CastCtx_; + TPyObjectPtr PyIter_; + const NUdf::TType* ItemType_; + }; + +public: + TBaseLazyList( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : CastCtx_(castCtx) + , PyObject_(std::move(pyObject)) + , ItemType_(NUdf::TListTypeInspector(*CastCtx_->PyCtx->TypeInfoHelper, type).GetItemType()) + { + } + + ~TBaseLazyList() { + TPyGilLocker lock; + PyObject_.Reset(); + } + +private: + TPyObjectPtr GetIterator() const try { + return static_cast<const TDerived*>(this)->GetIteratorImpl(); + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool HasFastListLength() const override { + return Length_.Defined(); + } + + ui64 GetEstimatedListLength() const override { + return GetListLength(); + } + + ui64 GetListLength() const override try { + if (!Length_.Defined()) { + const TPyGilLocker lock; + TPyObjectPtr iter = GetIterator(); + Length_ = CalculateIteratorLength(iter.Get(), CastCtx_); + } + + return *Length_; + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool HasListItems() const override try { + if (Length_.Defined()) + return *Length_ > 0; + + const TPyGilLocker lock; + TPyObjectPtr iter = GetIterator(); + const bool hasItems = IsIteratorHasItems(iter.Get(), CastCtx_); + if (!hasItems) { + Length_ = 0; + } + return hasItems; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + NUdf::TUnboxedValue GetListIterator() const override try { + const TPyGilLocker lock; + TPyObjectPtr pyIter = GetIterator(); + auto* self = const_cast<TListSelf*>(this); + return NUdf::TUnboxedValuePod(new TIterator(self->CastCtx_, self->ItemType_, std::move(pyIter))); + } catch (const yexception& e) { + 
UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + const NUdf::TOpaqueListRepresentation* GetListRepresentation() const override { + return nullptr; + } + + NUdf::IBoxedValuePtr ReverseListImpl( + const NUdf::IValueBuilder& builder) const override + { + Y_UNUSED(builder); + return nullptr; + } + + NUdf::IBoxedValuePtr SkipListImpl( + const NUdf::IValueBuilder& builder, ui64 count) const override + { + Y_UNUSED(builder); + Y_UNUSED(count); + return nullptr; + } + + NUdf::IBoxedValuePtr TakeListImpl( + const NUdf::IValueBuilder& builder, ui64 count) const override + { + Y_UNUSED(builder); + Y_UNUSED(count); + return nullptr; + } + + NUdf::IBoxedValuePtr ToIndexDictImpl( + const NUdf::IValueBuilder& builder) const override + { + Y_UNUSED(builder); + return nullptr; + } + +protected: + const TPyCastContext::TPtr CastCtx_; + TPyObjectPtr PyObject_; + const NUdf::TType* ItemType_; + mutable TMaybe<ui64> Length_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyIterable +////////////////////////////////////////////////////////////////////////////// +class TLazyIterable: public TBaseLazyList<TLazyIterable> +{ + using TBase = TBaseLazyList<TLazyIterable>; +public: + TLazyIterable( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : TBase(castCtx, std::move(pyObject), type) + {} + + TPyObjectPtr GetIteratorImpl() const { + if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) { + return ret; + } + + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos + << "Cannot get iterator from object: " + << PyObjectRepr(PyObject_.Get()) << ", error: " + << GetLastErrorAsString()).data()); + } + +private: + bool HasFastListLength() const override { + return Length_.Defined(); + } + + ui64 GetListLength() const override try { + if (!Length_.Defined()) { + const TPyGilLocker lock; + const auto len = PyObject_Size(PyObject_.Get()); + if (len >= 0) { + 
Length_ = len; + } else { + Length_ = CalculateIteratorLength(GetIteratorImpl().Get(), CastCtx_); + } + } + return *Length_; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + + bool HasListItems() const override try { + const TPyGilLocker lock; + bool hasItems = false; + const auto isTrue = PyObject_IsTrue(PyObject_.Get()); + if (isTrue != -1) { + hasItems = static_cast<bool>(isTrue); + } else { + TPyObjectPtr iter = GetIteratorImpl(); + hasItems = IsIteratorHasItems(iter.Get(), CastCtx_); + } + if (!hasItems) { + Length_ = 0; + } + return hasItems; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyIterator +////////////////////////////////////////////////////////////////////////////// +class TLazyIterator: public TBaseLazyList<TLazyIterator> +{ + using TBase = TBaseLazyList<TLazyIterator>; +public: + TLazyIterator( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : TBase(castCtx, std::move(pyObject), type) + , IteratorDrained_(false) + {} + + TPyObjectPtr GetIteratorImpl() const { + if (IteratorDrained_) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << + "Lazy list was build under python iterator. 
" + "Iterator was already used.").data()); + } + IteratorDrained_ = true; + return PyObject_; + } + +private: + mutable bool IteratorDrained_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TLazyGenerator +////////////////////////////////////////////////////////////////////////////// +class TLazyGenerator: public TBaseLazyList<TLazyGenerator> +{ + using TBase = TBaseLazyList<TLazyGenerator>; +public: + TLazyGenerator( + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) + : TBase(castCtx, std::move(pyObject), type) + { + // keep ownership of function closure if any + if (PyFunction_Check(PyObject_.Get())) { + PyObject* closure = PyFunction_GetClosure(PyObject_.Get()); + if (closure) { + Closure_ = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF); + } + } + } + + ~TLazyGenerator() { + const TPyGilLocker lock; + Closure_.Reset(); + } + + TPyObjectPtr GetIteratorImpl() const { + TPyObjectPtr generator = PyObject_CallObject(PyObject_.Get(), nullptr); + if (!generator || !PyGen_Check(generator.Get())) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected generator as a result of function call").data()); + } + return PyObject_GetIter(generator.Get()); + } + +private: + TPyObjectPtr Closure_; +}; + +} // namspace + + +NUdf::TUnboxedValue FromPyLazyGenerator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr callableObj) +{ + return NUdf::TUnboxedValuePod(new TLazyGenerator(castCtx, std::move(callableObj), type)); +} + +NUdf::TUnboxedValue FromPyLazyIterable( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iterableObj) +{ + return NUdf::TUnboxedValuePod(new TLazyIterable(castCtx, std::move(iterableObj), type)); +} + +NUdf::TUnboxedValue FromPyLazyIterator( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iteratorObj) +{ + return NUdf::TUnboxedValuePod(new TLazyIterator(castCtx, 
std::move(iteratorObj), type)); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list.cpp b/yql/essentials/udfs/common/python/bindings/py_list.cpp new file mode 100644 index 00000000000..376a1ca124a --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_list.cpp @@ -0,0 +1,1116 @@ +#include "py_list.h" +#include "py_dict.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +using namespace NKikimr; + +#if PY_MAJOR_VERSION >= 3 +#define SLICEOBJ(obj) obj +#else +#define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj)) +// See details about need for backports in ya.make +#include "py27_backports.h" +#endif + +namespace NPython { + +namespace { +inline Py_ssize_t CastIndex(PyObject* key, const char* name) +{ + Py_ssize_t index = -1; + if (PyIndex_Check(key)) { + index = PyNumber_AsSsize_t(key, PyExc_IndexError); + } + if (index < 0) { + const TPyObjectPtr value = PyUnicode_FromFormat("argument of %s must be positive integer or long", name); + PyErr_SetObject(PyExc_IndexError, value.Get()); + } + + return index; +} +} + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyList interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazyList +{ + using TPtr = NUdf::TRefCountedPtr<TPyLazyList, TPyPtrOps<TPyLazyList>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Dict; + Py_ssize_t Step; + Py_ssize_t CachedLength; + + inline static TPyLazyList* Cast(PyObject* o) { + return reinterpret_cast<TPyLazyList*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + 
NUdf::IBoxedValuePtr value, + Py_ssize_t step = 1, + Py_ssize_t size = -1); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static PyObject* Iter(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static PyObject* Subscript(PyObject* self, PyObject* slice); + static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */); + static PyObject* Reversed(PyObject* self, PyObject* /* arg */); + static PyObject* Take(PyObject* self, PyObject* arg); + static PyObject* Skip(PyObject* self, PyObject* arg); + static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */); + static PyObject* HasItems(PyObject* self, PyObject* /* arg */); +}; + +PyMappingMethods LazyListMapping = { + INIT_MEMBER(mp_length, TPyLazyList::Len), + INIT_MEMBER(mp_subscript, TPyLazyList::Subscript), + INIT_MEMBER(mp_ass_subscript, nullptr), +}; + +PyNumberMethods LazyListNumbering = { + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyLazyList::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyLazyList::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), +#endif + + 
INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), +#endif +}; + +PyDoc_STRVAR(reversed__doc__, "DEPRECATED: use reversed(list) or list[::-1] instead."); +PyDoc_STRVAR(take__doc__, "DEPRECATED: use slice list[:n] instead."); +PyDoc_STRVAR(skip__doc__, "DEPRECATED: use slice list[n:] instead."); +PyDoc_STRVAR(to_index_dict__doc__, "DEPRECATED: use list[n] instead."); +PyDoc_STRVAR(has_fast_len__doc__, "DEPRECATED: do not use."); +PyDoc_STRVAR(has_items__doc__, "DEPRECATED: test list as bool instead."); + +static PyMethodDef TPyLazyListMethods[] = { + { "__reversed__", TPyLazyList::Reversed, METH_NOARGS, nullptr }, + { "to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, + { "reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__ }, + { "take", TPyLazyList::Take, METH_O, take__doc__ }, + { "skip", TPyLazyList::Skip, METH_O, skip__doc__ }, + { "has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, + { "has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__ }, + { nullptr, nullptr, 0, nullptr } /* sentinel */ +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyLazyListType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TList"), + 
INIT_MEMBER(tp_basicsize , sizeof(TPyLazyList)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazyList::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazyList::Repr), + INIT_MEMBER(tp_as_number , &LazyListNumbering), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , &LazyListMapping), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TList object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , TPyLazyList::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , TPyLazyListMethods), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 
+ INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyListIterator interface +////////////////////////////////////////////////////////////////////////////// +struct TPyLazyListIterator +{ + PyObject_HEAD; + TPyLazyList::TPtr List; + TPyCleanupListItem<NUdf::TUnboxedValue> Iterator; + Py_ssize_t Length; + TPyCastContext::TPtr CastCtx; + + inline static TPyLazyListIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyLazyListIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + auto obj = Cast(self); + auto ctx = obj->CastCtx; + ctx->MemoryLock->Acquire(); + delete obj; + ctx->MemoryLock->Release(); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TListIterator>").Release(); + } + + static PyObject* New(TPyLazyList* list); + static PyObject* Next(PyObject* self); +}; + +PyTypeObject PyLazyListIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TListIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyLazyListIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyLazyListIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyLazyListIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + 
INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.ListIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyLazyListIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyList implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyLazyList::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + Py_ssize_t step, + Py_ssize_t size) +{ + TPyLazyList* list = new TPyLazyList; + PyObject_INIT(list, &PyLazyListType); + + list->CastCtx = castCtx; + list->ItemType = itemType; + list->Value.Set(castCtx->PyCtx, value); + list->Step = step; + list->CachedLength = size; + + return 
reinterpret_cast<PyObject*>(list); +} + +PyObject* TPyLazyList::Repr(PyObject*) +{ + return PyRepr("<yql.TList>").Release(); +} + +PyObject* TPyLazyList::Iter(PyObject* self) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + return TPyLazyListIterator::New(list); + } PY_CATCH(nullptr) +} + +Py_ssize_t TPyLazyList::Len(PyObject* self) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); + } + return (list->CachedLength + list->Step - 1) / list->Step; + } PY_CATCH(-1) +} + +PyObject* TPyLazyList::Subscript(PyObject* self, PyObject* slice) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + const auto vb = list->CastCtx->ValueBuilder; + + if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); + + if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); + } + + if (index < 0) { + if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get())); + } + + ++index *= list->Step; + --index += list->CachedLength; + } else { + index *= list->Step; + } + + if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } + + if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release(); + } + + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } + + if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, 
size; + + if (list->CachedLength >= 0) { + if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) { + return nullptr; + } + } else { + if (PySlice_Unpack(slice, &start, &stop, &step) < 0) { + return nullptr; + } + + if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); + size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step); + } else { + size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step); + } + } + + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } + + const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step; + const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step; + step = step > lo && step < hi ? step * list->Step : (step > 0 ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN); + + NUdf::TUnboxedValue newList; + if (size > 0) { + size = step > 0 ? + (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX): + (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX); + + start = start < hi ? start * list->Step : PY_SSIZE_T_MAX; + const Py_ssize_t skip = step > 0 ? 
start : start - size + 1; + + newList = NUdf::TUnboxedValuePod(list->Value.Get().Get()); + if (skip > 0) { + newList = vb->SkipList(newList, skip); + } + + if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) { + newList = vb->TakeList(newList, size); + } + + if (step < 0) { + step = -step; + newList = vb->ReverseList(newList); + } + } else { + newList = vb->NewEmptyList(); + } + + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size); + } + + const TPyObjectPtr type = PyObject_Type(slice); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::ToIndexDict(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); + } + + return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, NUdf::TUnboxedValuePod(list->Dict.Get().Get())).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::Reversed(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + const auto newList = list->CastCtx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::Take(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + Py_ssize_t count = CastIndex(arg, "take"); + if (count < 0) { + return nullptr; + } + count *= list->Step; + + auto vb = list->CastCtx->ValueBuilder; + NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + auto newList = vb->TakeList(value, static_cast<ui64>(count)); + return 
New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::Skip(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + Py_ssize_t count = CastIndex(arg, "skip"); + if (count < 0) { + return nullptr; + } + count *= list->Step; + + NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count)); + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::HasFastLen(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get())) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } PY_CATCH(nullptr) +} + +PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get())) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } PY_CATCH(nullptr) +} + +int TPyLazyList::Bool(PyObject* self) +{ + PY_TRY { + TPyLazyList* list = Cast(self); + if (list->CachedLength == -1) { + return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0; + } else { + return list->CachedLength > 0 ? 
1 : 0; + } + } PY_CATCH(-1) +} + +////////////////////////////////////////////////////////////////////////////// +// TPyLazyListIterator implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyLazyListIterator::New(TPyLazyList* list) +{ + TPyLazyListIterator* listIter = new TPyLazyListIterator; + PyObject_INIT(listIter, &PyLazyListIteratorType); + listIter->List.Reset(list); + listIter->Iterator.Set(list->CastCtx->PyCtx, NUdf::TBoxedValueAccessor::GetListIterator(*list->Value.Get())); + listIter->Length = 0; + listIter->CastCtx = list->CastCtx; + return reinterpret_cast<PyObject*>(listIter); +} + +PyObject* TPyLazyListIterator::Next(PyObject* self) +{ + PY_TRY { + TPyLazyListIterator* iter = Cast(self); + TPyLazyList* list = iter->List.Get(); + + NUdf::TUnboxedValue item; + if (iter->Iterator.Get().Next(item)) { + ++iter->Length; + + for (auto skip = list->Step; --skip && iter->Iterator.Get().Skip(); ++iter->Length) + continue; + + return ToPyObject(list->CastCtx, list->ItemType, item).Release(); + } + + // store calculated list length after traverse over whole list + if (list->CachedLength == -1) { + list->CachedLength = iter->Length; + } + + return nullptr; + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// +// TPyThinList interface +////////////////////////////////////////////////////////////////////////////// +struct TPyThinList +{ + using TPtr = NUdf::TRefCountedPtr<TPyThinList, TPyPtrOps<TPyThinList>>; + + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + const NUdf::TType* ItemType; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + const NUdf::TUnboxedValue* Elements; + Py_ssize_t Length; + Py_ssize_t Step; + + inline static TPyThinList* Cast(PyObject* o) { + return reinterpret_cast<TPyThinList*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const 
NUdf::TType* itemType, + NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(), + const NUdf::TUnboxedValue* elements = nullptr, + Py_ssize_t length = 0, + Py_ssize_t step = 1); + + static int Bool(PyObject* self); + static PyObject* Repr(PyObject* self); + static PyObject* Iter(PyObject* self); + static Py_ssize_t Len(PyObject* self); + static PyObject* Subscript(PyObject* self, PyObject* slice); + static PyObject* ToIndexDict(PyObject* self, PyObject* /* arg */); + static PyObject* Reversed(PyObject* self, PyObject* /* arg */); + static PyObject* Take(PyObject* self, PyObject* arg); + static PyObject* Skip(PyObject* self, PyObject* arg); + static PyObject* HasFastLen(PyObject* self, PyObject* /* arg */); + static PyObject* HasItems(PyObject* self, PyObject* /* arg */); +}; + +PyMappingMethods ThinListMapping = { + INIT_MEMBER(mp_length, TPyThinList::Len), + INIT_MEMBER(mp_subscript, TPyThinList::Subscript), + INIT_MEMBER(mp_ass_subscript, nullptr), +}; + +PyNumberMethods ThinListNumbering = { + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_divide, nullptr), +#endif + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_bool, TPyThinList::Bool), +#else + INIT_MEMBER(nb_nonzero, TPyThinList::Bool), +#endif + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), +#if PY_MAJOR_VERSION < 3 + INIT_MEMBER(nb_coerce, nullptr), +#endif + INIT_MEMBER(nb_int, nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(nb_reserved, nullptr), +#else + INIT_MEMBER(nb_long, nullptr), +#endif + INIT_MEMBER(nb_float, nullptr), +#if 
// Python-visible methods of the thin list type.
// "__reversed__" and "reversed" are bound to the same handler; the dunder
// entry carries no docstring. "take" and "skip" receive exactly one argument
// (METH_O); all other methods take none (METH_NOARGS).
static PyMethodDef TPyThinListMethods[] = {
    { "__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr },
    { "to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ },
    { "reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__ },
    { "take", TPyThinList::Take, METH_O, take__doc__ },
    { "skip", TPyThinList::Skip, METH_O, skip__doc__ },
    { "has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__ },
    { "has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__ },
    { nullptr, nullptr, 0, nullptr } /* sentinel */
};
INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyThinList::Repr), + INIT_MEMBER(tp_as_number , &ThinListNumbering), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , &ThinListMapping), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TList object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , TPyThinList::Iter), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , TPyThinListMethods), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyThinListIterator interface +////////////////////////////////////////////////////////////////////////////// 
+struct TPyThinListIterator +{ + PyObject_HEAD; + TPyThinList::TPtr List; + const NUdf::TUnboxedValue* Elements; + Py_ssize_t Count; + + inline static TPyThinListIterator* Cast(PyObject* o) { + return reinterpret_cast<TPyThinListIterator*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TListIterator>").Release(); + } + + static PyObject* New(TPyThinList* list); + static PyObject* Next(PyObject* self); +}; + +PyTypeObject PyThinListIteratorType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TListIterator"), + INIT_MEMBER(tp_basicsize , sizeof(TPyThinListIterator)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyThinListIterator::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyThinListIterator::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.ListIterator object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyThinListIterator::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base 
, nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +////////////////////////////////////////////////////////////////////////////// +// TPyThinList implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyThinList::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + const NUdf::TUnboxedValue* elements, + Py_ssize_t length, + Py_ssize_t step) +{ + TPyThinList* list = new TPyThinList; + PyObject_INIT(list, &PyThinListType); + + list->CastCtx = castCtx; + list->ItemType = itemType; + list->Value.Set(castCtx->PyCtx, value); + list->Elements = elements; + list->Length = length; + list->Step = step; + + return reinterpret_cast<PyObject*>(list); +} + +PyObject* TPyThinList::Repr(PyObject*) +{ + return PyRepr("<yql.TList>").Release(); +} + +PyObject* TPyThinList::Iter(PyObject* self) +{ + PY_TRY { + TPyThinList* list = Cast(self); + return TPyThinListIterator::New(list); + } PY_CATCH(nullptr) +} + +Py_ssize_t TPyThinList::Len(PyObject* self) +{ + return Cast(self)->Length; +} + +PyObject* TPyThinList::Subscript(PyObject* self, PyObject* slice) +{ + PY_TRY { + TPyThinList* list = 
Cast(self); + const auto vb = list->CastCtx->ValueBuilder; + + if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); + + if (index < 0) { + index += list->Length; + } + + if (index < 0 || index >= list->Length) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } + + if (list->Step > 0) { + index *= list->Step; + } else { + index = list->Length - ++index; + index *= -list->Step; + } + + return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release(); + } + + if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, size; + + if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) { + return nullptr; + } + + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } + + if (size > 0) { + const Py_ssize_t skip = list->Step * (list->Step > 0 ? + (step > 0 ? start : start + step * (size - 1)): + (step > 0 ? 
stop : start + 1) - list->Length); + + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step); + } else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); + } + } + + const TPyObjectPtr type = PyObject_Type(slice); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } PY_CATCH(nullptr) +} + +#undef SLICEOBJ + +PyObject* TPyThinList::ToIndexDict(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyThinList* list = Cast(self); + const auto dict = list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())); + return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release(); + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::Reversed(PyObject* self, PyObject* /* arg */) +{ + PY_TRY { + TPyThinList* list = Cast(self); + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements, list->Length, -list->Step); + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::Take(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyThinList* list = Cast(self); + const Py_ssize_t count = CastIndex(arg, "take"); + if (count < 0) { + return nullptr; + } + + if (const auto size = std::min(count, list->Length)) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? 
list->Elements : list->Elements + list->Length + size * list->Step, size, list->Step); + } else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); + } + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::Skip(PyObject* self, PyObject* arg) +{ + PY_TRY { + TPyThinList* list = Cast(self); + const Py_ssize_t count = CastIndex(arg, "skip"); + if (count < 0) { + return nullptr; + } + + if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements + count * list->Step : list->Elements, size, list->Step); + } else { + return New(list->CastCtx, list->ItemType); + } + } PY_CATCH(nullptr) +} + +PyObject* TPyThinList::HasFastLen(PyObject* self, PyObject* /* arg */) +{ + Py_RETURN_TRUE; +} + +PyObject* TPyThinList::HasItems(PyObject* self, PyObject* /* arg */) +{ + if (Cast(self)->Length > 0) + Py_RETURN_TRUE; + else + Py_RETURN_FALSE; +} + +int TPyThinList::Bool(PyObject* self) +{ + return Cast(self)->Length > 0 ? 1 : 0; +} + +////////////////////////////////////////////////////////////////////////////// +// TPyThinListIterator implementation +////////////////////////////////////////////////////////////////////////////// +PyObject* TPyThinListIterator::New(TPyThinList* list) +{ + TPyThinListIterator* listIter = new TPyThinListIterator; + PyObject_INIT(listIter, &PyThinListIteratorType); + listIter->List.Reset(list); + listIter->Elements = list->Step > 0 ? 
list->Elements - list->Step : list->Elements - list->Length * list->Step; + listIter->Count = list->Length; + return reinterpret_cast<PyObject*>(listIter); +} + +PyObject* TPyThinListIterator::Next(PyObject* self) +{ + PY_TRY { + TPyThinListIterator* iter = Cast(self); + + if (iter->Count) { + --iter->Count; + TPyThinList* list = iter->List.Get(); + return ToPyObject(list->CastCtx, list->ItemType, *(iter->Elements += list->Step)).Release(); + } + + return nullptr; + } PY_CATCH(nullptr) +} + +TPyObjectPtr ToPyLazyList( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) +{ + if (const auto elements = value.GetElements()) { + return TPyThinList::New(castCtx, itemType, value.AsBoxed(), elements, value.GetListLength()); + } else { + return TPyLazyList::New(castCtx, itemType, value.AsBoxed()); + } +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list.h b/yql/essentials/udfs/common/python/bindings/py_list.h new file mode 100644 index 00000000000..9db170a7954 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_list.h @@ -0,0 +1,33 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyLazyListIteratorType; +extern PyTypeObject PyLazyListType; +extern PyTypeObject PyThinListIteratorType; +extern PyTypeObject PyThinListType; + +TPyObjectPtr ToPyLazyList( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyLazyGenerator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr callableObj); + +NKikimr::NUdf::TUnboxedValue FromPyLazyIterable( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr iterableObj); + +NKikimr::NUdf::TUnboxedValue FromPyLazyIterator( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + 
TPyObjectPtr iteratorObj); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp new file mode 100644 index 00000000000..f16165fc54b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp @@ -0,0 +1,1025 @@ +#include "ut3/py_test_engine.h" + +#include <yql/essentials/public/udf/udf_ut_helpers.h> + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyListTest) { + Y_UNIT_TEST(FromPyEmptyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return []", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 0); + }); + } + + Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return [1, 2, 3, 4]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 4); + const auto it = value.GetListIterator(); + NUdf::TUnboxedValue item; + + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4); + UNIT_ASSERT(false == it.Next(item)); + }); + } + + Y_UNIT_TEST(ToPyEmptyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 0\n"); + } + + Y_UNIT_TEST(ToPyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + 
Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 3\n" + " assert all(isinstance(v, float) for v in value)\n" + " assert list(value) == [0.1, 0.2, 0.3]\n"); + } + + Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return (1, 2, 3)", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 3); + + ui32 expected = 1; + auto it = value.GetListIterator(); + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + }); + } + + Y_UNIT_TEST(ThinListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0.1\n" + " assert next(it) == 0.2\n" + " assert next(it) == 0.3\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + 
NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__reversed__' in dir(v)\n" + " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n" + ); + } + + Y_UNIT_TEST(LazyListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " assert '__reversed__' in dir(v)\n" + " it = iter(reversed(v))\n" + " assert next(it) == 2\n" + " assert next(it) == 1\n" + " assert next(it) == 0\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0\n" + " assert next(it) == 1\n" + " assert next(it) == 2\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const 
NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(ThinListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert list(v[::1]) == e[::1]\n" + 
" assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[3::-1]) == e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n" + ); + } + + Y_UNIT_TEST(ThinListSliceOverReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(x):\n" + " e = list(reversed(range(0, 10)))\n" + " v = reversed(x)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " 
    // Slicing a lazy list must give the same items as slicing the equivalent
    // Python list: the script compares list(v[...]) with e[...] for forward,
    // backward, negative-index, out-of-range and empty slices, where e is
    // list(range(0, 10)).
    // NOTE(review): TLazyList<true> is presumably the variant that reports
    // its length (the script asserts len(v) == len(e)) - confirm in
    // udf_ut_helpers.h.
    Y_UNIT_TEST(LazyListSlice) {
        TPythonTestEngine engine;
        engine.ToPython<NUdf::TListType<i32>>(
            [](const TType* type, const NUdf::IValueBuilder& vb) {
                Y_UNUSED(type); Y_UNUSED(vb);
                return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10));
            },
            "def Test(v):\n"
            " e = list(range(0, 10))\n"
            " assert '__len__' in dir(v)\n"
            " assert len(v) == len(e)\n"
            " assert list(v[::1]) == e[::1]\n"
            " assert list(v[::-1]) == e[::-1]\n"
            " assert list(v[3:]) == e[3:]\n"
            " assert list(v[-2:]) == e[-2:]\n"
            " assert list(v[2::-1]) == e[2::-1]\n"
            " assert list(v[:-2:-1]) == e[:-2:-1]\n"
            " assert list(v[-12:-1:1]) == e[-12:-1:1]\n"
            " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n"
            " assert list(v[-5:-3:1]) == e[-5:-3:1]\n"
            " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n"
            " assert list(v[:7:1]) == e[:7:1]\n"
            " assert list(v[-1:4]) == e[-1:4]\n"
            " assert list(v[5:11]) == e[5:11]\n"
            " assert list(v[4:1]) == e[4:1]\n"
            " assert list(v[5:-2]) == e[5:-2]\n"
        );
    }
+ NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" + ); + } + + Y_UNIT_TEST(LazyListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { 
+ Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" + ); + } + + Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 20U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + 
NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n" + ); + } + + Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == 
    // Integer indexing of a lazy list: v[i] must equal e[i] for the first,
    // middle and last positions and for negative indices down to -len, where
    // e is list(range(0, 10)).
    // NOTE(review): TLazyList<false> is presumably the variant without a
    // precomputed length - confirm in udf_ut_helpers.h.
    Y_UNIT_TEST(LazyListByIndex) {
        TPythonTestEngine engine;
        engine.ToPython<NUdf::TListType<i32>>(
            [](const TType* type, const NUdf::IValueBuilder& vb) {
                Y_UNUSED(type); Y_UNUSED(vb);
                return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10));
            },
            "def Test(v):\n"
            " e = list(range(0, 10))\n"
            " assert '__getitem__' in dir(v)\n"
            " assert v[0] == e[0]\n"
            " assert v[3] == e[3]\n"
            " assert v[5] == e[5]\n"
            " assert v[9] == e[9]\n"
            " assert v[-1] == e[-1]\n"
            " assert v[-4] == e[-4]\n"
            " assert v[-9] == e[-9]\n"
            " assert v[-10] == e[-10]\n"
        );
    }
std::array<NUdf::TUnboxedValue, 3U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = range(0, 10)\n" + " assert '__len__' in dir(v)\n" + " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n" + " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n" + " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n" + ); + } + + Y_UNIT_TEST(ThinListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + 
[](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{ + NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U) + }}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert len(v) == len(e)\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } + + Y_UNIT_TEST(LazyListToIndexDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert len(d) == 3\n" + " assert d[0] == 3\n" + " assert d[1] == 4\n" + " assert d[2] == 5\n" + " assert 3 not in d"); + } + + Y_UNIT_TEST(LazyListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + 
Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " assert value\n" + ); + } + + Y_UNIT_TEST(LazyListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " assert not value\n" + ); + } + + Y_UNIT_TEST(ThinListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + NUdf::TUnboxedValue *items = nullptr; + return vb.NewArray(1U, items); + }, + "def Test(value):\n" + " assert value\n" + ); + } + + Y_UNIT_TEST(ThinListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert not value\n" + ); + } + + Y_UNIT_TEST(LazyListHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert b\n"); + } + + Y_UNIT_TEST(LazyListEmptyHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert not b\n"); + } + + Y_UNIT_TEST(LazyIndexDictContains) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); 
+ }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert 0 in d\n" + " assert 1 in d\n" + " assert 2 in d\n" + " assert 3 not in d\n" + " assert -1 not in d"); + } + + Y_UNIT_TEST(LazyIndexDictIter) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " i, j = 0, 3\n" + " for k, v in d.items():\n" + " assert i == k\n" + " assert j == v\n" + " i, j = i+1, j+1"); + } + + Y_UNIT_TEST(LazyIndexDictGet) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert d.get(1) == 4\n" + " assert d.get(5) == None\n" + " assert d.get(5, 10) == 10\n"); + } + + Y_UNIT_TEST(FromPyGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); + + const auto it = value.GetListIterator(); + ui32 expected = 0; + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + }); + } + + Y_UNIT_TEST(FromPyIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" +#if PY_MAJOR_VERSION >= 3 + " return range(10)\n", +#else + " return xrange(10)\n", 
+#endif + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); + + const auto it = value.GetListIterator(); + ui32 expected = 0U; + for (NUdf::TUnboxedValue item; it.Next(item);) { + UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++); + } + + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + UNIT_ASSERT(value.HasFastListLength()); + }); + } + + Y_UNIT_TEST(FromPyCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T([1, 2])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasListItems()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 2); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 2); + } + + UNIT_ASSERT(false == it.Skip()); + }); + } + + Y_UNIT_TEST(FromPyIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " return iter(range(2))\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + + 
UNIT_ASSERT(false == it.Skip()); + }); + } + + Y_UNIT_TEST(FromPyGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " yield 0\n" + " yield 1\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + + UNIT_ASSERT(false == it.Skip()); + }); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp new file mode 100644 index 00000000000..c55e25891d2 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp @@ -0,0 +1,359 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +#define PY_CHECKER(Name, PyType, AsType, Type) \ + struct TPy##Name##Checker { \ + void operator()(PyObject* pyVal, Type expected) { \ + UNIT_ASSERT(Py##PyType##_Check(pyVal)); \ + Type val = Py##PyType##_As##AsType(pyVal); \ + UNIT_ASSERT(val != static_cast<Type>(-1) || !PyErr_Occurred()); \ + UNIT_ASSERT_EQUAL(val, expected); \ + } \ + }; + +#if PY_MAJOR_VERSION >= 3 +PY_CHECKER(Long, Long, Long, long) +#else +PY_CHECKER(Int, Int, Long, long) +#endif + +#ifdef HAVE_LONG_LONG +PY_CHECKER(LLong, Long, LongLong, long long) +PY_CHECKER(Ulong, Long, UnsignedLongLong, unsigned long long) +#else +PY_CHECKER(LLong, Long, Long, long) +PY_CHECKER(Ulong, Long, UnsignedLong, unsigned long) +#endif + +// PyFloat_AsDouble yields a double: compare as double, since a long would truncate fractions and overflow on large values. +PY_CHECKER(Float, Float, Double, double) + +#undef PY_CHECKER + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyNumberTest) { + template <typename T, typename TPyChecker> + void TestCastsInRange(T begin, T end) { + for (T i = begin; i < end; i++) { + 
TPyObjectPtr pyVal = PyCast<T>(i); + UNIT_ASSERT(pyVal.Get() != nullptr); + + TPyChecker c; + c(pyVal.Get(), i); + + T cppVal = PyCast<T>(pyVal.Get()); + UNIT_ASSERT_EQUAL(cppVal, i); + } + } + + template <typename T, typename TPyChecker, int range = 10> + void TestSignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyChecker>(-range, range); + TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>()); + } + + template <typename T, typename TPyDownChecker, + typename TPyUpChecker = TPyDownChecker, int range = 10> + void TestUnsignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>()); + } + + Y_UNIT_TEST(Bool) { + TPythonTestEngine engine; + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true); + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false); + + TPyObjectPtr list = PyList_New(0); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false); + bool res1; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1)); + UNIT_ASSERT_EQUAL(res1, false); + + PyList_Append(list.Get(), Py_None); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true); + bool res2; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2)); + UNIT_ASSERT_EQUAL(res2, true); + } + + Y_UNIT_TEST(Float) { + TestSignedCasts<float, TPyFloatChecker>(); + } + + Y_UNIT_TEST(Double) { + TestUnsignedCasts<double, TPyFloatChecker>(); + } + + Y_UNIT_TEST(I64) { + TestSignedCasts<i64, TPyLLongChecker>(); + } + + Y_UNIT_TEST(Ui64) { + TestUnsignedCasts<ui64, TPyUlongChecker>(); + } + +#if PY_MAJOR_VERSION >= 3 + Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyLongChecker>(); + } + + Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyLongChecker>(); + } + + Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyLongChecker>(); + } + + Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyLongChecker>(); + } + + Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyLongChecker>(); + 
} + + Y_UNIT_TEST(Ui32) { + TestUnsignedCasts<ui32, TPyLongChecker>(); + } + Y_UNIT_TEST(ImplicitIntCasts) { + TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyLong_FromLong(expected); + + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } + + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } + + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + } + +#else + Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyIntChecker>(); + } + + Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyIntChecker>(); + } + + Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyIntChecker>(); + } + + Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyIntChecker>(); + } + + Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyIntChecker>(); + } + + Y_UNIT_TEST(Ui32) { + if (sizeof(long) == 4) { + TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>(); + } else { + TestUnsignedCasts<ui32, TPyIntChecker>(); + } + } + + Y_UNIT_TEST(ImplicitIntCasts) { + 
TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyInt_FromLong(expected); + + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } + + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } + + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + } +#endif + + + Y_UNIT_TEST(ImplicitLongCasts) { + TPythonTestEngine engine; + i64 expected = static_cast<i64>(Max<ui32>()) + 10; + TPyObjectPtr pyLong; + #ifdef HAVE_LONG_LONG + pyLong = PyLong_FromLongLong(expected); + #else + pyLong = PyLong_FromLong(expected); + #endif + + { // signed + i64 actual = PyCast<i64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); + } + + { // unsigned + ui64 actual = PyCast<ui64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + 
UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + } + + { // to float + float f = PyCast<float>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyLong.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyLong.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i8 tmp; + UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp)); + } + + Y_UNIT_TEST(HugeLongOverflow) { + TPythonTestEngine engine; + TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0); + TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()"); + UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number + + ui64 resUI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64)); + + i64 resI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64)); + + ui32 resUI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32)); + + i32 resI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32)); + + ui16 resUI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16)); + + i16 resI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16)); + + ui8 resUI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8)); + + i8 resI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8)); + } + + Y_UNIT_TEST(ImplicitFloatCasts) { + TPythonTestEngine engine; + double expected = 3.14159; + TPyObjectPtr pyFloat = PyFloat_FromDouble(expected); + + { // to float + float f = PyCast<float>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyFloat.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + } + + { // to double + double d = PyCast<double>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk 
= TryPyCast<double>(pyFloat.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + } + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp new file mode 100644 index 00000000000..d13ea65da64 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp @@ -0,0 +1,56 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(FromPyNone) { + Y_UNIT_TEST(FromPyNone) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return None", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value); + }); + } + + Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return 42", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42); + }); + } + + Y_UNIT_TEST(ToPyNone) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(); + }, + "def Test(value):\n" + " assert value == None\n"); + } + + Y_UNIT_TEST(ToPyFilledOptional) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + const TOptionalType* optType = + static_cast<const TOptionalType*>(type); + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items); + items[0] = vb.NewString("test string"); + items[1] = NUdf::TUnboxedValuePod(false); + return NUdf::TUnboxedValue(tuple); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 2\n" + " assert value == ('test string', False)\n"); + } 
+} diff --git a/yql/essentials/udfs/common/python/bindings/py_ptr.h b/yql/essentials/udfs/common/python/bindings/py_ptr.h new file mode 100644 index 00000000000..704629b86b7 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_ptr.h @@ -0,0 +1,69 @@ +#pragma once + +#include <Python.h> // PyObject + +#include <yql/essentials/public/udf/udf_ptr.h> + +namespace NPython { + +template <typename T> +class TPyPtrOps +{ +public: + static inline void Ref(T* t) { + Y_ASSERT(t); + Py_INCREF(t); + } + + static inline void UnRef(T* t) { + Y_ASSERT(t); + Py_DECREF(t); + } + + static inline ui32 RefCount(const T* t) { + Y_ASSERT(t); + return t->ob_refcnt; + } +}; + +class TPyObjectPtr: + public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>> +{ + using TSelf = NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>; + +public: + inline TPyObjectPtr() + { + } + + inline TPyObjectPtr(PyObject* p) + : TSelf(p, STEAL_REF) // do not increment refcounter by default + { + } + + inline TPyObjectPtr(PyObject* p, AddRef) + : TSelf(p) + { + } + + inline void ResetSteal(PyObject* p) { + TSelf::Reset(p, STEAL_REF); + } + + inline void ResetAddRef(PyObject* p) { + TSelf::Reset(p); + } + + inline void Reset() { + TSelf::Reset(); + } + + template <class T> + inline T* GetAs() const { + return reinterpret_cast<T*>(Get()); + } + + void Reset(PyObject* p) = delete; +}; + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.cpp b/yql/essentials/udfs/common/python/bindings/py_resource.cpp new file mode 100644 index 00000000000..ebb096029ad --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_resource.cpp @@ -0,0 +1,116 @@ +#include "py_resource.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> + +using namespace NKikimr; + +namespace NPython { +namespace { + +void 
DestroyResourceCapsule(PyObject* obj) { + if (auto* ptr = PyCapsule_GetPointer(obj, ResourceCapsuleName)) { + delete reinterpret_cast<NUdf::TUnboxedValue*>(ptr); + } +} + +///////////////////////////////////////////////////////////////////////////// +// TResource +///////////////////////////////////////////////////////////////////////////// +class TResource final: public NUdf::TBoxedValue +{ +public: + TResource(PyObject* value, const NUdf::TStringRef& tag) + : Value_(value, TPyObjectPtr::ADD_REF), Tag_(tag) + { + } + + ~TResource() { + TPyGilLocker lock; + Value_.Reset(); + } + +private: + NUdf::TStringRef GetResourceTag() const override { + return Tag_; + } + + void* GetResource() final { + return Value_.Get(); + } + + TPyObjectPtr Value_; + const NUdf::TStringRef Tag_; +}; + +} // namespace + +const char ResourceCapsuleName[] = "YqlResourceCapsule"; + +TPyObjectPtr ToPyResource( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ +// TODO NILE-43 +#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) + NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type); + auto tag = inpector.GetTag(); + if (tag == ctx->PyCtx->ResourceTag) { + PyObject* p = reinterpret_cast<PyObject*>(value.GetResource()); + return TPyObjectPtr(p, TPyObjectPtr::ADD_REF); + } +#else + Y_UNUSED(type); + if (value.GetResourceTag() == ctx->PyCtx->ResourceTag) { + PyObject* p = reinterpret_cast<PyObject*>(value.GetResource()); + return TPyObjectPtr(p, TPyObjectPtr::ADD_REF); + } +#endif + auto resource = MakeHolder<NUdf::TUnboxedValue>(value); + + return PyCapsule_New(resource.Release(), ResourceCapsuleName, &DestroyResourceCapsule); +} + +NUdf::TUnboxedValue FromPyResource( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ +// TODO NILE-43 +#if false && UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) + NUdf::TResourceTypeInspector 
inpector(*ctx->PyCtx->TypeInfoHelper, type); + auto tag = inpector.GetTag(); + if (tag == ctx->PyCtx->ResourceTag) { + return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag)); + } + + if (PyCapsule_IsValid(value, ResourceCapsuleName)) { + auto* resource = reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName)); + auto valueTag = resource->GetResourceTag(); + if (valueTag != tag) { + throw yexception() << "Mismatch of resource tag, expected: " + << tag << ", got: " << valueTag; + } + + return *resource; + } + + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is not a valid resource with tag " << tag; +#else + Y_UNUSED(type); + if (PyCapsule_CheckExact(value)) { + if (!PyCapsule_IsValid(value, ResourceCapsuleName)) { + throw yexception() << "Python object " << PyObjectRepr(value) << " is not a valid resource capsule"; + } + return *reinterpret_cast<NUdf::TUnboxedValue*>(PyCapsule_GetPointer(value, ResourceCapsuleName)); + } + return NUdf::TUnboxedValuePod(new TResource(value, ctx->PyCtx->ResourceTag)); +#endif +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.h b/yql/essentials/udfs/common/python/bindings/py_resource.h new file mode 100644 index 00000000000..b46b84c84b1 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_resource.h @@ -0,0 +1,20 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern const char ResourceCapsuleName[]; + +TPyObjectPtr ToPyResource( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyResource( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp new file mode 
100644 index 00000000000..aaa9899c4f1 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp @@ -0,0 +1,81 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +extern const char SimpleDataTag[] = "SimpleData"; +extern const char PythonTestTag[] = PYTHON_TEST_TAG; + +struct TSimpleData { + TString Name; + ui32 Age; + + TSimpleData(const TString& name, ui32 age) + : Name(name) + , Age(age) + {} +}; + +using TSimpleDataResource = NUdf::TBoxedResource<TSimpleData, SimpleDataTag>; + +Y_UNIT_TEST_SUITE(TPyResourceTest) { + Y_UNIT_TEST(MkqlObject) { + TPythonTestEngine engine; + TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99)); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert type(value).__name__ == 'PyCapsule'\n" + " return value\n"); + UNIT_ASSERT(!!pyValue); + + engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>( + pyValue.Get(), + "import yql\n" + "\n" + "def Test(value):\n" + " return value\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value);; + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag); + auto simpleData = + reinterpret_cast<TSimpleData*>(value.GetResource()); + UNIT_ASSERT_EQUAL(simpleData->Age, 99); + UNIT_ASSERT_STRINGS_EQUAL(simpleData->Name, "Jamel"); + }); + } + + Y_UNIT_TEST(PythonObject) { + TPythonTestEngine engine; + NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>( + "class CustomStruct:\n" + " def __init__(self, name, age):\n" + " self.name = name\n" + " self.age = age\n" + "\n" + "def Test():\n" 
+ " return CustomStruct('Jamel', 97)\n"); + UNIT_ASSERT(mkqlValue); + UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag); + + TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>( + [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return mkqlValue; + }, + "def Test(value):\n" + " assert isinstance(value, CustomStruct)\n" + " assert value.age == 97\n" + " assert value.name == 'Jamel'\n"); + UNIT_ASSERT(!!pyValue); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.cpp b/yql/essentials/udfs/common/python/bindings/py_stream.cpp new file mode 100644 index 00000000000..3d9aecdc00b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_stream.cpp @@ -0,0 +1,343 @@ +#include "py_stream.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { + +// will be initialized in InitYqlModule() +PyObject* PyYieldIterationException = nullptr; + +////////////////////////////////////////////////////////////////////////////// +// TPyStream +////////////////////////////////////////////////////////////////////////////// +struct TPyStream { + PyObject_HEAD; + TPyCastContext::TPtr CastCtx; + TPyCleanupListItem<NUdf::IBoxedValuePtr> Value; + const NUdf::TType* ItemType; + + inline static TPyStream* Cast(PyObject* o) { + return reinterpret_cast<TPyStream*>(o); + } + + inline static void Dealloc(PyObject* self) { + delete Cast(self); + } + + inline static PyObject* Repr(PyObject* self) { + Y_UNUSED(self); + return PyRepr("<yql.TStream>").Release(); + } + + static PyObject* New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* 
type, + NUdf::IBoxedValuePtr value); + + static PyObject* Next(PyObject* self); +}; + +#if PY_MAJOR_VERSION >= 3 +#define Py_TPFLAGS_HAVE_ITER 0 +#endif + +PyTypeObject PyStreamType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.TStream"), + INIT_MEMBER(tp_basicsize , sizeof(TPyStream)), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , TPyStream::Dealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , TPyStream::Repr), + INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc , "yql.TStream object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , PyObject_SelfIter), + INIT_MEMBER(tp_iternext , TPyStream::Next), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + 
INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +PyObject* TPyStream::New( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + NUdf::IBoxedValuePtr value) +{ + TPyStream* stream = new TPyStream; + PyObject_INIT(stream, &PyStreamType); + stream->CastCtx = castCtx; + stream->Value.Set(castCtx->PyCtx, value); + + const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type); + stream->ItemType = inspector.GetItemType(); + + return reinterpret_cast<PyObject*>(stream); +} + +PyObject* TPyStream::Next(PyObject* self) { + PY_TRY { + TPyStream* stream = Cast(self); + + NUdf::TUnboxedValue item; + auto status = NUdf::TBoxedValueAccessor::Fetch(*stream->Value.Get(), item); + + switch (status) { + case NUdf::EFetchStatus::Ok: + return ToPyObject(stream->CastCtx, stream->ItemType, item) + .Release(); + case NUdf::EFetchStatus::Finish: + return nullptr; + case NUdf::EFetchStatus::Yield: + PyErr_SetNone(PyYieldIterationException); + return nullptr; + default: + Y_ABORT("Unknown stream status"); + } + } PY_CATCH(nullptr) +} + +////////////////////////////////////////////////////////////////////////////// +// TStreamOverPyIter +////////////////////////////////////////////////////////////////////////////// +class TStreamOverPyIter final: public NUdf::TBoxedValue { +public: + TStreamOverPyIter( + TPyCastContext::TPtr castCtx, + const NUdf::TType* itemType, + TPyObjectPtr pyIter, + TPyObjectPtr pyIterable, + TPyObjectPtr pyGeneratorCallable, + TPyObjectPtr pyGeneratorCallableClosure, + TPyObjectPtr pyGeneratorCallableArgs) + : CastCtx_(std::move(castCtx)) + , ItemType_(itemType) + , 
PyIter_(std::move(pyIter)) + , PyIterable_(std::move(pyIterable)) + , PyGeneratorCallable_(std::move(pyGeneratorCallable)) + , PyGeneratorCallableClosure_(std::move(pyGeneratorCallableClosure)) + , PyGeneratorCallableArgs_(std::move(pyGeneratorCallableArgs)) + { + } + + ~TStreamOverPyIter() { + TPyGilLocker lock; + PyIter_.Reset(); + PyIterable_.Reset(); + PyGeneratorCallableArgs_.Reset(); + PyGeneratorCallableClosure_.Reset(); + PyGeneratorCallable_.Reset(); + } + +private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + try { + TPyGilLocker lock; + TPyObjectPtr next(PyIter_Next(PyIter_.Get())); + if (next) { + if (PyErr_GivenExceptionMatches(next.Get(), PyYieldIterationException)) { + return NUdf::EFetchStatus::Yield; + } + + result = FromPyObject(CastCtx_, ItemType_, next.Get()); + return NUdf::EFetchStatus::Ok; + } + + if (PyObject* ex = PyErr_Occurred()) { + if (PyErr_GivenExceptionMatches(ex, PyYieldIterationException)) { + PyErr_Clear(); + TPyObjectPtr iterable; + TPyObjectPtr iter; + if (PyIterable_) { + PyIter_.Reset(); + iterable = PyIterable_; + } else if (PyGeneratorCallable_) { + PyIter_.Reset(); + TPyObjectPtr result(PyObject_CallObject(PyGeneratorCallable_.Get(), PyGeneratorCallableArgs_.Get())); + if (!result) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).data()); + } + + if (PyGen_Check(result.Get())) { + iterable = std::move(result); + } else if (PyIter_Check(result.Get())) { + iter = std::move(result); + } else { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Expected iterator or generator, but got " << PyObjectRepr(result.Get())).data()); + } + } else { + return NUdf::EFetchStatus::Yield; + } + + if (!iter) { + iter.ResetSteal(PyObject_GetIter(iterable.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + } + + PyIter_.ResetAddRef(iter.Get()); + return NUdf::EFetchStatus::Yield; 
+ } + + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + return NUdf::EFetchStatus::Finish; + } + catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).data()); + } + } + +private: + TPyCastContext::TPtr CastCtx_; + const NUdf::TType* ItemType_; + TPyObjectPtr PyIter_; + TPyObjectPtr PyIterable_; + TPyObjectPtr PyGeneratorCallable_; + TPyObjectPtr PyGeneratorCallableClosure_; + TPyObjectPtr PyGeneratorCallableArgs_; +}; + + +////////////////////////////////////////////////////////////////////////////// +// public functions +////////////////////////////////////////////////////////////////////////////// +TPyObjectPtr ToPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value) +{ + return TPyStream::New(castCtx, type, value.AsBoxed()); +} + +NKikimr::NUdf::TUnboxedValue FromPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const TPyObjectPtr& value, + const TPyObjectPtr& originalCallable, + const TPyObjectPtr& originalCallableClosure, + const TPyObjectPtr& originalCallableArgs +) +{ + const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type); + const NUdf::TType* itemType = inspector.GetItemType(); + + if (PyGen_Check(value.Get())) { + TPyObjectPtr iter(PyObject_GetIter(value.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, + originalCallable, originalCallableClosure, originalCallableArgs)); + } + + if (PyIter_Check(value.Get()) +#if PY_MAJOR_VERSION < 3 + // python 2 iterators must also implement "next" method + && 1 == PyObject_HasAttrString(value.Get(), "next") +#endif + ) { + TPyObjectPtr iter(value.Get(), TPyObjectPtr::ADD_REF); + return NUdf::TUnboxedValuePod(new 
TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, + originalCallable, originalCallableClosure, originalCallableArgs)); + } + + // assume that this function will returns generator + if (PyCallable_Check(value.Get())) { + TPyObjectPtr generator(PyObject_CallObject(value.Get(), nullptr)); + if (!generator || !PyGen_Check(generator.Get())) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected generator as a result of function call").data()); + } + TPyObjectPtr iter(PyObject_GetIter(generator.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + + TPyObjectPtr callableClosure; + if (PyFunction_Check(value.Get())) { + PyObject* closure = PyFunction_GetClosure(value.Get()); + if (closure) { + callableClosure = TPyObjectPtr(closure, TPyObjectPtr::ADD_REF); + } + } + + return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, + originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr)); + } + + // must be after checking for callable + if (PySequence_Check(value.Get()) || PyObject_HasAttrString(value.Get(), "__iter__")) { + TPyObjectPtr iter(PyObject_GetIter(value.Get())); + if (!iter) { + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).data()); + } + return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), originalCallable ? 
value : nullptr, nullptr, nullptr, nullptr)); + } + + UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected iterator, generator, generator factory, " + "or iterable object, but got " << PyObjectRepr(value.Get())).data()); +} + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.h b/yql/essentials/udfs/common/python/bindings/py_stream.h new file mode 100644 index 00000000000..f677e23930d --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_stream.h @@ -0,0 +1,24 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyStreamType; +extern PyObject* PyYieldIterationException; + +TPyObjectPtr ToPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyStream( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const TPyObjectPtr& value, + const TPyObjectPtr& originalCallable, + const TPyObjectPtr& originalCallableClosure, + const TPyObjectPtr& originalCallableArgs); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp new file mode 100644 index 00000000000..4a24dd1a138 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp @@ -0,0 +1,208 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyStreamTest) { + void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; + + while (true) { + status = value.Fetch(item); + if (status != NUdf::EFetchStatus::Ok) break; + + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + 
UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish); + UNIT_ASSERT_EQUAL(expected, 10); + } + + struct TTestStream final: NUdf::TBoxedValue { + TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>()) + : Current_(0) + , YieldOn_(yieldOn) + , MaxValue_(maxValue) + { + } + + private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + if (Current_ == YieldOn_) { + return NUdf::EFetchStatus::Yield; + } else if (Current_ >= MaxValue_) { + return NUdf::EFetchStatus::Finish; + } + result = NUdf::TUnboxedValuePod(Current_++); + return NUdf::EFetchStatus::Ok; + } + + ui32 Current_, YieldOn_, MaxValue_; + }; + + Y_UNIT_TEST(FromGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return iter(range(10))\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" +#if PY_MAJOR_VERSION >= 3 + " return range(10)\n", +#else + " return xrange(10)\n", +#endif + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T(list(range(10)))\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(FromList) { + 
TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + Ui32StreamValidator); + } + + Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value):\n" + " import yql\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"); + } + + Y_UNIT_TEST(ToPythonAndBackAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value): return value", + Ui32StreamValidator + ); + } + + Y_UNIT_TEST(YieldingStreamFromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "import yql\n" + "def Test():\n" + " yield 0\n" + " yield 1\n" + " yield yql.TYieldIteration\n" + " yield 2\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; + + while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); + UNIT_ASSERT_EQUAL(expected, 2); + }); + } + + Y_UNIT_TEST(YieldingStreamFromCpp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(5, 2)); + }, + "import yql\n" + "def Test(value):\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert next(value) == 0\n" + " assert next(value) == 1\n" + " try:\n" + " 
next(value)\n" + " except yql.TYieldIteration:\n" + " pass\n" + " else:\n" + " assert False, 'Expected yql.TYieldIteration'\n"); + } + + Y_UNIT_TEST(FromCppListIterator) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue *items = nullptr; + const auto a = vb.NewArray(10U, items); + ui32 i = 0U; + std::generate_n(items, 10U, [&i](){ return NUdf::TUnboxedValuePod(i++); }); + return a; + }, + "def Test(value): return iter(value)", + Ui32StreamValidator + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp new file mode 100644 index 00000000000..444b7b0c5b0 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp @@ -0,0 +1,98 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyStringTest) { + template <typename TStringType> + void TestStringCasts() { + TStringType testStr1(TStringBuf("test string")); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = PyCast<TStringType>(str1.Get()); + + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + TStringType testStr2(TStringBuf("another test string")); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); + } + + template <typename TStringType> + void TestBinaryStringCasts() { + TStringType testStr1(TStringBuf("\xa0\xa1"sv)); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = 
PyCast<TStringType>(str1.Get()); + + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + TStringType testStr2(TStringBuf("\xf0\x90\x28\xbc"sv)); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); + } + + template <typename TStringType> + void TestUtf8StringCasts() { + const TStringType testStr1(TStringBuf("тестовая строка")); + TStringBuf strBuf1 = testStr1; + const TPyObjectPtr str1 = PyUnicode_FromString(strBuf1.data()); + const TPyObjectPtr utf8 = PyUnicode_AsUTF8String(str1.Get()); + const auto value = PyCast<TStringType>(utf8.Get()); + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + const TStringType testStr2(TStringBuf("еще одна тестовая строка")); + TStringBuf strBuf2 = testStr2; + const auto str2 = ToPyUnicode<TStringType>(testStr2); + + UNIT_ASSERT(PyUnicode_Check(str2.Get())); + + Py_ssize_t size = 0U; +#if PY_MAJOR_VERSION >= 3 + const auto buf = PyUnicode_AsUTF8AndSize(str2.Get(), &size); +#else + char* buf = nullptr; + const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(str2.Get()); + const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); +#endif + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); + } + + Y_UNIT_TEST(Simple) { + TestStringCasts<TString>(); + TestStringCasts<TStringBuf>(); + TestStringCasts<NUdf::TStringRef>(); + } + + Y_UNIT_TEST(Utf8) { + TestUtf8StringCasts<TString>(); + TestUtf8StringCasts<TStringBuf>(); + TestUtf8StringCasts<NUdf::TStringRef>(); + } + + Y_UNIT_TEST(Binary) { + TestBinaryStringCasts<TString>(); + TestBinaryStringCasts<TStringBuf>(); + TestBinaryStringCasts<NUdf::TStringRef>(); + } +} 
diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.cpp b/yql/essentials/udfs/common/python/bindings/py_struct.cpp new file mode 100644 index 00000000000..a4ab99ee32c --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_struct.cpp @@ -0,0 +1,188 @@ +#include "py_struct.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/string/cast.h> +#include <util/string/join.h> +#include <util/string/builder.h> + +using namespace NKikimr; + +namespace NPython { + +namespace { + +TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKikimr::NUdf::TType* type, const NUdf::TStructTypeInspector& inspector) +{ + const auto it = ctx->StructTypes.emplace(type, TPyObjectPtr()); + if (it.second) { +#if PY_MAJOR_VERSION >= 3 + std::vector<PyStructSequence_Field> fields(inspector.GetMembersCount() + 1U); + for (ui32 i = 0U; i < inspector.GetMembersCount(); ++i) { + fields[i] = {const_cast<char*>(inspector.GetMemberName(i).Data()), nullptr}; + } + fields.back() = {nullptr, nullptr}; + + PyStructSequence_Desc desc = { + INIT_MEMBER(name, "yql.Struct"), + INIT_MEMBER(doc, nullptr), + INIT_MEMBER(fields, fields.data()), + INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount())) + }; + + const auto typeObject = new PyTypeObject(); + if (0 > PyStructSequence_InitType2(typeObject, &desc)) { + throw yexception() << "can't create struct type: " << GetLastErrorAsString(); + } + + it.first->second.ResetSteal(reinterpret_cast<PyObject*>(typeObject)); + } + + const TPyObjectPtr object = PyStructSequence_New(it.first->second.GetAs<PyTypeObject>()); +#else + const auto className = TString("yql.Struct_") += ToString(ctx->StructTypes.size()); + PyObject* metaclass = 
(PyObject *) &PyClass_Type; + const TPyObjectPtr name = PyRepr(TStringBuf(className)); + const TPyObjectPtr bases = PyTuple_New(0); + const TPyObjectPtr dict = PyDict_New(); + + TPyObjectPtr newClass = PyObject_CallFunctionObjArgs( + metaclass, name.Get(), bases.Get(), dict.Get(), + nullptr); + if (!newClass) { + throw yexception() << "can't create new type: " << GetLastErrorAsString(); + } + + it.first->second = std::move(newClass); + } + + Y_UNUSED(inspector); + const TPyObjectPtr object = PyInstance_New(it.first->second.Get(), nullptr, nullptr); +#endif + if (!object) { + throw yexception() << "can't struct instance: " << GetLastErrorAsString(); + } + return object; +} + +} + +TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TStructTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const TPyObjectPtr object = CreateNewStrucInstance(ctx, type, inspector); + const auto membersCount = inspector.GetMembersCount(); + + if (auto ptr = value.GetElements()) { + for (Py_ssize_t i = 0; i < membersCount; ++i) { +#if PY_MAJOR_VERSION >= 3 + auto item = ToPyObject(ctx, inspector.GetMemberType(i), *ptr++); + PyStructSequence_SetItem(object.Get(), i, item.Release()); +#else + const TStringBuf name = inspector.GetMemberName(i); + const auto item = ToPyObject(ctx, inspector.GetMemberType(i), *ptr++); + if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { + throw yexception() + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); + } +#endif + } + } else { + for (Py_ssize_t i = 0; i < membersCount; ++i) { +#if PY_MAJOR_VERSION >= 3 + auto item = ToPyObject(ctx, inspector.GetMemberType(i), value.GetElement(i)); + PyStructSequence_SetItem(object.Get(), i, item.Release()); +#else + const TStringBuf name = inspector.GetMemberName(i); + const auto item = ToPyObject(ctx, inspector.GetMemberType(i), value.GetElement(i)); + if (0 > 
PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { + throw yexception() + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); + } +#endif + } + } + + return object; +} + +NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value) +{ + NUdf::TUnboxedValue* items = nullptr; + const NUdf::TStructTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto membersCount = inspector.GetMembersCount(); + auto mkqlStruct = ctx->ValueBuilder->NewArray(membersCount, items); + + TVector<TString> errors; + if (PyDict_Check(value)) { + for (ui32 i = 0; i < membersCount; i++) { + TStringBuf memberName = inspector.GetMemberName(i); + auto memberType = inspector.GetMemberType(i); + // borrowed reference - no need to manage ownership + PyObject* item = PyDict_GetItemString(value, memberName.data()); + if (!item) { + TPyObjectPtr bytesMemberName = PyBytes_FromStringAndSize(memberName.data(), memberName.size()); + item = PyDict_GetItem(value, bytesMemberName.Get()); + } + if (!item) { + if (ctx->PyCtx->TypeInfoHelper->GetTypeKind(memberType) == NUdf::ETypeKind::Optional) { + items[i] = NUdf::TUnboxedValue(); + continue; + } + + errors.push_back(TStringBuilder() << "Dict has no item '" << memberName << "'"); + continue; + } + + try { + items[i] = FromPyObject(ctx, inspector.GetMemberType(i), item); + } catch (const yexception& e) { + errors.push_back(TStringBuilder() << "Failed to convert dict item '" << memberName << "' - " << e.what()); + } + } + + if (!errors.empty()) { + throw yexception() << "Failed to convert dict to struct\n" << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value); + } + } else { + for (ui32 i = 0; i < membersCount; i++) { + TStringBuf memberName = inspector.GetMemberName(i); + auto memberType = inspector.GetMemberType(i); + TPyObjectPtr attr = PyObject_GetAttrString(value, memberName.data()); + if (!attr) { + if 
(ctx->PyCtx->TypeInfoHelper->GetTypeKind(memberType) == NUdf::ETypeKind::Optional && + PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + items[i] = NUdf::TUnboxedValue(); + continue; + } + + errors.push_back(TStringBuilder() << "Object has no attr '" << memberName << "' , error: " << GetLastErrorAsString()); + continue; + } + + try { + items[i] = FromPyObject(ctx, memberType, attr.Get()); + } catch (const yexception& e) { + errors.push_back(TStringBuilder() << "Failed to convert object attr '" << memberName << "' - " << e.what()); + } + } + + if (!errors.empty()) { + throw yexception() << "Failed to convert object to struct\n" << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value); + } + } + + return mkqlStruct; +} + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.h b/yql/essentials/udfs/common/python/bindings/py_struct.h new file mode 100644 index 00000000000..79a380283fb --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_struct.h @@ -0,0 +1,17 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyStruct( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyStruct( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp new file mode 100644 index 00000000000..a97507f5499 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp @@ -0,0 +1,307 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyStructTest) { + Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = 
engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age, name):\n" + " self.age = age\n" + " self.name = name\n" + "\n" + "def Test():\n" + " return Person(99, 'Jamel')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyObjectMissingOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField("name", optionalStringType, &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age):\n" + " self.age = age\n" + "\n" + "def Test():\n" + " return Person(99)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyDict) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'name': 'Jamel', 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + 
UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyDictMissingOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField("name", optionalStringType, &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) { + TPythonTestEngine engine; + + ui32 ageIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("a\0ge", &ageIdx) + .Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { b'a\\0ge': 99 }\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); + } + + Y_UNIT_TEST(FromPyNamedTuple) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Person = namedtuple('Person', 'name age')\n" + " return Person(age=13, name='Tony')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13); + }); + } + + Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) 
{ + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<int>("age", &ageIdx) + .AddField("name", optionalStringType, &nameIdx) + .Build(); + + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Pers = namedtuple('Person', 'name age')\n" + " return Pers(name=None, age=15)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15); + }); + } + + Y_UNIT_TEST(FromPyEmptyStruct) { + TPythonTestEngine engine; + auto emptyStruct = engine.GetTypeBuilder().Struct()->Build(); + + engine.ToMiniKQL(emptyStruct, + "class Empty: pass\n" + "\n" + "def Test():\n" + " return Empty()\n", + [](const NUdf::TUnboxedValuePod&) {}); + } + + Y_UNIT_TEST(ToPyObject) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0; + auto addressType = engine.GetTypeBuilder().Struct()-> + AddField<NUdf::TUtf8>("city", &cityIdx) + .AddField<NUdf::TUtf8>("street", &streetIdx) + .AddField<ui16>("building", &buildingIdx) + .Build(); + + auto personType = engine.GetTypeBuilder().Struct()-> + AddField<ui16>("age", &ageIdx) + .AddField<NUdf::TUtf8>("name", &nameIdx) + .AddField("address", addressType, &addressIdx) + .Build(); + + + engine.ToPython(personType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97)); + items[nameIdx] = vb.NewString("Jamel"); + NUdf::TUnboxedValue* items2 = nullptr; + items[addressIdx] = vb.NewArray(static_cast<const 
TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2); + items2[cityIdx] = vb.NewString("Moscow");; + items2[streetIdx] = vb.NewString("L'va Tolstogo"); + items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16)); + return new_struct; + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" + " assert value.name == 'Jamel'\n" + " assert value.age == 97\n" + " assert value.address.city == 'Moscow'\n" + " assert value.address.building == 16\n" + ); + } + + Y_UNIT_TEST(ToPyObjectKeywordsAsFields) { + TPythonTestEngine engine; + + ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0; + auto structType = engine.GetTypeBuilder().Struct()-> + AddField<NUdf::TUtf8>("pass", &passIdx) + .AddField<NUdf::TUtf8>("while", &whileIdx) + .AddField<NUdf::TUtf8>("if", &ifIdx) + .AddField<NUdf::TUtf8>("not", &notIdx) + .Build(); + + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ifIdx] = vb.NewString("You"); + items[whileIdx] = vb.NewString("Shall"); + items[notIdx] = vb.NewString("Not"); + items[passIdx] = vb.NewString("Pass"); + return new_struct; + }, + "def Test(value):\n" + " assert getattr(value, 'if') == 'You'\n" + " assert getattr(value, 'while') == 'Shall'\n" + " assert getattr(value, 'not') == 'Not'\n" + " assert getattr(value, 'pass') == 'Pass'\n" + ); + } + +#if PY_MAJOR_VERSION >= 3 // TODO: Fix for python 2 + Y_UNIT_TEST(ToPyObjectTryModify) { + TPythonTestEngine engine; + + ui32 field1Idx = 0, field2Idx = 0; + auto structType = engine.GetTypeBuilder().Struct()-> + AddField<NUdf::TUtf8>("field1", &field1Idx) + .AddField<NUdf::TUtf8>("field2", &field2Idx) + .Build(); + + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = 
vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[field1Idx] = NUdf::TUnboxedValuePod::Zero(); + items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty"); + return new_struct; + }, + "def Test(value):\n" + " try:\n" + " setattr(value, 'field1', 17)\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " value.field2 = 18\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n" + ); + } +#endif + + Y_UNIT_TEST(ToPyObjectEmptyStruct) { + TPythonTestEngine engine; + + auto personType = engine.GetTypeBuilder().Struct()->Build(); + + engine.ToPython(personType, + [](const TType*, const NUdf::IValueBuilder& vb) { + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" +#if PY_MAJOR_VERSION >= 3 + " assert len(value) == 0\n" +#endif + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp new file mode 100644 index 00000000000..6cef25ea47f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp @@ -0,0 +1,61 @@ +#include "py_tuple.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +using namespace NKikimr; + +namespace NPython { + +TPyObjectPtr ToPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) +{ + const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + const auto elementsCount = inspector.GetElementsCount(); + + const TPyObjectPtr tuple(PyTuple_New(elementsCount)); + + if (auto ptr = value.GetElements()) { + for (ui32 i = 0U; i < elementsCount; ++i) { + auto item = ToPyObject(ctx, 
inspector.GetElementType(i), *ptr++); + PyTuple_SET_ITEM(tuple.Get(), i, item.Release()); + } + } else { + for (ui32 i = 0U; i < elementsCount; ++i) { + auto item = ToPyObject(ctx, inspector.GetElementType(i), value.GetElement(i)); + PyTuple_SET_ITEM(tuple.Get(), i, item.Release()); + } + } + + return tuple; +} + +NUdf::TUnboxedValue FromPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, PyObject* value) +{ + const NUdf::TTupleTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); + if (const TPyObjectPtr fast = PySequence_Fast(value, "Expected tuple or list.")) { + const Py_ssize_t itemsCount = PySequence_Fast_GET_SIZE(fast.Get()); + + if (itemsCount < 0 || inspector.GetElementsCount() != itemsCount) { + throw yexception() << "Tuple elements count mismatch."; + } + + NUdf::TUnboxedValue* tuple_items = nullptr; + const auto tuple = ctx->ValueBuilder->NewArray(inspector.GetElementsCount(), tuple_items); + for (Py_ssize_t i = 0; i < itemsCount; i++) { + const auto item = PySequence_Fast_GET_ITEM(fast.Get(), i); + *tuple_items++ = FromPyObject(ctx, inspector.GetElementType(i), item); + } + + return tuple; + } + + throw yexception() << "Expected Tuple or Sequence but got: " << PyObjectRepr(value); +} + +} diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.h b/yql/essentials/udfs/common/python/bindings/py_tuple.h new file mode 100644 index 00000000000..7d66af9b011 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.h @@ -0,0 +1,17 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyTuple( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyTuple( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp 
b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp new file mode 100644 index 00000000000..a6b9b6cc3e4 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp @@ -0,0 +1,108 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyTupleTest) { + Y_UNIT_TEST(FromPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<>>( + "def Test(): return ()", + [](const NUdf::TUnboxedValuePod&) {}); + } + + Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return [1, 2, 3]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); + } + + Y_UNIT_TEST(FromPyIter) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return iter({1, 2, 3})", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); + } + + Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>( + "def Test(): return (1, float(2.3), '4')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + auto second = value.GetElement(1); + UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001); + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "4"); + }); + } + + Y_UNIT_TEST(FromPyTupleInTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<ui32, 
NUdf::TTuple<ui8, float>, char*>>( + "def Test(): return (1, (2, float(3.4)), '5')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1); + + auto second = value.GetElement(1); + UNIT_ASSERT(second); + UNIT_ASSERT(second.IsBoxed()); + UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2); + UNIT_ASSERT_DOUBLES_EQUAL( + second.GetElement(1).Get<float>(), 3.4, 0.0001); + + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "5"); + }); + } + + Y_UNIT_TEST(ToPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 0\n" + " assert value == ()\n"); + } + + Y_UNIT_TEST(ToPyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + items[0] = vb.NewString("111"); + items[1] = NUdf::TUnboxedValuePod((ui64) 2); + items[2] = NUdf::TUnboxedValuePod((ui8) 3); + items[3] = NUdf::TUnboxedValuePod((float) 4.5); + return tuple; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 4\n" + " assert value == ('111', 2, 3, 4.5)\n"); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp new file mode 100644 index 00000000000..e9f5971c78f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp @@ -0,0 +1,85 @@ +#include "py_variant.h" +#include "ut3/py_test_engine.h" +#include 
<yql/essentials/minikql/mkql_type_ops.h> + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyTzDateTest) { + Y_UNIT_TEST(FromDate) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDate>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); + } + + Y_UNIT_TEST(FromDatetime) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDatetime>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui32>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); + } + + Y_UNIT_TEST(FromTimestamp) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzTimestamp>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui64>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); + } + + Y_UNIT_TEST(ToDate) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDate>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui16)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); + } + + Y_UNIT_TEST(ToDatetime) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDatetime>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui32)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + 
"def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); + } + + Y_UNIT_TEST(ToTimestamp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzTimestamp>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui64)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.cpp b/yql/essentials/udfs/common/python/bindings/py_utils.cpp new file mode 100644 index 00000000000..d1e0e8b4846 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_utils.cpp @@ -0,0 +1,89 @@ +#include "py_utils.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_gil.h" + +#include <util/generic/yexception.h> +#include <util/string/split.h> + +#include <regex> + + +namespace NPython { + +TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern) { + for (auto c : asciiStr) { + Y_ABORT_UNLESS((c&0x80) == 0, "expected ascii"); + } + + Py_ssize_t size = static_cast<Py_ssize_t>(asciiStr.size()); +#if PY_MAJOR_VERSION >= 3 + TPyObjectPtr pyStr = PyUnicode_FromStringAndSize(asciiStr.data(), size); +#else + TPyObjectPtr pyStr = PyString_FromStringAndSize(asciiStr.data(), size); +#endif + Y_ABORT_UNLESS(pyStr, "Can't get repr string"); + if (!intern) { + return pyStr; + } + + PyObject* tmp = pyStr.Release(); +#if PY_MAJOR_VERSION >= 3 + PyUnicode_InternInPlace(&tmp); +#else + PyString_InternInPlace(&tmp); +#endif + return TPyObjectPtr(tmp); +} + +TString PyObjectRepr(PyObject* value) { + static constexpr size_t maxLen = 1000; + static constexpr std::string_view truncSuffix = "(truncated)"; + const TPyObjectPtr repr(PyObject_Repr(value)); + if (!repr) { + return TString("repr error: ") + GetLastErrorAsString(); + } + + TString string; + if (!TryPyCast(repr.Get(), string)) { 
+ string = "can't get repr as string"; + } + if (string.size() > maxLen) { + string.resize(maxLen - truncSuffix.size()); + string += truncSuffix; + } + return string; +} + +bool HasEncodingCookie(const TString& source) { + // + // To define a source code encoding, a magic comment must be placed + // into the source files either as first or second line in the file. + // + // See https://www.python.org/dev/peps/pep-0263 for more details. + // + + static std::regex encodingRe( + "^[ \\t\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*"); + + int i = 0; + for (const auto& it: StringSplitter(source).Split('\n')) { + if (i++ == 2) break; + + TStringBuf line = it.Token(); + if (std::regex_match(line.begin(), line.end(), encodingRe)) { + return true; + } + } + return false; +} + +void PyCleanup() { + TPyGilLocker lock; + PyErr_Clear(); + PySys_SetObject("last_type", Py_None); + PySys_SetObject("last_value", Py_None); + PySys_SetObject("last_traceback", Py_None); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.h b/yql/essentials/udfs/common/python/bindings/py_utils.h new file mode 100644 index 00000000000..0c5ef058f1a --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_utils.h @@ -0,0 +1,28 @@ +#pragma once + +#include "py_ptr.h" + +#include <util/generic/strbuf.h> + +#ifdef _win_ +#define INIT_MEMBER(member, value) value //member +#else +#define INIT_MEMBER(member, value) .member = (value) +#endif + +namespace NPython { + +TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern = false); + +template <size_t size> +TPyObjectPtr PyRepr(const char(&str)[size]) { + return PyRepr(TStringBuf(str, size - 1), true); +} + +TString PyObjectRepr(PyObject* value); + +bool HasEncodingCookie(const TString& source); + +void PyCleanup(); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp new file mode 100644 index 
00000000000..ce521689b40 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp @@ -0,0 +1,37 @@ +#include "py_utils.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyUtilsTest) { + + Y_UNIT_TEST(EncodingCookie) { + UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-")); + UNIT_ASSERT(HasEncodingCookie( + "# This Python file uses the following encoding: utf-8")); + + // encoding commend on second line + UNIT_ASSERT(HasEncodingCookie( + "#!/usr/local/bin/python\n" + "# -*- coding: iso-8859-15 -*-\n" + "print 'hello'")); + + // missing "coding:" prefix + UNIT_ASSERT(false == HasEncodingCookie("# latin-1")); + + // encoding comment not on line 1 or 2 + UNIT_ASSERT(false == HasEncodingCookie( + "#!/usr/local/bin/python\n" + "#\n" + "# -*- coding: latin-1 -*-\n")); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.cpp b/yql/essentials/udfs/common/python/bindings/py_variant.cpp new file mode 100644 index 00000000000..ab222b34323 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_variant.cpp @@ -0,0 +1,97 @@ +#include "py_variant.h" +#include "py_cast.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> + + +using namespace NKikimr; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// 
public functions +////////////////////////////////////////////////////////////////////////////// +TPyObjectPtr ToPyVariant( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + auto& th = *castCtx->PyCtx->TypeInfoHelper; + NUdf::TVariantTypeInspector varInsp(th, type); + const NUdf::TType* subType = varInsp.GetUnderlyingType(); + ui32 index = value.GetVariantIndex(); + auto item = value.GetVariantItem(); + + const NUdf::TType* itemType = nullptr; + if (auto tupleInsp = NUdf::TTupleTypeInspector(th, subType)) { + itemType = tupleInsp.GetElementType(index); + TPyObjectPtr pyIndex = PyCast<ui32>(index); + TPyObjectPtr pyItem = ToPyObject(castCtx, itemType, item); + return PyTuple_Pack(2, pyIndex.Get(), pyItem.Get()); + } else if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { + itemType = structInsp.GetMemberType(index); + TPyObjectPtr pyName = ToPyUnicode<NUdf::TStringRef>( + structInsp.GetMemberName(index)); + TPyObjectPtr pyItem = ToPyObject(castCtx, itemType, item); + return PyTuple_Pack(2, pyName.Get(), pyItem.Get()); + } + + throw yexception() << "Cannot get Variant item type"; +} + +NUdf::TUnboxedValue FromPyVariant( + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) +{ + PY_ENSURE(PyTuple_Check(value), + "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); + + Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); + PY_ENSURE(tupleSize == 2, + "Expected to get Tuple with 2 elements, but got " + << tupleSize << " elements"); + + auto& th = *castCtx->PyCtx->TypeInfoHelper; + NUdf::TVariantTypeInspector varInsp(th, type); + const NUdf::TType* subType = varInsp.GetUnderlyingType(); + + PyObject* el0 = PyTuple_GET_ITEM(value, 0); + PyObject* el1 = PyTuple_GET_ITEM(value, 1); + + ui32 index; + NUdf::TStringRef name; + if (TryPyCast(el0, index)) { + if (auto tupleInsp = NUdf::TTupleTypeInspector(th, subType)) { + PY_ENSURE(index < tupleInsp.GetElementsCount(), 
+ "Index must be < " << tupleInsp.GetElementsCount() + << ", but got " << index); + auto* itemType = tupleInsp.GetElementType(index); + return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); + } else { + throw yexception() << "Cannot convert " << PyObjectRepr(value) + << " underlying Variant type is not a Tuple"; + } + } else if (TryPyCast(el0, name)) { + if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { + ui32 index = structInsp.GetMemberIndex(name); + PY_ENSURE(index < structInsp.GetMembersCount(), + "Unknown member name: " << TStringBuf(name)); + auto* itemType = structInsp.GetMemberType(index); + return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); + } else { + throw yexception() << "Cannot convert " << PyObjectRepr(value) + << " underlying Variant type is not a Struct"; + } + } else { + throw yexception() + << "Expected first Tuple element to either be an int " + "or a str, but got " << Py_TYPE(el0)->tp_name; + } +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.h b/yql/essentials/udfs/common/python/bindings/py_variant.h new file mode 100644 index 00000000000..ca97123183b --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_variant.h @@ -0,0 +1,17 @@ +#pragma once + +#include "py_ctx.h" + +namespace NPython { + +TPyObjectPtr ToPyVariant( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyVariant( + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp new file mode 100644 index 00000000000..77ab9bc6e8a --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp @@ -0,0 +1,101 @@ +#include 
"py_variant.h" +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyVariantTest) { + Y_UNIT_TEST(FromPyWithIndex) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>( + "def Test():\n" + " return (2, 'hello')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello"); + }); + } + + Y_UNIT_TEST(FromPyWithName) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> + AddField<ui32>("age", &ageIdx) + .AddField<char*>("name", &nameIdx) + .Build(); + + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant()->Over(personType).Build(); + + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('age', 99)\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99); + }); + + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('name', 'Jamel')\n", + [nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel"); + }); + } + + Y_UNIT_TEST(ToPyWithIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TVariant<float, ui32, char*>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32) 42)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (1, 42)\n"); + } + + Y_UNIT_TEST(ToPyWithName) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = 
engine.GetTypeBuilder().Struct()-> + AddField<ui32>("age", &ageIdx) + .AddField<NUdf::TUtf8>("name", &nameIdx) + .Build(); + + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant()->Over(personType).Build(); + + engine.ToPython( + variantType, + [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32) 99)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('age', 99)\n" + ); + + engine.ToPython( + variantType, + [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(nameIdx, vb.NewString("Jamel")); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('name', 'Jamel')\n" + ); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_void.cpp b/yql/essentials/udfs/common/python/bindings/py_void.cpp new file mode 100644 index 00000000000..ef72c052fbc --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_void.cpp @@ -0,0 +1,117 @@ +#include "py_void.h" +#include "py_errors.h" +#include "py_utils.h" + +#include <yql/essentials/public/udf/udf_value.h> + +using namespace NKikimr; + +namespace NPython { +namespace { + +static PyObject* VoidRepr(PyObject*) { + return PyRepr("yql.Void").Release(); +} + +static void VoidDealloc(PyObject*) { + Py_FatalError("Deallocating yql.Void"); +} + +} // namespace + +PyTypeObject PyVoidType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + INIT_MEMBER(tp_name , "yql.Void"), + INIT_MEMBER(tp_basicsize , 0), + INIT_MEMBER(tp_itemsize , 0), + INIT_MEMBER(tp_dealloc , VoidDealloc), +#if PY_VERSION_HEX < 0x030800b4 + INIT_MEMBER(tp_print , nullptr), +#else + INIT_MEMBER(tp_vectorcall_offset, 0), +#endif + INIT_MEMBER(tp_getattr , nullptr), + INIT_MEMBER(tp_setattr , nullptr), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_as_async , nullptr), +#else + INIT_MEMBER(tp_compare , nullptr), +#endif + INIT_MEMBER(tp_repr , VoidRepr), + 
INIT_MEMBER(tp_as_number , nullptr), + INIT_MEMBER(tp_as_sequence , nullptr), + INIT_MEMBER(tp_as_mapping , nullptr), + INIT_MEMBER(tp_hash , nullptr), + INIT_MEMBER(tp_call , nullptr), + INIT_MEMBER(tp_str , nullptr), + INIT_MEMBER(tp_getattro , nullptr), + INIT_MEMBER(tp_setattro , nullptr), + INIT_MEMBER(tp_as_buffer , nullptr), + INIT_MEMBER(tp_flags , 0), + INIT_MEMBER(tp_doc , "yql.Void object"), + INIT_MEMBER(tp_traverse , nullptr), + INIT_MEMBER(tp_clear , nullptr), + INIT_MEMBER(tp_richcompare , nullptr), + INIT_MEMBER(tp_weaklistoffset , 0), + INIT_MEMBER(tp_iter , nullptr), + INIT_MEMBER(tp_iternext , nullptr), + INIT_MEMBER(tp_methods , nullptr), + INIT_MEMBER(tp_members , nullptr), + INIT_MEMBER(tp_getset , nullptr), + INIT_MEMBER(tp_base , nullptr), + INIT_MEMBER(tp_dict , nullptr), + INIT_MEMBER(tp_descr_get , nullptr), + INIT_MEMBER(tp_descr_set , nullptr), + INIT_MEMBER(tp_dictoffset , 0), + INIT_MEMBER(tp_init , nullptr), + INIT_MEMBER(tp_alloc , nullptr), + INIT_MEMBER(tp_new , nullptr), + INIT_MEMBER(tp_free , nullptr), + INIT_MEMBER(tp_is_gc , nullptr), + INIT_MEMBER(tp_bases , nullptr), + INIT_MEMBER(tp_mro , nullptr), + INIT_MEMBER(tp_cache , nullptr), + INIT_MEMBER(tp_subclasses , nullptr), + INIT_MEMBER(tp_weaklist , nullptr), + INIT_MEMBER(tp_del , nullptr), + INIT_MEMBER(tp_version_tag , 0), +#if PY_MAJOR_VERSION >= 3 + INIT_MEMBER(tp_finalize , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b1 + INIT_MEMBER(tp_vectorcall , nullptr), +#endif +#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 + INIT_MEMBER(tp_print , nullptr), +#endif +}; + +PyObject PyVoidObject = { + _PyObject_EXTRA_INIT + 1, &PyVoidType +}; + +TPyObjectPtr ToPyVoid( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + Y_UNUSED(ctx); + Y_UNUSED(type); + Y_UNUSED(value); + return TPyObjectPtr(&PyVoidObject, TPyObjectPtr::ADD_REF); +} + +NUdf::TUnboxedValue FromPyVoid( + const TPyCastContext::TPtr& ctx, + 
const NUdf::TType* type, + PyObject* value) +{ + Y_UNUSED(ctx); + Y_UNUSED(type); + Y_UNUSED(value); + PY_ENSURE(value == &PyVoidObject, "Expected object of yql.Void type"); + return NUdf::TUnboxedValue::Void(); +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void.h b/yql/essentials/udfs/common/python/bindings/py_void.h new file mode 100644 index 00000000000..3c8203ab6e8 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_void.h @@ -0,0 +1,21 @@ +#pragma once + +#include "py_ptr.h" +#include "py_ctx.h" + +namespace NPython { + +extern PyTypeObject PyVoidType; +extern PyObject PyVoidObject; + +TPyObjectPtr ToPyVoid( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); + +NKikimr::NUdf::TUnboxedValue FromPyVoid( + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp new file mode 100644 index 00000000000..7fbeca20437 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp @@ -0,0 +1,37 @@ +#include "ut3/py_test_engine.h" + +#include <library/cpp/testing/unittest/registar.h> + + +using namespace NPython; + +Y_UNIT_TEST_SUITE(TPyVoidTest) { + Y_UNIT_TEST(FromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<void>( + "import yql\n" + "\n" + "def Test():\n" + " return yql.Void\n", + [](const NUdf::TUnboxedValue& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(false == value.IsBoxed()); + }); + } + + Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<void>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); Y_UNUSED(vb); + return NUdf::TUnboxedValue::Void(); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value) == 'yql.Void'\n" + " assert repr(value) == 'yql.Void'\n" + " assert 
isinstance(value, yql.TVoid)\n" + " assert value is yql.Void\n"); + } +} diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp new file mode 100644 index 00000000000..5d1497f7c76 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp @@ -0,0 +1,251 @@ +#include "py_yql_module.h" + +#include "py_void.h" +#include "py_iterator.h" +#include "py_list.h" +#include "py_dict.h" +#include "py_stream.h" +#include "py_utils.h" +#include "py_callable.h" + +#include <library/cpp/resource/resource.h> +#include <yql/essentials/udfs/common/python/python_udf/python_udf.h> + +namespace NPython { + +static PyMethodDef ModuleMethods[] = { + { nullptr, nullptr, 0, nullptr } /* sentinel */ +}; + +#define MODULE_NAME "yql" + +#if PY_MAJOR_VERSION >= 3 +#define MODULE_NAME_TYPING "yql.typing" +#endif + +#define MODULE_INITIALIZED_ATTRIBUTE "_initialized" + +PyDoc_STRVAR(ModuleDoc, + "This module provides YQL specific types for Python."); + +#if PY_MAJOR_VERSION >= 3 +PyDoc_STRVAR(ModuleDocTyping, + "This module provides annotations for YQL types for Python."); +#endif + +PyDoc_STRVAR(StopIterationException_doc, + "Can be throwed to yield stream iteration."); + +#define PREPARE_TYPE(Name, Type) \ + do { \ + if (PyType_Ready(Type) < 0) { \ + throw yexception() << "Can't prepare type: " << (Name); \ + } \ + } while (0) + +#define REGISTER_TYPE(Name, Type) \ + do { \ + PREPARE_TYPE(Name, Type); \ + Py_INCREF(Type); \ + if (PyModule_AddObject(module, (Name), (PyObject*) Type) < 0) { \ + throw yexception() << "Can't add type: " << (Name); \ + } \ + } while (0) + +#define REGISTER_OBJECT(Name, Object) \ + do { \ + if (PyDict_SetItemString(dict, (Name), (PyObject *) (Object)) < 0) \ + throw yexception() << "Can't register object: " << (Name); \ + } while (0) + +#define REGISTER_EXCEPTION(Name, Object, Doc) \ + do { \ + if (!Object) { \ + Object = PyErr_NewExceptionWithDoc((char*) 
MODULE_NAME "." Name, Doc, nullptr, nullptr); \ + if (!Object) { \ + throw yexception() << "Can't register exception: " << (Name); \ + } \ + REGISTER_OBJECT(Name, Object); \ + } \ + } while (0) + +#if PY_MAJOR_VERSION >= 3 +static PyModuleDef ModuleDefinition = { + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME), + INIT_MEMBER(m_doc, ModuleDoc), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, ModuleMethods), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), +}; + +static PyModuleDef ModuleDefinitionTyping = { + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME_TYPING), + INIT_MEMBER(m_doc, ModuleDocTyping), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, nullptr), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), +}; + +PyMODINIT_FUNC PyInit_YQL(void) +{ + auto mod = PyModule_Create(&ModuleDefinition); + PyModule_AddObject(mod, "__path__", Py_BuildValue("()")); + return mod; +} + +void go_throw(); + +PyMODINIT_FUNC PyInit_YQLTyping(void) +{ + return PyModule_Create(&ModuleDefinitionTyping); +} +#else +PyMODINIT_FUNC PyInit_YQL(void) +{ + Py_InitModule3(MODULE_NAME, ModuleMethods, ModuleDoc); +} +#endif + +void PrepareYqlModule() { + PyImport_AppendInittab(MODULE_NAME, &PyInit_YQL); +#if PY_MAJOR_VERSION >= 3 + PyImport_AppendInittab(MODULE_NAME_TYPING, &PyInit_YQLTyping); +#endif +} + +#if PY_MAJOR_VERSION >= 3 +void RegisterRuntimeModule(const char* name, PyObject* module) { + if (!module || !PyModule_Check(module)) { + throw yexception() << "Invalid object for module " << name; + } + + // borrowed reference + PyObject* modules = PyImport_GetModuleDict(); + if (!modules || !PyDict_CheckExact(modules)) { + throw yexception() << "Can't get sys.modules dictionary"; + } + + if (PyDict_SetItemString(modules, name, module) < 0) { + throw yexception() << "Can't register module " << 
name; + } +} +#endif + +void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone) { + TPyObjectPtr m = PyImport_ImportModule(MODULE_NAME); + if (!standalone && !m) { + PyErr_Clear(); +#if PY_MAJOR_VERSION >= 3 + m = PyInit_YQL(); + RegisterRuntimeModule(MODULE_NAME, m.Get()); +#else + PyInit_YQL(); +#endif + m = PyImport_ImportModule(MODULE_NAME); + } + + PyObject* module = m.Get(); + + if (!module) { + throw yexception() << "Can't get YQL module."; + } + + TPyObjectPtr initialized = PyObject_GetAttrString(module, MODULE_INITIALIZED_ATTRIBUTE); + if (!initialized) { + PyErr_Clear(); + } else if (initialized.Get() == Py_True) { + return; + } + + PyObject* dict = PyModule_GetDict(module); + + REGISTER_TYPE("TVoid", &PyVoidType); + REGISTER_OBJECT("Void", &PyVoidObject); + + PREPARE_TYPE("TIterator", &PyIteratorType); + PREPARE_TYPE("TPairIterator", &PyPairIteratorType); + + PREPARE_TYPE("TDict", &PyLazyDictType); + PREPARE_TYPE("TSet", &PyLazySetType); + + PREPARE_TYPE("TLazyListIterator", &PyLazyListIteratorType); + PREPARE_TYPE("TLazyList", &PyLazyListType); + PREPARE_TYPE("TThinListIterator", &PyThinListIteratorType); + PREPARE_TYPE("TThinList", &PyThinListType); + + PREPARE_TYPE("TStream", &PyStreamType); + PREPARE_TYPE("TCallable", &PyCallableType); + + REGISTER_EXCEPTION("TYieldIteration", PyYieldIterationException, StopIterationException_doc); + +#if PY_MAJOR_VERSION >= 3 + if (pythonFlavor == NYql::NUdf::EPythonFlavor::Arcadia) { + if (!standalone) { + TPyObjectPtr typingModule = PyImport_ImportModule(MODULE_NAME_TYPING); + if (!typingModule) { + PyErr_Clear(); + typingModule = PyInit_YQLTyping(); + RegisterRuntimeModule(MODULE_NAME_TYPING, typingModule.Get()); + } + } + + const auto typing = NResource::Find(TStringBuf("typing.py")); + const auto rc = PyRun_SimpleStringFlags(typing.c_str(), nullptr); + + if (rc < 0) { + // Not sure if PyErr_Print() works after PyRun_SimpleStringFlags, + // but just in case... 
+ PyErr_Print(); + ythrow yexception() << "Can't parse YQL type annotations module"; + } + + auto processError = [&] (PyObject* obj, TStringBuf message) { + if (obj) { + return; + } + PyObject *ptype, *pvalue, *ptraceback; + PyErr_Fetch(&ptype, &pvalue, &ptraceback); + if (pvalue) { + auto pstr = PyObject_Str(pvalue); + if (pstr) { + if (auto err_msg = PyUnicode_AsUTF8(pstr)) { + Cerr << err_msg << Endl; + } + } + PyErr_Restore(ptype, pvalue, ptraceback); + } + ythrow yexception() << "Can't setup YQL type annotations module: " << message; + }; + + auto main = PyImport_ImportModule("__main__"); + processError(main, "PyImport_ImportModule"); + auto function = PyObject_GetAttrString(main, "main"); + processError(function, "PyObject_GetAttrString"); + auto args = PyTuple_New(0); + processError(args, "PyTuple_New"); + auto result = PyObject_CallObject(function, args); + processError(result, "PyObject_CallObject"); + + Py_DECREF(result); + Py_DECREF(args); + Py_DECREF(function); + Py_DECREF(main); + } +#endif + + REGISTER_OBJECT(MODULE_INITIALIZED_ATTRIBUTE, Py_True); +} + +void TermYqlModule() { + PyYieldIterationException = nullptr; +} + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.h b/yql/essentials/udfs/common/python/bindings/py_yql_module.h new file mode 100644 index 00000000000..970471d029e --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.h @@ -0,0 +1,11 @@ +#pragma once + +#include <yql/essentials/udfs/common/python/python_udf/python_udf.h> + +namespace NPython { + +void PrepareYqlModule(); +void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone = true); +void TermYqlModule(); + +} // namspace NPython diff --git a/yql/essentials/udfs/common/python/bindings/typing.py b/yql/essentials/udfs/common/python/bindings/typing.py new file mode 100644 index 00000000000..0e53ad1e0a4 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/typing.py @@ -0,0 +1,188 @@ +def 
main(): + import importlib.abc + import importlib.machinery + import sys + + class Finder(importlib.abc.MetaPathFinder): + def find_spec(self, fullname, path, target=None): + if fullname in sys.builtin_module_names: + return importlib.machinery.ModuleSpec( + fullname, + importlib.machinery.BuiltinImporter, + ) + + sys.meta_path.append(Finder()) + + try: + import yandex.type_info.type_base as ti_base + import yandex.type_info.typing as ti_typing + import six + except ImportError as e: + raise ImportError( + str(e) + ". Make sure that library/python/type_info is in your PEERDIR list" + ) + + from yql import typing + + AutoMap = ti_base.make_primitive_type("AutoMap") + + def _format_arg(arg): + res = [] + if arg[0]: + res.append("{}:".format(ti_base.quote_string(arg[0]))) + res.append(str(arg[1])) + if arg[2]: + res.append("{Flags:") + res.append(",".join(str(x) for x in sorted(list(arg[2])))) + res.append("}") + return "".join(res) + + Stream = ti_typing._SingleArgumentGeneric("Stream") + + @six.python_2_unicode_compatible + class GenericResourceAlias(ti_base.Type): + REQUIRED_ATTRS = ti_base.Type.REQUIRED_ATTRS + ["tag"] + + def __str__(self): + return u"{}<{}>".format(self.name, ti_base.quote_string(self.tag)) + + def to_yson_type(self): + return {"type_name": self.yt_type_name, "tag": self.tag} + + class GenericResource(ti_base.Generic): + def __getitem__(self, params): + if not isinstance(params, str): + raise ValueError("Expected str, but got: {}".format(ti_base._with_type(params))) + + attrs = { + "name": self.name, + "yt_type_name": self.yt_type_name, + "tag": params, + } + + return GenericResourceAlias(attrs) + + def from_dict(self): + raise NotImplementedError() + + Resource = GenericResource("Resource") + + def _extract_arg_info(param): + name = "" + arg_type = param + flags = set() + if isinstance(param, slice): + name = param.start + if name is None: + name = "" + if not isinstance(name, str): + raise ValueError("Expected str as argument name but got: 
{}".format(ti_base._with_type(name))) + arg_type = param.stop + ti_base.validate_type(arg_type) + if param.step is not None: + for x in param.step: + if x != AutoMap: + raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x))) + flags.add(x) + else: + ti_base.validate_type(arg_type) + return (name, arg_type, flags) + + @six.python_2_unicode_compatible + class GenericCallableAlias(ti_base.Type): + def __str__(self): + return ("Callable<(" + + ",".join(_format_arg(x) for x in self.args[:len(self.args)-self.optional_args]) + + ("," if len(self.args) > self.optional_args and self.optional_args else "") + + ("[" if self.optional_args else "") + + ",".join(_format_arg(x) for x in self.args[len(self.args)-self.optional_args:]) + + ("]" if self.optional_args else "") + + ")->" + str(getattr(self, "return")) + ">") + + def to_yson_type(self): + yson_repr = { + "optional_args": self.optional_args, + "return": getattr(self, "return"), + "args": self.args, + "type_name": self.yt_type_name, + } + return yson_repr + + + class GenericCallable(ti_base.Generic): + def __getitem__(self, params): + if not isinstance(params, tuple) or len(params) < 2 or not isinstance(params[0], int) or not ti_typing.is_valid_type(params[1]): + raise ValueError("Expected at least two arguments (integer and type of return value) but got: {}".format(ti_base._with_type(params))) + args = [] + for param in params[2:]: + name, arg_type, flags = _extract_arg_info(param) + args.append((name, arg_type, flags)) + + if params[0] < 0 or params[0] > len(args): + raise ValueError("Optional argument count - " + str(params[0]) + " out of range [0.." 
+ str(len(args)) + "]") + + attrs = { + "optional_args": params[0], + "return": params[1], + "args": args, + "name": "Tagged", + "yt_type_name": "tagged", + } + + return GenericCallableAlias(attrs) + + def from_dict(self): + raise NotImplementedError() + + Callable = GenericCallable("Callable") + + def parse_slice_arg(arg): + try: + return _format_arg(_extract_arg_info(arg)) + except ValueError: + pass + + typing.Type = ti_base.Type + typing.is_valid_type = ti_base.is_valid_type + typing.parse_slice_arg = parse_slice_arg + + typing.Bool = ti_typing.Bool + typing.Int8 = ti_typing.Int8 + typing.Uint8 = ti_typing.Uint8 + typing.Int16 = ti_typing.Int16 + typing.Uint16 = ti_typing.Uint16 + typing.Int32 = ti_typing.Int32 + typing.Uint32 = ti_typing.Uint32 + typing.Int64 = ti_typing.Int64 + typing.Uint64 = ti_typing.Uint64 + typing.Float = ti_typing.Float + typing.Double = ti_typing.Double + typing.String = ti_typing.String + typing.Utf8 = ti_typing.Utf8 + typing.Yson = ti_typing.Yson + typing.Json = ti_typing.Json + typing.Uuid = ti_typing.Uuid + typing.Date = ti_typing.Date + typing.Datetime = ti_typing.Datetime + typing.Timestamp = ti_typing.Timestamp + typing.Interval = ti_typing.Interval + typing.TzDate = ti_typing.TzDate + typing.TzDatetime = ti_typing.TzDatetime + typing.TzTimestamp = ti_typing.TzTimestamp + typing.Void = ti_typing.Void + typing.Null = ti_typing.Null + typing.EmptyTuple = ti_typing.EmptyTuple + typing.EmptyStruct = ti_typing.EmptyStruct + typing.Optional = ti_typing.Optional + typing.List = ti_typing.List + typing.Dict = ti_typing.Dict + typing.Tuple = ti_typing.Tuple + typing.Struct = ti_typing.Struct + typing.Variant = ti_typing.Variant + typing.Tagged = ti_typing.Tagged + typing.Decimal = ti_typing.Decimal + + typing.Stream = Stream + typing.Resource = Resource + typing.Callable = Callable + typing.AutoMap = AutoMap diff --git a/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h 
b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h new file mode 100644 index 00000000000..a36e19fa32f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ut3/py_test_engine.h @@ -0,0 +1,227 @@ +#pragma once + +#include "py_cast.h" +#include "py_yql_module.h" +#include "py_utils.h" + +#include <yql/essentials/minikql/computation/mkql_computation_node_holders.h> +#include <yql/essentials/minikql/mkql_type_builder.h> +#include <yql/essentials/minikql/computation/mkql_value_builder.h> +#include <yql/essentials/udfs/common/python/python_udf/python_udf.h> + +#include <library/cpp/testing/unittest/registar.h> + +#define PYTHON_TEST_TAG "Python2Test" + + +using namespace NKikimr; +using namespace NMiniKQL; + +namespace NPython { + +////////////////////////////////////////////////////////////////////////////// +// TPyInitializer +////////////////////////////////////////////////////////////////////////////// +struct TPyInitializer { + TPyInitializer() { + PrepareYqlModule(); + Py_Initialize(); + InitYqlModule(NYql::NUdf::EPythonFlavor::Arcadia); + } + ~TPyInitializer() { + TermYqlModule(); + Py_Finalize(); + } +}; + +////////////////////////////////////////////////////////////////////////////// +// TPythonTestEngine +////////////////////////////////////////////////////////////////////////////// +class TPythonTestEngine { +public: + TPythonTestEngine() + : MemInfo_("Memory") + , Alloc_(__LOCATION__) + , Env_(Alloc_) + , TypeInfoHelper_(new TTypeInfoHelper) + , FunctionInfoBuilder_(Env_, TypeInfoHelper_, "", nullptr, {}) + { + HolderFactory_ = MakeHolder<THolderFactory>( + Alloc_.Ref(), + MemInfo_, + nullptr); + ValueBuilder_ = MakeHolder<TDefaultValueBuilder>(*HolderFactory_, NUdf::EValidatePolicy::Exception); + BindTerminator_ = MakeHolder<TBindTerminator>(ValueBuilder_.Get()); + Singleton<TPyInitializer>(); + CastCtx_ = MakeIntrusive<TPyCastContext>(&GetValueBuilder(), + MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), 
NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition()) + ); + } + + ~TPythonTestEngine() { + PyCleanup(); + } + + NUdf::IFunctionTypeInfoBuilder& GetTypeBuilder() { + return FunctionInfoBuilder_; + } + + const NUdf::IValueBuilder& GetValueBuilder() const { + return *ValueBuilder_; + } + + template <typename TChecker> + void ToMiniKQL(NUdf::TType* udfType, const TStringBuf& script, TChecker&& checker) { + TPyObjectPtr result = RunPythonFunction(script); + UNIT_ASSERT_C(!!result, script); + + TType* type = static_cast<TType*>(udfType); + auto value = FromPyObject(CastCtx_, type, result.Get()); + checker(value); + } + + template <typename TExpectedType, typename TChecker> + void ToMiniKQL(const TStringBuf& script, TChecker&& checker) { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + ToMiniKQL<TChecker>(type, script, std::move(checker)); + } + + template <typename TChecker> + void ToMiniKQLWithArg( + NUdf::TType* udfType, PyObject* argValue, + const TStringBuf& script, TChecker&& checker) + { + TPyObjectPtr args = Py_BuildValue("(O)", argValue); + + auto result = RunPythonFunction(script, args.Get()); + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + + TType* type = static_cast<TType*>(udfType); + auto value = FromPyObject(CastCtx_, type, result.Get()); + checker(value); + } + + template <typename TExpectedType, typename TChecker> + void ToMiniKQLWithArg( + PyObject* argValue, + const TStringBuf& script, TChecker&& checker) + { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + ToMiniKQLWithArg<TChecker>(type, argValue, script, std::move(checker)); + } + + template <typename TMiniKQLValueBuilder> + TPyObjectPtr ToPython( + NUdf::TType* udfType, + TMiniKQLValueBuilder&& builder, + const TStringBuf& script) + { + try { + TType* type = static_cast<TType*>(udfType); + NUdf::TUnboxedValue value = builder(type, GetValueBuilder()); + TPyObjectPtr pyValue = ToPyObject(CastCtx_, type, value); + 
if (!pyValue || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("object execution error"); + } + TPyObjectPtr args = Py_BuildValue("(O)", pyValue.Get()); + + auto result = RunPythonFunction(script, args.Get()); + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + return result; + } catch (const yexception& e) { + Cerr << e << Endl; + UNIT_FAIL("cast error"); + } + + Py_RETURN_NONE; + } + + template <typename TExpectedType, typename TMiniKQLValueBuilder> + TPyObjectPtr ToPython(TMiniKQLValueBuilder&& builder, const TStringBuf& script) { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + return ToPython<TMiniKQLValueBuilder>(type, std::move(builder), script); + } + + NUdf::TUnboxedValue FromPython(NUdf::TType* udfType, const TStringBuf& script) { + auto result = RunPythonFunction(script); + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + + TType* type = static_cast<TType*>(udfType); + return FromPyObject(CastCtx_, type, result.Get()); + } + + template <typename TExpectedType> + NUdf::TUnboxedValue FromPython(const TStringBuf& script) { + auto type = GetTypeBuilder().SimpleType<TExpectedType>(); + return FromPython(type, script); + } + + template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder> + NUdf::TUnboxedValue ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script) { + const auto aType = GetTypeBuilder().SimpleType<TArgumentType>(); + const auto result = ToPython<TMiniKQLValueBuilder>(aType, std::move(builder), script); + + if (!result || PyErr_Occurred()) { + PyErr_Print(); + UNIT_FAIL("function execution error"); + } + + const auto rType = static_cast<TType*>(GetTypeBuilder().SimpleType<TReturnType>()); + return FromPyObject(CastCtx_, rType, result.Get()); + } + + template <typename TArgumentType, typename TReturnType = TArgumentType, typename TMiniKQLValueBuilder, typename TChecker> + 
void ToPythonAndBack(TMiniKQLValueBuilder&& builder, const TStringBuf& script, TChecker&& checker) { + const auto result = ToPythonAndBack<TArgumentType, TReturnType, TMiniKQLValueBuilder>(std::move(builder), script); + checker(result); + } + +private: + // Compiles script as a module, looks up its Test function and calls it with args. + // Returns the result of the call, which is null when the call itself failed. + TPyObjectPtr RunPythonFunction( + const TStringBuf& script, PyObject* args = nullptr) + { + TString filename(TStringBuf("embedded:test.py")); + // Py_CompileString needs a NUL-terminated C string, which a TStringBuf + // does not guarantee, so compile from an owned copy. + const TString source(script); + TPyObjectPtr code(Py_CompileString(source.c_str(), filename.data(), Py_file_input)); + if (!code) { + PyErr_Print(); + UNIT_FAIL("can't compile python script"); + } + + TString moduleName(TStringBuf("py_cast_ut")); + TPyObjectPtr module(PyImport_ExecCodeModule(moduleName.begin(), code.Get())); + if (!module) { + PyErr_Print(); + UNIT_FAIL("can't create python module"); + } + + TPyObjectPtr function(PyObject_GetAttrString(module.Get(), "Test")); + if (!function) { + PyErr_Print(); + UNIT_FAIL("function 'Test' is not found in module"); + } + return PyObject_CallObject(function.Get(), args); + } + +private: + TMemoryUsageInfo MemInfo_; + TScopedAlloc Alloc_; + TTypeEnvironment Env_; + const NUdf::ITypeInfoHelper::TPtr TypeInfoHelper_; + TFunctionTypeInfoBuilder FunctionInfoBuilder_; + THolder<THolderFactory> HolderFactory_; + THolder<TDefaultValueBuilder> ValueBuilder_; + THolder<TBindTerminator> BindTerminator_; + TPyCastContext::TPtr CastCtx_; +}; + +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/ut3/ya.make b/yql/essentials/udfs/common/python/bindings/ut3/ya.make new file mode 100644 index 00000000000..b9d500938c7 --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ut3/ya.make @@ -0,0 +1,37 @@ +IF (OS_LINUX) + IF (NOT WITH_VALGRIND) + UNITTEST_FOR(yql/essentials/udfs/common/python/bindings) + + SRCS( + py_callable_ut.cpp + py_cast_ut.cpp + py_dict_ut.cpp + py_list_ut.cpp + py_decimal_ut.cpp + py_number_ut.cpp + py_optional_ut.cpp + py_resource_ut.cpp + py_stream_ut.cpp + py_string_ut.cpp + py_struct_ut.cpp + py_tuple_ut.cpp + 
py_tzdate_ut.cpp + py_utils_ut.cpp + py_variant_ut.cpp + py_void_ut.cpp + ) + + USE_PYTHON3() + + PEERDIR( + library/python/type_info + yql/essentials/minikql/computation/llvm14 + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql/pg_dummy + ) + + YQL_LAST_ABI_VERSION() + + END() + ENDIF() +ENDIF() diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make new file mode 100644 index 00000000000..efb5b475c4f --- /dev/null +++ b/yql/essentials/udfs/common/python/bindings/ya.make @@ -0,0 +1,54 @@ +PY23_NATIVE_LIBRARY() + +YQL_ABI_VERSION(2 27 0) + +SRCS( + py_callable.cpp + py_cast.cpp + py_decimal.cpp + py_errors.cpp + py_dict.cpp + py_list.cpp + py_lazy_mkql_dict.cpp + py_lazy_mkql_list.cpp + py_iterator.cpp + py_resource.cpp + py_stream.cpp + py_struct.cpp + py_tuple.cpp + py_utils.cpp + py_variant.cpp + py_void.cpp + py_yql_module.cpp +) + +IF (USE_SYSTEM_PYTHON AND _SYSTEM_PYTHON27) + # we should be able to run on python 2.7.X versions + # with X ranging from 3 to (at least) 15 + # + # for now bindings already use some functionality from 2.7.15, + # which doesn't exist earlier versions + # (according symbols won't be loaded from system python) + # + # so we provide backported implementation for this scenario to work as intended + SRCS( + py27_backports.c + ) +ENDIF() + +RESOURCE( + typing.py typing.py +) + +PEERDIR( + yql/essentials/public/udf + yql/essentials/utils +) + +NO_COMPILER_WARNINGS() + +END() + +RECURSE_FOR_TESTS( + ut3 +) diff --git a/yql/essentials/udfs/common/python/main_py3/__main__.pyx b/yql/essentials/udfs/common/python/main_py3/__main__.pyx new file mode 100644 index 00000000000..6f4ca943584 --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/__main__.pyx @@ -0,0 +1,50 @@ +import os +import runpy +import importlib + +import __res + + +cdef env_entry_point = 'Y_PYTHON_ENTRY_POINT' + + +cdef extern from 'main.h': + pass + + +def find_pymain(): + py_main = 
__res.find('PY_MAIN') + + if isinstance(py_main, bytes): + py_main = py_main.decode('utf8') + + if isinstance(py_main, unicode): + return py_main + + return None + + +def run_main(): + entry_point = os.environ.pop(env_entry_point, None) + + if entry_point is None: + entry_point = find_pymain() + + if entry_point is None: + raise RuntimeError('No entry point found') + + module_name, colon, func_name = entry_point.partition(':') + + if not colon: + runpy._run_module_as_main(module_name, alter_argv=False) + return + + if not module_name: + module_name = 'library.python.runtime_py3.entry_points' + + module = importlib.import_module(module_name) + func = getattr(module, func_name) + func() + + +run_main() diff --git a/yql/essentials/udfs/common/python/main_py3/include/main.h b/yql/essentials/udfs/common/python/main_py3/include/main.h new file mode 100644 index 00000000000..c96402004e3 --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/include/main.h @@ -0,0 +1,12 @@ +#pragma once +#include <util/system/compiler.h> + +#ifdef __cplusplus +extern "C" { +#endif +Y_PUBLIC +int RunPython(int argc, char** argv); +#ifdef __cplusplus +} +#endif + diff --git a/yql/essentials/udfs/common/python/main_py3/main.cpp b/yql/essentials/udfs/common/python/main_py3/main.cpp new file mode 100644 index 00000000000..edc3c89ca5b --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/main.cpp @@ -0,0 +1,9 @@ +#include "main.h" + +extern "C" +int RunPythonImpl(int argc, char** argv); + +extern "C" +int RunPython(int argc, char** argv) { + return RunPythonImpl(argc, argv); +} diff --git a/yql/essentials/udfs/common/python/main_py3/ya.make b/yql/essentials/udfs/common/python/main_py3/ya.make new file mode 100644 index 00000000000..cc13fb77e4c --- /dev/null +++ b/yql/essentials/udfs/common/python/main_py3/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +USE_PYTHON3() + +ADDINCL( + yql/essentials/udfs/common/python/main_py3/include +) + +SRCS(GLOBAL main.cpp) + 
+BUILDWITH_CYTHON_C(__main__.pyx --embed=RunPythonImpl) + +END() diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json new file mode 100644 index 00000000000..dd55da78b53 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/result.json @@ -0,0 +1,61 @@ +{ + "test.test[Annotations]": [ + { + "checksum": "19c6d906cb8617cf9d2b5d484e09caf8", + "size": 7570, + "uri": "https://{canondata_backend}/212715/49b4751c22bd43fa7057cc92ae5cbedb40404f40/resource.tar.gz#test.test_Annotations_/results.txt" + } + ], + "test.test[BytesDecodeModeStrict]": [ + { + "checksum": "f8534cff0843faaf876c41e0875dcf05", + "size": 3120, + "uri": "https://{canondata_backend}/1775319/4c4fed0942b33bcc70d44f7dd2972a8e05c6db97/resource.tar.gz#test.test_BytesDecodeModeStrict_/results.txt" + } + ], + "test.test[Cleanup]": [ + { + "checksum": "036e77892757e48fa3fb319ed324b019", + "size": 954, + "uri": "https://{canondata_backend}/1871182/9909e0b25b15bb1f21d5def23fb072d64c82f07e/resource.tar.gz#test.test_Cleanup_/results.txt" + } + ], + "test.test[CustomYsonConverter]": [ + { + "checksum": "7716204e544d2fcb9313412c3919e66d", + "size": 1625, + "uri": "https://{canondata_backend}/1130705/576535b56a4e74992911431865e5edd0f7d55520/resource.tar.gz#test.test_CustomYsonConverter_/results.txt" + } + ], + "test.test[Data]": [ + { + "checksum": "f40e83806b294be420681fdfbf2133e8", + "size": 25268, + "uri": "https://{canondata_backend}/1031349/7065a0985fe0cd26a754a5bee7a4c808836a4692/resource.tar.gz#test.test_Data_/results.txt" + } + ], + "test.test[Excepthook]": [ + { + "uri": "file://test.test_Excepthook_/extracted" + } + ], + "test.test[GreedyInputContainers]": [ + { + "checksum": "02a619c86f180e8a4c536087d64bab6d", + "size": 1328, + "uri": "https://{canondata_backend}/995452/085d43bbd16f44afc51d6cafed42465a3d20215c/resource.tar.gz#test.test_GreedyInputContainers_/results.txt" 
+ } + ], + "test.test[OptionalNested]": [ + { + "uri": "file://test.test_OptionalNested_/extracted" + } + ], + "test.test[Switch]": [ + { + "checksum": "e60320702512bdcecd5c663f387ee939", + "size": 9172, + "uri": "https://{canondata_backend}/1130705/493ee46b1e8f2e848ab928f97913d332cb4fffc7/resource.tar.gz#test.test_Switch_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted new file mode 100644 index 00000000000..b260fe7616b --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_Excepthook_/extracted @@ -0,0 +1,15 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:44:1: Fatal: Execution of node: Result + SELECT $udf(@@{"abc":1}@@); + ^ + <tmp_path>/program.sql:<main>:40:17: Fatal: Failed to execute: +CUSTOM_EXCEPTHOOK +True +Traceback (most recent call last): + File "embedded:f", line 31, in f +Exception + + + $udf = Python3::f(Callable<(String)->String>, $script); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted new file mode 100644 index 00000000000..413eb2f4ec0 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/canondata/test.test_OptionalNested_/extracted @@ -0,0 +1,14 @@ +<tmp_path>/program.sql:<main>: Error: Type annotation + + <tmp_path>/program.sql:<main>:12:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem + SELECT $optOptList("42"); + ^ + <tmp_path>/program.sql:<main>:12:8: Error: At function: Apply + SELECT $optOptList("42"); + ^ + <tmp_path>/program.sql:<main>:2:24: Error: At function: ScriptUdf + $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@ + ^ + <tmp_path>/program.sql:<main>:2:24: Error: Nested optionals are unsupported in script UDF + $optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@ + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql new file mode 100644 index 00000000000..3f845322e20 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Annotations.sql @@ -0,0 +1,67 @@ +--sanitizer ignore memory +$script = @@ +from yql.typing import * + +def primitive(a0:Bool,a1:Int8,a2:Uint8,a3:Int16,a4:Uint16,a5:Int32,a6:Uint32, + a7:Int64,a8:Uint64,a9:Float,a10:Double,a11:String,a12:Utf8,a13:Yson,a14:Json, + a15:Uuid,a16:Date,a17:Datetime,a18:Timestamp,a19:Interval,a20:TzDate, + a21:TzDatetime,a22:TzTimestamp)->Decimal(10,3): + pass + +def singletons(a0:Void,a1:Null,a2:EmptyStruct,a3:EmptyTuple)->Void: + pass + +def containers(a0:Optional[Int32],a1:List[List[Bool]],a2:Stream[String],a3:Dict[Int32,String], + a4:Tuple[Int32,String],a5:Tuple[Int32],a6:Struct["a":Int32,"b":String],a7:Struct["a":Int32], + a8:Variant[Int32,String],a9:Variant[Int32],a10:Variant["a":Int32,"b":String],a11:Variant["a":Int32])->List[String]: + pass + +def special(a0:Resource["Python3"],a1:Tagged[Int32,"foo"])->Void: + pass + +def c0()->Callable[0,Int32]: pass +def c1()->Callable[1,Int32,Optional[List[Int32]]]: pass +def c2()->Callable[1,Int32,Int32,Optional[List[Int32]]]: pass +def c3()->Callable[0,Int32,"a":Int32:{AutoMap}]: pass +def c4()->Callable[0,Int32,"":Int32:{AutoMap}]: pass +def c5()->Callable[0,Int32,"":Int32:{}]: pass +def c6()->Callable[0,Int32,"foo":Int32]: pass + +def f0(x:Optional[Int32]=None,y:Optional[Int32]=None)->Void: pass +def f1(x:Optional[Int32],y:Optional[Int32]=None)->Void: pass +def 
f2(x:Optional[Int32],y:Optional[Int32])->Void: pass +def f3(x:slice("",Int32,{AutoMap}), y:slice("name",String))->Void: pass + +@@; + +$t = ($name)->{ + return FormatType(EvaluateType( + ParseTypeHandle(Core::PythonFuncSignature(AsAtom("Python3"), $script, $name)))); +}; + +-- Singletons + +select $t("primitive"); +select $t("singletons"); + +-- Containers & Special + +select $t("containers"); +select $t("special"); + +-- Callable +select + $t("c0") as c0, + $t("c1") as c1, + $t("c2") as c2, + $t("c3") as c3, + $t("c4") as c4, + $t("c5") as c5, + $t("c6") as c6; + +-- Top level +select + $t("f0") as f0, + $t("f1") as f1, + $t("f2") as f2, + $t("f3") as f3; diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql new file mode 100644 index 00000000000..e540dbf38ab --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/BytesDecodeModeStrict.sql @@ -0,0 +1,11 @@ +--sanitizer ignore memory +$script = @@ +def f(string, uuid, yson): + return (string, str(type(string)), uuid, str(type(uuid)), yson, str(type(yson))) + +f._yql_bytes_decode_mode = 'strict' +@@; + +$udf = Python3::f(Callable<(String?, UUid?, Yson?)->Tuple<String?, String, UUid?, String, Yson?, String>>, $script); + +SELECT $udf("string", UUid('1812bc18-5838-4cde-98aa-287302697b90'), cast(@@{"abc"=1}@@ as yson)); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in new file mode 100644 index 00000000000..d5ddcb40830 --- 
/dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.in @@ -0,0 +1 @@ +{"key"="1";"subkey"="2";"value"="3"}; diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql new file mode 100644 index 00000000000..9db98402923 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Cleanup.sql @@ -0,0 +1,12 @@ +--sanitizer ignore memory +$udfScript = @@ +import yql +def mapper(records): + yql.g = records + for record in records: + yield dict(yid=b"bla", rnd=0.) +@@; + +$udf = Python3::mapper(Callable<(Stream<Struct<key:String, subkey:String, value:String>>)->Stream<Struct<yid:String, rnd:Double>>>, $udfScript); + +PROCESS Input using $udf(TableRows()); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql new file mode 100644 index 00000000000..43dd00cb3df --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/CustomYsonConverter.sql @@ -0,0 +1,20 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$script = @@ +import json + +def yloads(z): + return json.loads(str(z, 'latin-1').replace("=",":")) + +def ydumps(z): + return bytes(json.dumps(z).replace(":","="), 'latin-1') + +def f(s): + return (s.get("abc",0),s) + +f._yql_convert_yson = (yloads,ydumps) +@@; + +$udf = Python3::f(Callable<(Yson?)->Tuple<Int64, Yson?>>, $script); + +SELECT $udf(cast(@@{"abc"=1}@@ as yson)); diff --git 
a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql new file mode 100644 index 00000000000..3f7de07d5c2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Data.sql @@ -0,0 +1,61 @@ +--sanitizer ignore memory +$data = AsTuple( + Bool("true"), + Bool("FalsE"), + Int8("-128"), + Int8("127"), + Uint8("0"), + Uint8("255"), + Int16("-32768"), + Int16("32767"), + Uint16("0"), + Uint16("65535"), + Int32("-2147483648"), + Int32("2147483647"), + Uint32("0"), + Uint32("4294967295"), + Int64("-9223372036854775808"), + Int64("9223372036854775807"), + Uint64("0"), + Uint64("18446744073709551615"), + Float("0"), + Float("1"), + Float("-1e30"), + Float("-inf"), + Float("+inf"), + Float("nan"), + Double("0"), + Double("1"), + Double("-1e300"), + Double("-inf"), + Double("+inf"), + Double("nan"), + String("foo\xffbar"), + Utf8("привет"), + Yson("<a=1>[3;%false]"), + Json(@@{"a":1,"b":null}@@), + Date("2000-01-01"), + Datetime("2000-01-01T01:02:03Z"), + Timestamp("2000-01-01T01:02:03.4Z"), + Interval("P1DT12H"), + TzDate("2000-01-01,Europe/Moscow"), + TzDatetime("2000-01-01T01:02:03,Europe/Moscow"), + TzTimestamp("2000-01-01T01:02:03.4,Europe/Moscow"), + Uuid('31323334-3536-3738-393a-3b3c3d3e3f40'), + Decimal('3.1415926535897932384626433832795029', 35, 34), + Decimal('-.00000000000000000000000000000000001', 35, 35), + Decimal('NAN', 10, 5), + Decimal('-iNf', 1, 0) +); + +$type = CallableType(0, + TypeOf($data), + TypeOf($data) +); + +$f = Python3::f($type, @@ +def f(x): + return x +@@); + +select $data, $f($data); diff --git 
a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg new file mode 100644 index 00000000000..5dae597903c --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.cfg @@ -0,0 +1 @@ +xfail diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql new file mode 100644 index 00000000000..100086c9e4e --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Excepthook.sql @@ -0,0 +1,23 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$script = @@ +import sys +import traceback + + +def excepthook(*args): + print('CUSTOM_EXCEPTHOOK', file=sys.stderr) + print(all(_ for _ in args), file=sys.stderr) + print("".join(traceback.format_exception(*args)), file=sys.stderr) + + +sys.excepthook = excepthook + + +def f(string): + raise Exception() +@@; + +$udf = Python3::f(Callable<(String)->String>, $script); + +SELECT $udf(@@{"abc":1}@@); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql new file mode 100644 index 00000000000..a43af8791d6 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/GreedyInputContainers.sql @@ -0,0 +1,19 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$s = @@ +def list_func(lst): + return lst.count(1) +list_func._yql_lazy_input = False +@@; + +$u = Python3::list_func(Callable<(List<Int32>)->Int32>, 
$s); +select $u(AsList(1,2,3)); + +$s = @@ +def dict_func(dict): + return list(dict.values()).count(b"b") +dict_func._yql_lazy_input = False +@@; + +$v = Python3::dict_func(Callable<(Dict<Int32, String>)->Int32>, $s); +select $v(AsDict(AsTuple(1,"a"),AsTuple(2,"b"))); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg new file mode 100644 index 00000000000..5dae597903c --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.cfg @@ -0,0 +1 @@ +xfail diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql new file mode 100644 index 00000000000..33396f036a7 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/OptionalNested.sql @@ -0,0 +1,7 @@ +--sanitizer ignore memory +$optOptList = Python3::opt_opt_list(Callable<(String)->List<String>??>, @@ +def opt_opt_list(in_str): + return [in_str] if len(in_str) % 2 == 0 else None +@@); + +SELECT $optOptList("42"); diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.in diff --git a/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql new file mode 100644 index 00000000000..c2576a72e45 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/cases/Switch.sql @@ -0,0 +1,92 @@ +--sanitizer ignore memory +/* syntax version 1 */ +$x = AsList(1,2,3); + +$s1 = @@ +def f(input): + for x in input: + yield x +@@; + +$s2 = @@ +class Iter: + def __init__(self, input): + self.input = input + + def 
__next__(self): + return next(self.input) +@@; + +$s3 = @@ +class CallableIter: + def __init__(self, input): + self.input = input + + def __call__(self): + def f(input): + for x in input: + yield x + + return f(self.input) +@@; + +$s4 = @@ +class Iterable: + def __init__(self, input): + self.input = input + + def __iter__(self): + return iter(self.input) +@@; + +$f1 = Python3::f(Callable<(Stream<Int32>)->Stream<Int32>>, $s1); + +$f2 = Python3::Iter(Callable<(Stream<Int32>)->Stream<Int32>>, $s2); + +$f3 = Python3::CallableIter(Callable<(Stream<Int32>)->Stream<Int32>>, $s3); + +$f4 = Python3::Iterable(Callable<(Stream<Int32>)->Stream<Int32>>, $s4); + +$g = ($stream)->{ + return $stream; +}; + +select Yql::Collect($g(Yql::Iterator($x, Yql::DependsOn("A1")))); + +select Yql::Collect($f1(Yql::Iterator($x, Yql::DependsOn("A2")))); + +select Yql::Collect($f2(Yql::Iterator($x, Yql::DependsOn("A3")))); + +select Yql::Collect($f3(Yql::Iterator($x, Yql::DependsOn("A4")))); + +select Yql::Collect($f4(Yql::Iterator($x, Yql::DependsOn("A5")))); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B1")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $g)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B2")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f1)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B3")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f2)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B4")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f3)); + +select Yql::Collect(Yql::Switch( + Yql::Iterator($x, Yql::DependsOn("B5")), + AsAtom('0'), + AsTuple(AsAtom('0')), + $f4)); diff --git a/yql/essentials/udfs/common/python/python3_small/test/ya.make b/yql/essentials/udfs/common/python/python3_small/test/ya.make new file mode 100644 index 00000000000..ac03d946685 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/test/ya.make @@ -0,0 +1,10 @@ +YQL_UDF_TEST_CONTRIB() + 
+TIMEOUT(300) +SIZE(MEDIUM) + +DEPENDS( + yql/essentials/udfs/common/python/python3_small +) + +END() diff --git a/yql/essentials/udfs/common/python/python3_small/ya.make b/yql/essentials/udfs/common/python/python3_small/ya.make new file mode 100644 index 00000000000..f815fa8d757 --- /dev/null +++ b/yql/essentials/udfs/common/python/python3_small/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(python3_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME Python3 + RESOURCE_NAME Python3 +) + +PEERDIR( + yql/essentials/public/udf +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h new file mode 100644 index 00000000000..a4e393b4868 --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h @@ -0,0 +1,111 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_registrator.h> +#include <yql/essentials/public/udf/udf_terminator.h> +#include <yql/essentials/udfs/common/python/bindings/py_ptr.h> +#include <yql/essentials/udfs/common/python/bindings/py_callable.h> +#include <yql/essentials/udfs/common/python/bindings/py_cast.h> +#include <yql/essentials/udfs/common/python/bindings/py_errors.h> +#include <yql/essentials/udfs/common/python/bindings/py_gil.h> +#include <yql/essentials/udfs/common/python/bindings/py_utils.h> +#include <yql/essentials/udfs/common/python/bindings/py_yql_module.h> + +#include <util/generic/yexception.h> +#include <util/stream/str.h> +#include <util/stream/printf.h> +#include <util/string/builder.h> +#include <util/string/cast.h> + +using namespace NYql::NUdf; +using namespace NPython; + +////////////////////////////////////////////////////////////////////////////// +// TPythonFunctionFactory 
+////////////////////////////////////////////////////////////////////////////// +class TPythonFunctionFactory: public TBoxedValue +{ +public: + TPythonFunctionFactory( + const TStringRef& name, + const TStringRef& tag, + const TType* functionType, + ITypeInfoHelper::TPtr&& helper, + const NYql::NUdf::TSourcePosition& pos) + : Ctx(new TPyContext(helper, tag, pos)) + , FunctionName(name) + , FunctionType_(functionType) + { + } + + ~TPythonFunctionFactory() { + Ctx->Cleanup(); + PyCleanup(); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override + { + TPyCastContext::TPtr castCtx = MakeIntrusive<TPyCastContext>(valueBuilder, Ctx); + + // for get propper c-compatible null-terminating string + TString source(args[0].AsStringRef()); + + TPyGilLocker lock; + TPyObjectPtr module = CompileModule(FunctionName, source); + if (!module) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to compile module: " << GetLastErrorAsString()).data()); + } + + TPyObjectPtr function(PyObject_GetAttrString(module.Get(), FunctionName.data())); + if (!function) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to find entry point: " << GetLastErrorAsString()).data()); + } + + if (!PyCallable_Check(function.Get())) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Entry point is not a callable").data()); + } + + try { + SetupCallableSettings(castCtx, function.Get()); + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << Ctx->Pos << "Failed to setup callable settings: " + << e.what()).data()); + } + return FromPyCallable(castCtx, FunctionType_, function.Release()); + } + + static TPyObjectPtr CompileModule(const TString& name, const TString& source) { + unsigned int moduleNum = AtomicCounter++; + TString filename(TStringBuf("embedded:")); + filename += name; + + TPyObjectPtr module, code; + if (HasEncodingCookie(source)) { + code.ResetSteal(Py_CompileString(source.data(), filename.data(), 
Py_file_input)); + } else { + PyCompilerFlags cflags; + cflags.cf_flags = PyCF_SOURCE_IS_UTF8; + + code.ResetSteal(Py_CompileStringFlags( + source.data(), filename.data(), Py_file_input, &cflags)); + } + + if (code) { + TString nameWithNum = name + ToString(moduleNum); + char* moduleName = const_cast<char*>(nameWithNum.data()); + module.ResetSteal(PyImport_ExecCodeModule(moduleName, code.Get())); + } + + return module; + } + + const TPyContext::TPtr Ctx; + const TString FunctionName; + const TType* FunctionType_; + inline static std::atomic_uint AtomicCounter = 0; +}; diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp new file mode 100644 index 00000000000..b1739a1775e --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp @@ -0,0 +1,232 @@ +#include "python_udf.h" +#include "python_function_factory.h" + +#include <yql/essentials/public/udf/udf_version.h> +#include <yql/essentials/udfs/common/python/bindings/py_utils.h> + +#include <util/generic/vector.h> +#include <util/system/execpath.h> + +namespace { + +#if PY_MAJOR_VERSION >= 3 +#define PYTHON_PROGRAMM_NAME L"YQL::Python3" +#else +#define PYTHON_PROGRAMM_NAME "YQL::Python2" +#endif + +int AddToPythonPath(const TVector<TStringBuf>& pathVals) +{ + char pathVar[] = "path"; // PySys_{Get,Set}Object take a non-const char* arg + + TPyObjectPtr sysPath(PySys_GetObject(pathVar), TPyObjectPtr::ADD_REF); + if (!sysPath) return -1; + + for (const auto& val: pathVals) { + TPyObjectPtr pyStr = PyRepr(val.data()); + int rc = PyList_Append(sysPath.Get(), pyStr.Get()); + if (rc != 0) { + return rc; + } + } + + return PySys_SetObject(pathVar, sysPath.Get()); +} + +void InitArcadiaPythonRuntime() +{ + // Arcadia static python import hook resides in __res module + // It modifies sys.meta_path upon import + + TPyObjectPtr mod(PyImport_ImportModule("__res")); + Y_ABORT_UNLESS(mod, "Can't import arcadia python runtime"); +} 
+ +////////////////////////////////////////////////////////////////////////////// +// TPythonModule +////////////////////////////////////////////////////////////////////////////// +class TPythonModule: public IUdfModule +{ +public: + TPythonModule(const TString& resourceName, EPythonFlavor pythonFlavor, bool standalone = true) + : ResourceName(resourceName), Standalone(standalone) + { + if (Standalone) { + Py_SetProgramName(PYTHON_PROGRAMM_NAME); + PrepareYqlModule(); + Py_Initialize(); + } + + InitYqlModule(pythonFlavor, standalone); + + const auto rc = PyRun_SimpleString(R"( +# numpy on import may find installed openblas library and load it, +# which in turn causes it to start CPUCOUNT threads +# with approx. 40Mb memory reserved for each thread; +# +# See more detailed explanation here: https://st.yandex-team.ru/STATLIBS-1715#5bfc68ecbbc039001cec572a +# +# Thus, we reduce negative effects as much as possible +import os +os.environ['OPENBLAS_NUM_THREADS'] = '1' + + +# Following part allows us later to format tracebacks via sys.excepthook +# in thread-safe manner +import sys +import threading +if sys.version_info >= (3, 0): + from io import StringIO, TextIOWrapper as SysStderrType +else: + from cStringIO import StringIO + SysStderrType = file + +class StderrLocal(threading.local): + + def __init__(self): + self.is_real_mode = True + self.buffer = StringIO() + + +class StderrProxy(object): + def __init__(self, stderr): + self._stderr = stderr + self._tls = StderrLocal() + + def _toggle_real_mode(self): + self._tls.is_real_mode = not self._tls.is_real_mode + if not self._tls.is_real_mode: + self._tls.buffer.clear() + + def _get_value(self): + assert not self._tls.is_real_mode + return self._tls.buffer.getvalue() + + def __getattr__(self, attr): + target = self._stderr + if not self._tls.is_real_mode: + target = self._tls.buffer + + return getattr(target, attr) + +if isinstance(sys.stderr, SysStderrType): + sys.stderr = StderrProxy(sys.stderr) +)"); + 
Y_ABORT_UNLESS(rc >= 0, "Can't setup module"); + + if (pythonFlavor == EPythonFlavor::Arcadia) { + InitArcadiaPythonRuntime(); + } + +#ifndef _win_ + if (Standalone) { + TVector<TStringBuf> paths; + if (pythonFlavor == EPythonFlavor::System) { + paths.push_back(TStringBuf("/usr/lib/python2.7/dist-packages")); + } + paths.push_back(TStringBuf(".")); + const auto r = AddToPythonPath(paths); + Y_ABORT_UNLESS(r >= 0, "Can't add dist-packages into sys.path"); + } +#endif + + char executableVar[] = "executable"; // PySys_{Get,Set}Object take a non-const char* arg + TPyObjectPtr pyExecutableStr = PyRepr(GetExecPath().data()); + Y_ABORT_UNLESS(PySys_SetObject(executableVar, pyExecutableStr.Get()) >= 0, "Can't set sys.executable"); + + if (Standalone) { + PyEval_InitThreads(); + MainThreadState_ = PyEval_SaveThread(); + } + } + + ~TPythonModule() { + if (Standalone) { + PyEval_RestoreThread(MainThreadState_); + Py_Finalize(); + } + } + + void CleanupOnTerminate() const final { + PyCleanup(); + } + + void GetAllFunctions(IFunctionsSink&) const final {} + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + if (flags & TFlags::TypesOnly) { + return; + } + + try { + auto typeHelper = builder.TypeInfoHelper(); + if (ETypeKind::Callable != typeHelper->GetTypeKind(userType)) { + return builder.SetError(TStringRef::Of("Expected callable type")); + } + + const auto pos = builder.GetSourcePosition(); + builder.Implementation(new TPythonFunctionFactory(name, ResourceName, userType, std::move(typeHelper), pos)); + } catch (const yexception& e) { + builder.SetError(TStringBuf(e.what())); + } + } + +private: + TString ResourceName; + bool Standalone; + PyThreadState* MainThreadState_; +}; + +////////////////////////////////////////////////////////////////////////////// +// TStubModule 
+////////////////////////////////////////////////////////////////////////////// +class TStubModule: public IUdfModule { + void GetAllFunctions(IFunctionsSink&) const final {} + + void BuildFunctionTypeInfo( + const TStringRef& /*name*/, + TType* /*userType*/, + const TStringRef& /*typeConfig*/, + ui32 flags, + IFunctionTypeInfoBuilder& /*builder*/) const final + { + Y_DEBUG_ABORT_UNLESS(flags & TFlags::TypesOnly, + "in stub module this function can be called only for types loading"); + } + + void CleanupOnTerminate() const final {} +}; + +} // namespace + +void NKikimr::NUdf::RegisterYqlPythonUdf( + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor) +{ + if (flags & IRegistrator::TFlags::TypesOnly) { + registrator.AddModule(moduleName, new TStubModule); + } else { + registrator.AddModule( + moduleName, + NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true) + ); + } +} + +TUniquePtr<NKikimr::NUdf::IUdfModule> NKikimr::NUdf::GetYqlPythonUdfModule( + TStringBuf resourceName, NKikimr::NUdf::EPythonFlavor pythonFlavor, + bool standalone +) { + return new TPythonModule(TString(resourceName), pythonFlavor, standalone); +} diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h new file mode 100644 index 00000000000..16d7da096dd --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h @@ -0,0 +1,26 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_registrator.h> + +namespace NYql { +namespace NUdf { + +enum class EPythonFlavor { + System, + Arcadia, +}; + +void RegisterYqlPythonUdf( + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor); + +TUniquePtr<IUdfModule> GetYqlPythonUdfModule( + TStringBuf resourceName, + EPythonFlavor pythonFlavor, + bool standalone); + +} // namespace NUdf +} // namespace NYql diff 
--git a/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports new file mode 100644 index 00000000000..2ffd6f54b59 --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/python_udfs_exports.exports @@ -0,0 +1,5 @@ +C Register +C AbiVersion +C RunPython +C BindSymbols +C SetBackTraceCallback diff --git a/yql/essentials/udfs/common/python/python_udf/ya.make b/yql/essentials/udfs/common/python/python_udf/ya.make new file mode 100644 index 00000000000..9a2090665a2 --- /dev/null +++ b/yql/essentials/udfs/common/python/python_udf/ya.make @@ -0,0 +1,20 @@ +PY23_NATIVE_LIBRARY() + +YQL_ABI_VERSION(2 27 0) + +SRCS( + python_udf.cpp +) + +PEERDIR( + yql/essentials/public/udf + yql/essentials/udfs/common/python/bindings +) + +CFLAGS( + -DDISABLE_PYDEBUG +) + +NO_COMPILER_WARNINGS() + +END() diff --git a/yql/essentials/udfs/common/python/system_python/README.MD b/yql/essentials/udfs/common/python/system_python/README.MD new file mode 100644 index 00000000000..16d46fd51d3 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/README.MD @@ -0,0 +1,7 @@ +python3_N folders here are mirrors of python3_small, adjusted for system python (Name Python3 -> SystemPython3_N, LDFLAGS(-lpython3.N)) + +They are supposed to be built with local python: `ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.N -DPYTHON_CONFIG=python3.N-config python3.N` + +One way to get all pythons on the same machine is `sudo add-apt-repository ppa:deadsnakes/ppa` and `sudo apt install python3.N-dev` + +Use build_system_python_udfs.sh to build all python udfs with system pythons(local pythons) diff --git a/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh new file mode 100755 index 00000000000..8dd22452304 --- /dev/null +++ 
b/yql/essentials/udfs/common/python/system_python/build_system_python_udfs.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -eux +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.8 -DPYTHON_CONFIG=python3.8-config python3_8 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.9 -DPYTHON_CONFIG=python3.9-config python3_9 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.10 -DPYTHON_CONFIG=python3.10-config python3_10 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.11 -DPYTHON_CONFIG=python3.11-config python3_11 +ya make -DUSE_ARCADIA_PYTHON=no -DUSE_LOCAL_PYTHON=yes -DOS_SDK=local -DPYTHON_BIN=python3.12 -DPYTHON_CONFIG=python3.12-config python3_12 diff --git a/yql/essentials/udfs/common/python/system_python/python3_10/ya.make b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make new file mode 100644 index 00000000000..12068a33a1e --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_10/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_10_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_10 + RESOURCE_NAME SystemPython3_10 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.10") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_11/ya.make b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make new file mode 100644 index 00000000000..483432b9b90 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_11/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_11_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_11 + RESOURCE_NAME SystemPython3_11 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.11") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_12/ya.make 
b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make new file mode 100644 index 00000000000..8220fda0eac --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_12/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_12_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_12 + RESOURCE_NAME SystemPython3_12 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.12") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_8/ya.make b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make new file mode 100644 index 00000000000..df447bacb4d --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_8/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_8_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_8 + RESOURCE_NAME SystemPython3_8 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.8") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/python3_9/ya.make b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make new file mode 100644 index 00000000000..ea3e5d849ed --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/python3_9/ya.make @@ -0,0 +1,16 @@ +YQL_PYTHON3_UDF(systempython3_9_udf) + +REGISTER_YQL_PYTHON_UDF( + NAME SystemPython3_9 + RESOURCE_NAME SystemPython3_9 +) + +IF (USE_LOCAL_PYTHON) + LDFLAGS("-lpython3.9") +ENDIF() + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/python/system_python/ya.make b/yql/essentials/udfs/common/python/system_python/ya.make new file mode 100644 index 00000000000..3afc7796bd3 --- /dev/null +++ b/yql/essentials/udfs/common/python/system_python/ya.make @@ -0,0 +1,7 @@ +RECURSE( + python3_8 + python3_9 + python3_10 + python3_11 + python3_12 +) diff --git a/yql/essentials/udfs/common/python/ya.make b/yql/essentials/udfs/common/python/ya.make new file mode 
100644 index 00000000000..bb6a4c8d5b4 --- /dev/null +++ b/yql/essentials/udfs/common/python/ya.make @@ -0,0 +1,10 @@ +# This module should not be exported under CMake since it requires Python build +NO_BUILD_IF(STRICT EXPORT_CMAKE) + +RECURSE( + bindings + main_py3 + python3_small + python_udf + system_python +) diff --git a/yql/essentials/udfs/common/re2/re2_udf.cpp b/yql/essentials/udfs/common/re2/re2_udf.cpp new file mode 100644 index 00000000000..5d43ce040ae --- /dev/null +++ b/yql/essentials/udfs/common/re2/re2_udf.cpp @@ -0,0 +1,536 @@ +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +#include <contrib/libs/re2/re2/re2.h> + +#include <util/charset/utf8.h> +#include <util/string/cast.h> + +using namespace re2; +using namespace NKikimr; +using namespace NUdf; + +namespace { + + template <typename T> + T Id(T x) { + return x; + } + + re2::RE2::Options::Encoding EncodingFromBool(bool x) { + return x ? re2::RE2::Options::Encoding::EncodingUTF8 : re2::RE2::Options::Encoding::EncodingLatin1; + } + +#define OPTIONS_MAP(xx) \ + xx(Utf8, 0, bool, true, set_encoding, EncodingFromBool) \ + xx(PosixSyntax, 1, bool, false, set_posix_syntax, Id) \ + xx(LongestMatch, 2, bool, false, set_longest_match, Id) \ + xx(LogErrors, 3, bool, true, set_log_errors, Id) \ + xx(MaxMem, 4, ui64, 8 << 20, set_max_mem, Id) \ + xx(Literal, 5, bool, false, set_literal, Id) \ + xx(NeverNl, 6, bool, false, set_never_nl, Id) \ + xx(DotNl, 7, bool, false, set_dot_nl, Id) \ + xx(NeverCapture, 8, bool, false, set_never_capture, Id) \ + xx(CaseSensitive, 9, bool, true, set_case_sensitive, Id) \ + xx(PerlClasses, 10, bool, false, set_perl_classes, Id) \ + xx(WordBoundary, 11, bool, false, set_word_boundary, Id) \ + xx(OneLine, 12, bool, false, set_one_line, Id) + + enum EOptionsField : ui32 { + OPTIONS_MAP(ENUM_VALUE_GEN) + Count + }; + + struct TOptionsSchema { + TType* StructType; + ui32 Indices[EOptionsField::Count]; + }; + + struct 
TRegexpGroups { + TVector<TString> Names; + TVector<ui32> Indexes; + }; + + class TRe2Udf: public TBoxedValue { + public: + enum EMode { + MATCH, + GREP, + CAPTURE, + REPLACE, + COUNT, + FIND_AND_CONSUME, + }; + + template <bool posix> + class TFactory: public TBoxedValue { + public: + TFactory( + EMode mode, + const TOptionsSchema& optionsSchema, + TSourcePosition pos, + const TRegexpGroups& regexpGroups = TRegexpGroups()) + : Mode(mode) + , OptionsSchema(optionsSchema) + , Pos_(pos) + , RegexpGroups(regexpGroups) + { + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + return TUnboxedValuePod( + new TRe2Udf( + valueBuilder, + args[0], + RegexpGroups, + Mode, + posix, + OptionsSchema, + Pos_)); + } + + EMode Mode; + const TOptionsSchema OptionsSchema; + TSourcePosition Pos_; + const TRegexpGroups RegexpGroups; + }; + + static const TStringRef& Name(EMode mode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto capture = TStringRef::Of("Capture"); + static auto replace = TStringRef::Of("Replace"); + static auto count = TStringRef::Of("Count"); + static auto findAndconsume = TStringRef::Of("FindAndConsume"); + + switch (mode) { + case EMode::MATCH: + return match; + case EMode::GREP: + return grep; + case EMode::CAPTURE: + return capture; + case EMode::REPLACE: + return replace; + case EMode::COUNT: + return count; + case EMode::FIND_AND_CONSUME: + return findAndconsume; + } + Y_ABORT("Unexpected mode"); + } + + TRe2Udf( + const IValueBuilder*, + const TUnboxedValuePod& runConfig, + const TRegexpGroups regexpGroups, + EMode mode, + bool posix, + const TOptionsSchema& optionsSchema, + TSourcePosition pos) + : RegexpGroups(regexpGroups) + , Mode(mode) + , Captured() + , OptionsSchema(optionsSchema) + , Pos_(pos) + { + try { + auto patternValue = runConfig.GetElement(0); + auto optionsValue = runConfig.GetElement(1); + const std::string_view 
pattern(patternValue.AsStringRef()); + RE2::Options options; + + options.set_posix_syntax(posix); + bool needUtf8 = (UTF8Detect(pattern) == UTF8); + options.set_encoding( + needUtf8 + ? RE2::Options::Encoding::EncodingUTF8 + : RE2::Options::Encoding::EncodingLatin1 + ); + if (optionsValue) { +#define FIELD_HANDLE(name, index, type, defVal, setter, conv) options.setter(conv(optionsValue.GetElement(OptionsSchema.Indices[index]).Get<type>())); + OPTIONS_MAP(FIELD_HANDLE) +#undef FIELD_HANDLE + } + + Regexp = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); + + if (mode == EMode::CAPTURE) { + Captured = std::make_unique<StringPiece[]>(Regexp->NumberOfCapturingGroups() + 1); + } + + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + RE2::Anchor anchor = RE2::UNANCHORED; + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const StringPiece piece(input.data(), input.size()); + + switch (Mode) { + case MATCH: + anchor = RE2::ANCHOR_BOTH; + [[fallthrough]]; + case GREP: + return TUnboxedValuePod(Regexp->Match(piece, 0, input.size(), anchor, nullptr, 0)); + case CAPTURE: { + const int count = Regexp->NumberOfCapturingGroups() + 1; + TUnboxedValue* items = nullptr; + const auto result = valueBuilder->NewArray(RegexpGroups.Names.size(), items); + if (Regexp->Match(piece, 0, input.size(), anchor, Captured.get(), count)) { + for (int i = 0; i < count; ++i) { + if (!Captured[i].empty()) { + items[RegexpGroups.Indexes[i]] = valueBuilder->SubString(args[0], std::distance(piece.begin(), Captured[i].begin()), Captured[i].size()); + } + } + } else { + return BuildEmptyStruct(valueBuilder); + } + return result; + } + case REPLACE: { + const std::string_view rewriteRef(args[1].AsStringRef()); + const StringPiece rewrite(rewriteRef.data(), 
rewriteRef.size()); + TString rewriteError; + if (!Regexp->CheckRewriteString(rewrite, &rewriteError)) { + UdfTerminate((TStringBuilder() << Pos_ << " [rewrite error] " << rewriteError).data()); + } + std::string result(input); + RE2::GlobalReplace(&result, *Regexp, rewrite); + return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + } + case COUNT: { + std::string inputHolder(input); + const ui32 result = RE2::GlobalReplace(&inputHolder, *Regexp, ""); + return TUnboxedValuePod(result); + } + case FIND_AND_CONSUME: { + StringPiece text(piece); + std::vector<TUnboxedValue> matches; + for (StringPiece w; text.begin() < text.end() && RE2::FindAndConsume(&text, *Regexp, &w);) { + if (w.size() == 0) { + text.remove_prefix(1); + } + matches.emplace_back(valueBuilder->SubString(args[0], std::distance(piece.begin(), w.begin()), w.size())); + } + return valueBuilder->NewList(matches.data(), matches.size()); + } + } + Y_ABORT("Unexpected mode"); + } else { + switch (Mode) { + case MATCH: + case GREP: + return TUnboxedValuePod(false); + case CAPTURE: + return BuildEmptyStruct(valueBuilder); + case REPLACE: + return TUnboxedValuePod(); + case COUNT: + return TUnboxedValuePod::Zero(); + case FIND_AND_CONSUME: + return valueBuilder->NewEmptyList(); + } + Y_ABORT("Unexpected mode"); + } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + + std::unique_ptr<RE2> Regexp; + const TRegexpGroups RegexpGroups; + EMode Mode; + std::unique_ptr<StringPiece[]> Captured; + const TOptionsSchema OptionsSchema; + TSourcePosition Pos_; + + TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { + TUnboxedValue* items = nullptr; + return valueBuilder->NewArray(RegexpGroups.Names.size(), items); + } + }; + + SIMPLE_STRICT_UDF(TEscape, char*(char*)) { + const std::string_view input(args[0].AsStringRef()); + const auto& result = RE2::QuoteMeta(StringPiece(input.data(), input.size())); + return 
input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + } + + TOptionsSchema MakeOptionsSchema(::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) { + TOptionsSchema ret; + auto structBuilder = builder.Struct(EOptionsField::Count); +#define FIELD_HANDLE(name, index, type, ...) structBuilder->AddField<type>(TStringRef::Of(#name), &ret.Indices[index]); + OPTIONS_MAP(FIELD_HANDLE) +#undef FIELD_HANDLE + + ret.StructType = structBuilder->Build(); + return ret; + } + + class TOptions: public TBoxedValue { + private: + const TOptionsSchema Schema_; + + public: + TOptions(const TOptionsSchema& schema) + : Schema_(schema) + { + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + const auto result = valueBuilder->NewArray(EOptionsField::Count, items); +#define FIELD_HANDLE(name, index, type, defVal, ...) \ + { \ + auto structIndex = Schema_.Indices[index]; \ + if (!args[index]) { \ + items[structIndex] = TUnboxedValuePod(static_cast<type>(defVal)); \ + } else { \ + items[structIndex] = args[index].GetOptionalValue(); \ + } \ + } + + OPTIONS_MAP(FIELD_HANDLE) +#undef FIELD_HANDLE + return result; + } + + static const ::NKikimr::NUdf::TStringRef& Name() { + static auto name = ::NKikimr::NUdf::TStringRef::Of("Options"); + return name; + } + + static bool DeclareSignature( + const ::NKikimr::NUdf::TStringRef& name, + ::NKikimr::NUdf::TType* userType, + ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.IsStrict(); + + auto argsBuilder = builder.Args(); +#define FIELD_HANDLE(name, index, type, ...) 
argsBuilder->Add<TOptional<type>>().Name(TStringRef::Of(#name)); + OPTIONS_MAP(FIELD_HANDLE) +#undef FIELD_HANDLE + auto optionsSchema = MakeOptionsSchema(builder); + builder.Returns(optionsSchema.StructType); + builder.OptionalArgs(EOptionsField::Count); + if (!typesOnly) { + builder.Implementation(new TOptions(optionsSchema)); + } + + return true; + } else { + return false; + } + } + }; + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TPatternFromLike, char*(char*, TOptional<char*>), 1) { + const std::string_view input(args[0].AsStringRef()); + const bool hasEscape = bool(args[1]); + char escape = 0; + if (hasEscape) { + const std::string_view escapeRef(args[1].AsStringRef()); + if (escapeRef.size() != 1U) { + UdfTerminate((TStringBuilder() << GetPos() << " Escape should be single character").data()); + } + escape = escapeRef.front(); + } + const TString escaped(RE2::QuoteMeta(StringPiece(input.data(), input.size()))); + + TStringBuilder result; + result << "(?s)"; + bool slash = false; + bool escapeOn = false; + + for (const char& c : escaped) { + switch (c) { + case '\\': + if (slash) { + result << "\\\\"; + } + slash = !slash; + break; + case '%': + if (escapeOn) { + result << "\\%"; + escapeOn = false; + } else { + result << ".*"; + } + slash = false; + break; + case '_': + if (escapeOn) { + result << "\\_"; + escapeOn = false; + } else { + result << '.'; + } + slash = false; + break; + default: + if (hasEscape && c == escape) { + if (escapeOn) { + result << RE2::QuoteMeta(StringPiece(&c, 1)); + } + escapeOn = !escapeOn; + } else { + if (slash) + result << '\\'; + result << c; + escapeOn = false; + } + slash = false; + break; + } + } + return valueBuilder->NewString(result); + } + + TType* MakeRunConfigType(IFunctionTypeInfoBuilder& builder, TType* optOptionsStructType) { + return builder.Tuple()->Add<char*>().Add(optOptionsStructType).Build(); + } + + template <bool posix> + class TRe2Module: public IUdfModule { + public: + TStringRef Name() const { + return posix ? 
TStringRef::Of("Re2posix") : TStringRef::Of("Re2"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::MATCH)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::GREP)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::CAPTURE))->SetTypeAwareness(); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::REPLACE)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::COUNT)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::FIND_AND_CONSUME)); + sink.Add(TEscape::Name()); + sink.Add(TPatternFromLike::Name()); + sink.Add(TOptions::Name()); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { + Y_UNUSED(userType); + TOptionsSchema optionsSchema = MakeOptionsSchema(builder); + auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); + + bool typesOnly = (flags & TFlags::TypesOnly); + bool isMatch = (TRe2Udf::Name(TRe2Udf::EMode::MATCH) == name); + bool isGrep = (TRe2Udf::Name(TRe2Udf::EMode::GREP) == name); + bool isCapture = (TRe2Udf::Name(TRe2Udf::EMode::CAPTURE) == name); + bool isReplace = (TRe2Udf::Name(TRe2Udf::EMode::REPLACE) == name); + bool isCount = (TRe2Udf::Name(TRe2Udf::EMode::COUNT) == name); + bool isFindAndConsume = (TRe2Udf::Name(TRe2Udf::FIND_AND_CONSUME) == name); + + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + const auto mode = isMatch ? 
TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; + builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition())); + } + } else if (isCapture) { + TRegexpGroups groups; + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + auto structBuilder = builder.Struct(); + RE2 regexp(StringPiece(typeConfig.Data(), typeConfig.Size())); + const auto& groupNames = regexp.CapturingGroupNames(); + int groupCount = regexp.NumberOfCapturingGroups(); + if (groupCount >= 0) { + std::unordered_set<std::string_view> groupNamesSet; + int unnamedCount = 0; + ++groupCount; + groups.Indexes.resize(groupCount); + groups.Names.resize(groupCount); + for (int i = 0; i < groupCount; ++i) { + TString fieldName; + auto it = groupNames.find(i); + if (it != groupNames.end()) { + if (!groupNamesSet.insert(it->second).second) { + builder.SetError( + TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second); + return; + } + fieldName = it->second; + } else { + fieldName = "_" + ToString(unnamedCount); + ++unnamedCount; + } + groups.Names[i] = fieldName; + structBuilder->AddField(fieldName, optionalStringType, &groups.Indexes[i]); + } + builder.Args(1)->Add(optionalStringType).Done().Returns(structBuilder->Build()).RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation( + new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), groups)); + } + + } else { + if (regexp.ok()) { + builder.SetError("Regexp contains no capturing groups"); + } else { + builder.SetError(regexp.error()); + } + } + } else if (isReplace) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition())); + } + } else if (isCount) { + 
builder.SimpleSignature<ui32(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition())); + } + } else if (isFindAndConsume) { + builder.SimpleSignature<TListType<char*>(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition())); + } + } else if (!( + TEscape::DeclareSignature(name, userType, builder, typesOnly) || + TPatternFromLike::DeclareSignature(name, userType, builder, typesOnly) || + TOptions::DeclareSignature(name, userType, builder, typesOnly))) { + builder.SetError( + TStringBuilder() << "Unknown function name: " << TString(name)); + } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + }; + +} + +REGISTER_MODULES( + TRe2Module<false>, + TRe2Module<true>) diff --git a/yql/essentials/udfs/common/re2/test/canondata/result.json b/yql/essentials/udfs/common/re2/test/canondata/result.json new file mode 100644 index 00000000000..b9a16f32d66 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/result.json @@ -0,0 +1,52 @@ +{ + "test.test[BackslashInLike]": [ + { + "uri": "file://test.test_BackslashInLike_/results.txt" + } + ], + "test.test[BasicOptions]": [ + { + "uri": "file://test.test_BasicOptions_/results.txt" + } + ], + "test.test[Basic]": [ + { + "uri": "file://test.test_Basic_/results.txt" + } + ], + "test.test[DefOptions]": [ + { + "uri": "file://test.test_DefOptions_/results.txt" + } + ], + "test.test[FindAndConsumeEmpty]": [ + { + "uri": "file://test.test_FindAndConsumeEmpty_/results.txt" + } + ], + "test.test[LikeEscape]": [ + { + "uri": "file://test.test_LikeEscape_/results.txt" + } + ], + "test.test[MultipleCaptureGroups]": [ + { + "uri": 
"file://test.test_MultipleCaptureGroups_/extracted" + } + ], + "test.test[MutableLambda]": [ + { + "uri": "file://test.test_MutableLambda_/results.txt" + } + ], + "test.test[SkipGroup]": [ + { + "uri": "file://test.test_SkipGroup_/results.txt" + } + ], + "test.test[Space]": [ + { + "uri": "file://test.test_Space_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt new file mode 100644 index 00000000000..cbd3b76ebac --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_BackslashInLike_/results.txt @@ -0,0 +1,28 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt new file mode 100644 index 00000000000..ba109c8a084 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_BasicOptions_/results.txt @@ -0,0 +1,278 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "match"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "grep"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "capture"; + [ + "StructType"; + [ + [ + "_0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "_1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "foo"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + [ + "capture_member"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "replace"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "count"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "tokens"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + ""; + %false; + %false; + [ + #; + #; + # + ]; + #; + [ + "" + ]; + "0"; + [] + ]; + [ + "a"; + %false; + %true; + [ + #; + #; + # + ]; + #; + [ + "a" + ]; + "1"; + [ + "a" + ] + ]; + [ + "aax"; + %false; + %true; + [ + #; + #; + # + ]; + #; + [ + "aax" + ]; + "2"; + [ + "aax" + ] + ]; + [ + "xaax1"; + %false; + %true; + [ + [ + "xaax1" + ]; + [ + "aa" + ]; + [ + "x" + ] + ]; + [ + "aa" + ]; + [ + "baaz1" + ]; + "2"; + [ + "xaax1" + ] + ]; + [ + "xaaxaaxaa"; + %false; + %true; + [ + [ + "xaaxaaxaa" + ]; + [ + "aa" + ]; + [ + "x" + ] + ]; + [ + "aa" + ]; + [ + "baazaaxaa" + ]; + "6"; + [ + "xaaxaaxaa" + ] + ]; + [ + "sup, dude"; + %false; + %false; + [ + #; + #; + # + ]; + #; + [ + "sup, dude" + ]; + "0"; + [ + "sup"; + "dude" + ] + ]; + [ + "one, two, three."; + 
%false; + %false; + [ + #; + #; + # + ]; + #; + [ + "one, two, three." + ]; + "0"; + [ + "one"; + "two"; + "three" + ] + ]; + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD1\x8D\xD1\x82\xD0\xBE \xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8\xD0\xB5, \xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\213111!"; + %false; + %false; + [ + #; + #; + # + ]; + #; + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD1\x8D\xD1\x82\xD0\xBE \xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8\xD0\xB5, \xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\213111!" + ]; + "0"; + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"; + "\xD1\x8D\xD1\x82\xD0\xBE"; + "\xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8\xD0\xB5"; + "\xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\213111" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt new file mode 100644 index 00000000000..d57d92025ef --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_Basic_/results.txt @@ -0,0 +1,257 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "match"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "grep"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "capture"; + [ + "StructType"; + [ + [ + "_0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "_1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "foo"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + [ + "capture_member"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "replace"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "count"; + [ + "DataType"; + "Uint32" + ] + ]; + [ + "tokens"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + ""; + %false; + %false; + [ + #; + #; + # + ]; + #; + [ + "" + ]; + "0"; + [] + ]; + [ + "a"; + %false; + %true; + [ + #; + #; + # + ]; + #; + [ + "a" + ]; + "1"; + [ + "a" + ] + ]; + [ + "aax"; + %false; + %true; + [ + #; + #; + # + ]; + #; + [ + "aax" + ]; + "2"; + [ + "aax" + ] + ]; + [ + "xaax1"; + %false; + %true; + [ + [ + "xaax1" + ]; + [ + "aa" + ]; + [ + "x" + ] + ]; + [ + "aa" + ]; + [ + "baaz1" + ]; + "2"; + [ + "xaax1" + ] + ]; + [ + "xaaxaaxaa"; + %false; + %true; + [ + [ + "xaaxaaxaa" + ]; + [ + "aa" + ]; + [ + "x" + ] + ]; + [ + "aa" + ]; + [ + "baazaaxaa" + ]; + "6"; + [ + "xaaxaaxaa" + ] + ]; + [ + "sup, dude"; + %false; + %false; + [ + #; + #; + # + ]; + #; + [ + "sup, dude" + ]; + "0"; + [ + "sup"; + "dude" + ] + ]; + [ + "one, two, three."; + %false; + %false; + [ + #; 
+ #; + # + ]; + #; + [ + "one, two, three." + ]; + "0"; + [ + "one"; + "two"; + "three" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt new file mode 100644 index 00000000000..1287d3da2e8 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_DefOptions_/results.txt @@ -0,0 +1,266 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "StructType"; + [ + [ + "CaseSensitive"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "DotNl"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "Literal"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "LogErrors"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "LongestMatch"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "MaxMem"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "NeverCapture"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "NeverNl"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "OneLine"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "PerlClasses"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "PosixSyntax"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "Utf8"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "WordBoundary"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true; + %false; + %false; + %true; + %false; + "8388608"; + %false; + %false; + %false; + %false; + %false; + %true; + %false + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "StructType"; + [ + [ + "CaseSensitive"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "DotNl"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "Literal"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "LogErrors"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "LongestMatch"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "MaxMem"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "NeverCapture"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "NeverNl"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "OneLine"; + [ + "DataType"; + 
"Bool" + ] + ]; + [ + "PerlClasses"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "PosixSyntax"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "Utf8"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "WordBoundary"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true; + %false; + %false; + %true; + %false; + "8388608"; + %false; + %false; + %false; + %false; + %false; + %true; + %false + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt new file mode 100644 index 00000000000..55958536293 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_FindAndConsumeEmpty_/results.txt @@ -0,0 +1,66 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "a"; + ""; + "aa" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt new file mode 100644 index 00000000000..b9dbaf1f91f --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_LikeEscape_/results.txt @@ -0,0 +1,76 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %true; + %true; + %true; + %true; + %true; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted b/yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted new file mode 100644 index 00000000000..2441849448b --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted @@ -0,0 +1,14 @@ +<tmp_path>/program.sql:<main>: Error: Type annotation + + <tmp_path>/program.sql:<main>:8:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem + select $regexp("abc"); + ^ + <tmp_path>/program.sql:<main>:8:8: Error: At function: Apply + select $regexp("abc"); + ^ + <tmp_path>/program.sql:<main>:4:16: Error: At function: Udf, At Re2.Capture + $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)"); + ^ + <tmp_path>/program.sql:<main>:4:16: Error: Failed to find UDF function: Re2.Capture, reason: Error: Module: Re2, function: Capture, error: Regexp contains duplicate capturing group name: groupname1 + $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)"); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt new file mode 100644 index 00000000000..4e62b7d8ce4 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_MutableLambda_/results.txt @@ -0,0 +1,52 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "x"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "e"; + %false + ]; + [ + "aa"; + %true + ]; + [ + "et"; + %false + ]; + [ + "cb"; + %false + ]; + [ + "ba"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt new file mode 100644 index 00000000000..466ed839507 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_SkipGroup_/results.txt @@ -0,0 +1,105 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "capture"; + [ + "StructType"; + [ + [ + "_0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "_1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "major"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "minor"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + [ + "no_groups"; + [ + "StructType"; + [ + [ + "_0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari" + ]; + [ + "Safari" + ]; + [ + "5" + ]; + [ + "0" + ] + ]; + [ + [ + "Intel Mac" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt new file mode 100644 index 00000000000..b62998b6977 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_Space_/results.txt @@ -0,0 +1,28 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql b/yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql new file mode 100644 index 00000000000..65973c7d812 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/BackslashInLike.sql @@ -0,0 +1 @@ +select 'utma' like @@%utm\_@@; diff --git a/yql/essentials/udfs/common/re2/test/cases/Basic.in b/yql/essentials/udfs/common/re2/test/cases/Basic.in new file mode 100644 index 00000000000..ba0028e8611 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/Basic.in @@ -0,0 +1,7 @@ +{"key"="1";"subkey"="1";"value"=""}; +{"key"="2";"subkey"="2";"value"="a"}; +{"key"="3";"subkey"="3";"value"="aax"}; +{"key"="4";"subkey"="4";"value"="xaax1"}; +{"key"="5";"subkey"="5";"value"="xaaxaaxaa"}; +{"key"="6";"subkey"="6";"value"="sup, dude"}; +{"key"="7";"subkey"="7";"value"="one, two, three."}; diff --git a/yql/essentials/udfs/common/re2/test/cases/Basic.sql b/yql/essentials/udfs/common/re2/test/cases/Basic.sql new file mode 100644 index 00000000000..7d049f88b2f --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/Basic.sql @@ -0,0 +1,21 @@ +/* syntax version 1 */ +$match = Re2::Match("[ax]+\d"); +$grep = Re2Posix::Grep("a.*"); +$capture = Re2::Capture(".*(?P<foo>xa?)(a{2,}).*"); +$replace = Re2::Replace("x(a+)x"); +$count = Re2::Count("a"); +-- regex to find all tokens consisting of letters and digits +-- L stands for "Letters", Nd stands for "Number, decimal digit", +-- see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category +$find_and_consume = Re2::FindAndConsume('([\\pL\\p{Nd}]+)'); + +SELECT + value, + $match(value) AS match, + $grep(value) AS grep, + $capture(value) AS capture, + $capture(value)._1 AS capture_member, + $replace(value, "b\\1z") AS replace, + $count(value) AS count, + $find_and_consume(value) AS tokens +FROM Input; diff --git a/yql/essentials/udfs/common/re2/test/cases/BasicOptions.in 
b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.in new file mode 100644 index 00000000000..f63986dffef --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.in @@ -0,0 +1,8 @@ +{"key"="1";"subkey"="1";"value"=""}; +{"key"="2";"subkey"="2";"value"="a"}; +{"key"="3";"subkey"="3";"value"="aax"}; +{"key"="4";"subkey"="4";"value"="xaax1"}; +{"key"="5";"subkey"="5";"value"="xaaxaaxaa"}; +{"key"="6";"subkey"="6";"value"="sup, dude"}; +{"key"="7";"subkey"="7";"value"="one, two, three."}; +{"key"="7";"subkey"="7";"value"="привет это русские, буквы111!"}; diff --git a/yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql new file mode 100644 index 00000000000..a4338c03e18 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/BasicOptions.sql @@ -0,0 +1,22 @@ +/* syntax version 1 */ +$options = Re2::Options(true as Utf8); +$match = Re2::Match("[ax]+\d",$options); +$grep = Re2Posix::Grep("a.*",$options); +$capture = Re2::Capture(".*(?P<foo>xa?)(a{2,}).*",$options); +$replace = Re2::Replace("x(a+)x",$options); +$count = Re2::Count("a",$options); +-- regex to find all tokens consisting of letters and digits +-- L stands for "Letters", Nd stands for "Number, decimal digit", +-- see https://en.wikipedia.org/wiki/Unicode_character_property#General_Category +$find_and_consume = Re2::FindAndConsume('([\\pL\\p{Nd}]+)',$options); + +SELECT + value, + $match(value) AS match, + $grep(value) AS grep, + $capture(value) AS capture, + $capture(value)._1 AS capture_member, + $replace(value, "b\\1z") AS replace, + $count(value) AS count, + $find_and_consume(value) AS tokens +FROM Input; diff --git a/yql/essentials/udfs/common/re2/test/cases/DefOptions.sql b/yql/essentials/udfs/common/re2/test/cases/DefOptions.sql new file mode 100644 index 00000000000..d21a7108a82 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/DefOptions.sql @@ -0,0 +1,19 @@ +/* syntax version 1 */ +select 
Re2::Options( + true as `Utf8`, + false as PosixSyntax, + false as LongestMatch, + true as LogErrors, + 8<<20 as MaxMem, + false as Literal, + false as NeverNl, + false as DotNl, + false as NeverCapture, + true as CaseSensitive, + false as PerlClasses, + false as WordBoundary, + false as OneLine +); + +select Re2::Options( +); diff --git a/yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql b/yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql new file mode 100644 index 00000000000..e40ad0deb98 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/FindAndConsumeEmpty.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +$regexp1 = Re2::FindAndConsume("(a*)"); +$regexp2 = Re2::FindAndConsume("a(b*)"); + +SELECT $regexp1("abaa"); +SELECT $regexp2("a");
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql b/yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql new file mode 100644 index 00000000000..ca488640767 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/LikeEscape.sql @@ -0,0 +1,7 @@ +SELECT '?' LIKE '%??%' ESCAPE '?', + 'x_' LIKE '%xxx_' ESCAPE 'x', + '[' LIKE '[' ESCAPE '!', + '.' LIKE '..' ESCAPE '.', + '[' LIKE '[[' ESCAPE '[', + 'a%b' LIKE '.a.%.b' ESCAPE '.', + 'x' LIKE '..' ESCAPE '.';
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg new file mode 100644 index 00000000000..eb2e5315d1e --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg @@ -0,0 +1 @@ +xfail
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql new file mode 100644 index 00000000000..49e0da34fd2 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/MultipleCaptureGroups.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)"); + +select $regexp("abc");
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/cases/MutableLambda.in b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.in diff --git a/yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql new file mode 100644 index 00000000000..5e3f24be0b2 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/MutableLambda.sql @@ -0,0 +1,24 @@ +/* syntax version 1 */ +$regs = AsList("^a","^b"); + +$input = AsList("e","aa","et","cb","ba"); + +$table_input = (select * from (select $input as x) flatten by x); + +$compiled_regs = ListMap($regs, ($r)->{ + return Re2::Grep($r); +}); + +$f = ($s) -> { + $apply_list = ListMap($compiled_regs, ($cr)->{ + return $cr($s); + }); + + $filtered = ListFilter($apply_list, ($m)->{ + return $m; + }); + + return ListLength(ListTake($filtered,1)) > 0; +}; + +select x, $f(x) from $table_input; diff --git a/yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql b/yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql new file mode 100644 index 00000000000..5231c72fcbf --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/SkipGroup.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +$input = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7"; +$capture = Re2::Capture( + "(?:Mozilla|Opera)/(?P<major>\\d)\\.(?P<minor>\\d).*(Safari)" +); +$no_groups = Re2::Capture("(?:Intel) Mac"); + +SELECT + $capture($input) AS capture, + $no_groups($input) AS no_groups; diff --git a/yql/essentials/udfs/common/re2/test/cases/Space.sql b/yql/essentials/udfs/common/re2/test/cases/Space.sql new file mode 100644 index 00000000000..34f0590ca1f --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/Space.sql @@ -0,0 +1,2 @@ +/* syntax version 1 */ +SELECT 'a 
b c' LIKE 'a b%'; diff --git a/yql/essentials/udfs/common/re2/test/ya.make b/yql/essentials/udfs/common/re2/test/ya.make new file mode 100644 index 00000000000..179b2ca19bf --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/re2) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/re2/ya.make b/yql/essentials/udfs/common/re2/ya.make new file mode 100644 index 00000000000..426916222c8 --- /dev/null +++ b/yql/essentials/udfs/common/re2/ya.make @@ -0,0 +1,30 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319903255 OUT_NOAUTO libre2_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(re2_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + re2_udf.cpp + ) + + PEERDIR( + contrib/libs/re2 + library/cpp/deprecated/enum_codegen + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/set/set_udf.cpp b/yql/essentials/udfs/common/set/set_udf.cpp new file mode 100644 index 00000000000..4a9d050a31a --- /dev/null +++ b/yql/essentials/udfs/common/set/set_udf.cpp @@ -0,0 +1,576 @@ +#include <yql/essentials/public/udf/udf_type_ops.h> +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <unordered_set> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + +template <typename THash, typename TEquals> +class TSetBase { +private: + std::unordered_set<TUnboxedValue, THash, TEquals, TUnboxedValue::TAllocator> Set; + ui32 MaxSize = 0; + bool WasChanged = false; + +protected: + TSetBase(THash hash, TEquals equals) + : Set(1, hash, equals) + {} + + void Init(const TUnboxedValuePod& value, ui32 maxSize) { + MaxSize = maxSize ? 
maxSize : std::numeric_limits<ui32>::max(); + AddValue(value); + } + + void Merge(const TSetBase& left, const TSetBase& right) { + MaxSize = std::max(left.MaxSize, right.MaxSize); + for (const auto& item : left.Set) { + AddValue(item); + } + for (const auto& item : right.Set) { + AddValue(item); + } + } + + void Deserialize(const TUnboxedValuePod& serialized) { + MaxSize = serialized.GetElement(0).Get<ui32>(); + auto list = serialized.GetElement(1); + + const auto listIter = list.GetListIterator(); + for (TUnboxedValue current; listIter.Next(current);) { + AddValue(current); + } + } + +public: + void ResetChanged() { + WasChanged = false; + } + + bool Changed() const { + return WasChanged; + } + + TUnboxedValue Serialize(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Set.size(), values); + + for (const auto& item : Set) { + *values++ = item; + } + + TUnboxedValue* items = nullptr; + auto result = builder->NewArray(2U, items); + items[0] = TUnboxedValuePod(MaxSize); + items[1] = list; + + return result; + } + + TUnboxedValue GetResult(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto result = builder->NewArray(Set.size(), values); + + for (const auto& item : Set) { + *values++ = item; + } + return result; + } + + void AddValue(const TUnboxedValuePod& value) { + if (Set.size() < MaxSize) { + WasChanged = Set.insert(TUnboxedValuePod(value)).second; + } + } +}; + +template <EDataSlot Slot> +class TSetData + : public TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>> +{ +public: + using TBase = TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>>; + + TSetData(const TUnboxedValuePod& value, ui32 maxSize) + : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>()) + { + TBase::Init(value, maxSize); + } + + TSetData(const TSetData& left, const TSetData& right) + : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>()) + { + TBase::Merge(left, right); + } + + explicit 
TSetData(const TUnboxedValuePod& serialized) + : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>()) + { + TBase::Deserialize(serialized); + } +}; + +struct TGenericHash { + IHash::TPtr Hash; + + std::size_t operator()(const TUnboxedValuePod& value) const { + return Hash->Hash(value); + } +}; + +struct TGenericEquals { + IEquate::TPtr Equate; + + bool operator()(const TUnboxedValuePod& left, const TUnboxedValuePod& right) const { + return Equate->Equals(left, right); + } +}; + +class TSetGeneric + : public TSetBase<TGenericHash, TGenericEquals> +{ +public: + using TBase = TSetBase<TGenericHash, TGenericEquals>; + + TSetGeneric(const TUnboxedValuePod& value, ui32 maxSize, + IHash::TPtr hash, IEquate::TPtr equate) + : TBase(TGenericHash{hash}, TGenericEquals{equate}) + { + TBase::Init(value, maxSize); + } + + TSetGeneric(const TSetGeneric& left, const TSetGeneric& right, + IHash::TPtr hash, IEquate::TPtr equate) + : TBase(TGenericHash{hash}, TGenericEquals{equate}) + { + TBase::Merge(left, right); + } + + TSetGeneric(const TUnboxedValuePod& serialized, + IHash::TPtr hash, IEquate::TPtr equate) + : TBase(TGenericHash{hash}, TGenericEquals{equate}) + { + TBase::Deserialize(serialized); + } +}; + +extern const char SetResourceNameGeneric[] = "Set.SetResource.Generic"; +class TSetResource: + public TBoxedResource<TSetGeneric, SetResourceNameGeneric> +{ +public: + template <typename... Args> + inline TSetResource(Args&&... args) + : TBoxedResource(std::forward<Args>(args)...) 
+ {} +}; + +template <EDataSlot Slot> +class TSetResourceData; + +template <EDataSlot Slot> +TSetResourceData<Slot>* GetSetResourceData(const TUnboxedValuePod& arg) { + TSetResourceData<Slot>::Validate(arg); + return static_cast<TSetResourceData<Slot>*>(arg.AsBoxed().Get()); +} + +TSetResource* GetSetResource(const TUnboxedValuePod& arg) { + TSetResource::Validate(arg); + return static_cast<TSetResource*>(arg.AsBoxed().Get()); +} + + +template <EDataSlot Slot> +class TSetCreateData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return TUnboxedValuePod(new TSetResourceData<Slot>(args[0], args[1].Get<ui32>())); + } +}; + +class TSetCreate: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return TUnboxedValuePod(new TSetResource(args[0], args[1].Get<ui32>(), Hash_, Equate_)); + } + +public: + TSetCreate(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + {} + +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; + +template <EDataSlot Slot> +class TSetAddValueData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetSetResourceData<Slot>(args[0]); + resource->Get()->ResetChanged(); + resource->Get()->AddValue(args[1]); + return TUnboxedValuePod(resource); + } +}; + +class TSetAddValue: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetSetResource(args[0]); + resource->Get()->ResetChanged(); + resource->Get()->AddValue(args[1]); + return TUnboxedValuePod(resource); + } +}; + +template <EDataSlot Slot> +class TSetWasChangedData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetSetResourceData<Slot>(args[0]); + return 
TUnboxedValuePod(resource->Get()->Changed()); + } +}; + +class TSetWasChanged: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetSetResource(args[0]); + return TUnboxedValuePod(resource->Get()->Changed()); + } +}; + +template <EDataSlot Slot> +class TSetSerializeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + return GetSetResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); + } +}; + +class TSetSerialize: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + return GetSetResource(args[0])->Get()->Serialize(valueBuilder); + } +}; + +template <EDataSlot Slot> +class TSetDeserializeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return TUnboxedValuePod(new TSetResourceData<Slot>(args[0])); + } +}; + +class TSetDeserialize: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return TUnboxedValuePod(new TSetResource(args[0], Hash_, Equate_)); + } + +public: + TSetDeserialize(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + {} + +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; + +template <EDataSlot Slot> +class TSetMergeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto left = GetSetResourceData<Slot>(args[0]); + auto right = GetSetResourceData<Slot>(args[1]); + return TUnboxedValuePod(new TSetResourceData<Slot>(*left->Get(), *right->Get())); + } +}; + +class TSetMerge: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto left = GetSetResource(args[0]); + auto 
right = GetSetResource(args[1]); + return TUnboxedValuePod(new TSetResource(*left->Get(), *right->Get(), Hash_, Equate_)); + } + +public: + TSetMerge(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + {} + +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; + +template <EDataSlot Slot> +class TSetGetResultData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + return GetSetResourceData<Slot>(args[0])->Get()->GetResult(valueBuilder); + } +}; + +class TSetGetResult: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + return GetSetResource(args[0])->Get()->GetResult(valueBuilder); + } +}; + + +#define MAKE_RESOURCE(slot, ...) \ +extern const char SetResourceName##slot[] = "Set.SetResource."#slot; \ +template <> \ +class TSetResourceData<EDataSlot::slot>: \ + public TBoxedResource<TSetData<EDataSlot::slot>, SetResourceName##slot> \ +{ \ +public: \ + template <typename... Args> \ + inline TSetResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + {} \ +}; + +UDF_TYPE_ID_MAP(MAKE_RESOURCE) + +#define MAKE_IMPL(operation, slot) \ +case EDataSlot::slot: \ + builder.Implementation(new operation<EDataSlot::slot>); \ + break; + +#define MAKE_CREATE(slot, ...) MAKE_IMPL(TSetCreateData, slot) +#define MAKE_ADD_VALUE(slot, ...) MAKE_IMPL(TSetAddValueData, slot) +#define MAKE_WAS_CHANGED(slot, ...) MAKE_IMPL(TSetWasChangedData, slot) +#define MAKE_SERIALIZE(slot, ...) MAKE_IMPL(TSetSerializeData, slot) +#define MAKE_DESERIALIZE(slot, ...) MAKE_IMPL(TSetDeserializeData, slot) +#define MAKE_MERGE(slot, ...) MAKE_IMPL(TSetMergeData, slot) +#define MAKE_GET_RESULT(slot, ...) MAKE_IMPL(TSetGetResultData, slot) + +#define MAKE_TYPE(slot, ...) 
\ +case EDataSlot::slot: \ + setType = builder.Resource(SetResourceName##slot); \ + break; + + +static const auto CreateName = TStringRef::Of("Create"); +static const auto AddValueName = TStringRef::Of("AddValue"); +static const auto WasChangedName = TStringRef::Of("WasChanged"); // must be used right after AddValue +static const auto SerializeName = TStringRef::Of("Serialize"); +static const auto DeserializeName = TStringRef::Of("Deserialize"); +static const auto MergeName = TStringRef::Of("Merge"); +static const auto GetResultName = TStringRef::Of("GetResult"); + +class TSetModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Set"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(AddValueName)->SetTypeAwareness(); + sink.Add(WasChangedName)->SetTypeAwareness(); + sink.Add(SerializeName)->SetTypeAwareness(); + sink.Add(DeserializeName)->SetTypeAwareness(); + sink.Add(MergeName)->SetTypeAwareness(); + sink.Add(GetResultName)->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + try { + const bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); + + auto typeHelper = builder.TypeInfoHelper(); + + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } + + bool isGeneric = false; + IHash::TPtr hash; + IEquate::TPtr equate; + TMaybe<EDataSlot> slot; + + auto valueType = userTypeInspector.GetElementType(2); + auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType); + if (!valueTypeInspector) { + isGeneric = true; + hash = builder.MakeHash(valueType); + equate = 
builder.MakeEquate(valueType); + if (!hash || !equate) { + return; + } + } else { + slot = FindDataSlot(valueTypeInspector.GetTypeId()); + if (!slot) { + builder.SetError("Unknown data type"); + return; + } + const auto& info = NUdf::GetDataTypeInfo(*slot); + const auto& features = info.Features; + if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { + builder.SetError(TStringBuilder() << "Type " << info.Name << " is not hashable or equatable"); + return; + } + } + + auto serializedListType = builder.List()->Item(valueType).Build(); + auto serializedType = builder.Tuple()->Add<ui32>().Add(serializedListType).Build(); + + TType* setType = nullptr; + if (isGeneric) { + setType = builder.Resource(SetResourceNameGeneric); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_TYPE) + } + } + + if (name == CreateName) { + builder.IsStrict(); + + builder.Args()->Add(valueType).Add<ui32>().Done().Returns(setType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetCreate(hash, equate)); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_CREATE) + } + } + } + } + + if (name == AddValueName) { + builder.IsStrict(); + + builder.Args()->Add(setType).Add(valueType).Done().Returns(setType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetAddValue); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) + } + } + } + } + + if (name == WasChangedName) { + builder.IsStrict(); + + builder.Args()->Add(setType).Done().Returns<bool>(); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetWasChanged); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_WAS_CHANGED) + } + } + } + } + + if (name == MergeName) { + builder.IsStrict(); + + builder.Args()->Add(setType).Add(setType).Done().Returns(setType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetMerge(hash, equate)); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_MERGE) + } + } + } + } + + if (name == 
SerializeName) { + builder.IsStrict(); + + builder.Args()->Add(setType).Done().Returns(serializedType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetSerialize); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_SERIALIZE) + } + } + } + } + + if (name == DeserializeName) { + builder.Args()->Add(serializedType).Done().Returns(setType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetDeserialize(hash, equate)); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) + } + } + } + } + + if (name == GetResultName) { + auto resultType = builder.List()->Item(valueType).Build(); + + builder.IsStrict(); + + builder.Args()->Add(setType).Done().Returns(resultType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TSetGetResult); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_GET_RESULT) + } + } + } + } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } +}; + +} // namespace + +REGISTER_MODULES(TSetModule) diff --git a/yql/essentials/udfs/common/set/test/canondata/result.json b/yql/essentials/udfs/common/set/test/canondata/result.json new file mode 100644 index 00000000000..a235fbf9029 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/result.json @@ -0,0 +1,47 @@ +{ + "test.test[ListDistinctDictInDict]": [ + { + "uri": "file://test.test_ListDistinctDictInDict_/results.txt" + } + ], + "test.test[ListDistinctDict]": [ + { + "uri": "file://test.test_ListDistinctDict_/results.txt" + } + ], + "test.test[ListDistinctLazyList]": [ + { + "uri": "file://test.test_ListDistinctLazyList_/results.txt" + } + ], + "test.test[ListDistinctLimit]": [ + { + "uri": "file://test.test_ListDistinctLimit_/results.txt" + } + ], + "test.test[ListDistinctSingular]": [ + { + "uri": "file://test.test_ListDistinctSingular_/results.txt" + } + ], + "test.test[ListDistinctStructInDict]": [ + { + "uri": 
"file://test.test_ListDistinctStructInDict_/results.txt" + } + ], + "test.test[ListDistinctTuple]": [ + { + "uri": "file://test.test_ListDistinctTuple_/results.txt" + } + ], + "test.test[ListDistinctVariant]": [ + { + "uri": "file://test.test_ListDistinctVariant_/results.txt" + } + ], + "test.test[ListDistinct]": [ + { + "uri": "file://test.test_ListDistinct_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt new file mode 100644 index 00000000000..09714369716 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDictInDict_/results.txt @@ -0,0 +1,79 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DictType"; + [ + "DictType"; + [ + "DataType"; + "Int32" + ]; + [ + "VoidType" + ] + ]; + [ + "VoidType" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + [ + "1"; + "Void" + ] + ]; + "Void" + ] + ]; + [ + [ + [ + [ + "1"; + "Void" + ] + ]; + "Void" + ]; + [ + [ + [ + "2"; + "Void" + ]; + [ + "1"; + "Void" + ] + ]; + "Void" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt new file mode 100644 index 00000000000..c2f9c4e101f --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctDict_/results.txt @@ -0,0 +1,103 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DictType"; + [ + "DataType"; + "Int32" + ]; + [ + "VoidType" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + "1"; + "Void" + ] + ]; + [ + [ + "2"; + "Void" + ]; + [ + "1"; + "Void" + ] + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DictType"; + [ + "DataType"; + "Int32" + ]; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + "1"; + "3" + ] + ]; + [ + [ + "1"; + "2" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt new file mode 100644 index 00000000000..3749a4a3b75 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLazyList_/results.txt @@ -0,0 +1,85 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "1"; + "2"; + "3" + ]; + [ + "1"; + "2" + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "1"; + "3" + ]; + [ + "1"; + "2" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt new file mode 100644 index 00000000000..dbcfe2be9dc --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctLimit_/results.txt @@ -0,0 +1,57 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + "a"; + "c"; + "b" + ] + ]; + [ + "2"; + [ + "x"; + "y"; + "u" + ] + ]; + [ + "3"; + [ + "m" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt new file mode 100644 index 00000000000..7fb0c4f89b1 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctSingular_/results.txt @@ -0,0 +1,115 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "EmptyListType" + ] + ] + ] + ] + ]; + "Data" = [ + [ + [] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "VoidType" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "EmptyListType" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "EmptyDictType" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt new file mode 100644 index 00000000000..09933a72e3d --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctStructInDict_/results.txt @@ -0,0 +1,71 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DictType"; + [ + "StructType"; + [ + [ + "a"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "b"; + [ + "DataType"; + "Int32" + ] + ] + ] + ]; + [ + "VoidType" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + [ + "1"; + "3" + ]; + "Void" + ] + ]; + [ + [ + [ + "1"; + "2" + ]; + "Void" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt new file mode 100644 index 00000000000..510244f4301 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctTuple_/results.txt @@ -0,0 +1,115 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + [ + "1"; + "a" + ]; + [ + "1"; + "e" + ]; + [ + "1"; + "b" + ]; + [ + "1"; + "c" + ]; + [ + "2"; + "a" + ] + ] + ]; + [ + "2"; + [ + [ + "2"; + "x" + ]; + [ + "3"; + "y" + ]; + [ + "4"; + "x" + ]; + [ + "5"; + "u" + ]; + [ + "6"; + "v" + ]; + [ + "8"; + "x" + ]; + [ + "7"; + "w" + ]; + [ + "9"; + "w" + ] + ] + ]; + [ + "3"; + [ + [ + "0"; + "m" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt new file mode 100644 index 00000000000..e0ce566a468 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinctVariant_/results.txt @@ -0,0 +1,108 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "DataType"; + "Int32" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "1"; + "str" + ]; + [ + "0"; + "1" + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "VariantType"; + [ + "StructType"; + [ + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "y"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "1"; + "str" + ]; + [ + "0"; + "1" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt new file mode 100644 index 00000000000..937160a4571 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/canondata/test.test_ListDistinct_/results.txt @@ -0,0 +1,60 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + "a"; + "c"; + "b"; + "e" + ] + ]; + [ + "2"; + [ + "x"; + "y"; + "v"; + "u"; + "w" + ] + ]; + [ + "3"; + [ + "m" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinct.in b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in new file mode 100644 index 00000000000..af51412a1e3 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="";"value"="a"}; +{"key"="1";"subkey"="";"value"="b"}; +{"key"="1";"subkey"="";"value"="c"}; +{"key"="1";"subkey"="";"value"="a"}; +{"key"="1";"subkey"="";"value"="e"}; +{"key"="1";"subkey"="";"value"="b"}; +{"key"="2";"subkey"="";"value"="x"}; +{"key"="2";"subkey"="";"value"="y"}; +{"key"="2";"subkey"="";"value"="x"}; +{"key"="2";"subkey"="";"value"="u"}; +{"key"="2";"subkey"="";"value"="v"}; +{"key"="2";"subkey"="";"value"="w"}; +{"key"="2";"subkey"="";"value"="x"}; +{"key"="2";"subkey"="";"value"="w"}; +{"key"="3";"subkey"="";"value"="m"}; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinct.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinct.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinct.sql new file mode 100644 index 00000000000..b13b6c9c715 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinct.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + AGGREGATE_LIST_DISTINCT(value) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql new file mode 100644 index 00000000000..2bee58b66e9 
--- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctDict.sql @@ -0,0 +1,8 @@ +/* syntax version 1 */ +select AGGREGATE_LIST_DISTINCT(x) from +(select [{1,2},{1},{1,2}] as x) +flatten list by x; + +select AGGREGATE_LIST_DISTINCT(x) from +(select [{1:2},{1:3},{1:2}] as x) +flatten list by x; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql new file mode 100644 index 00000000000..775b8dc797a --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctDictInDict.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +select AGGREGATE_LIST_DISTINCT(x) from +(select [{{1,2},{1}},{{1}},{{1,2},{1}}] as x) +flatten list by x; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql new file mode 100644 index 00000000000..fcaf49e69d6 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLazyList.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +select AGGREGATE_LIST_DISTINCT(x) from +(select [[1,2],[1,2,3],YQL::LazyList([1,2])] as x) +flatten list by x; + +select AGGREGATE_LIST_DISTINCT(x) from +(select [YQL::LazyList([1,2]),[1,3], YQL::LazyList([1,2])] as x) +flatten list by x; + diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in new file mode 100644 index 00000000000..af51412a1e3 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="";"value"="a"}; +{"key"="1";"subkey"="";"value"="b"}; +{"key"="1";"subkey"="";"value"="c"}; +{"key"="1";"subkey"="";"value"="a"}; +{"key"="1";"subkey"="";"value"="e"}; +{"key"="1";"subkey"="";"value"="b"}; +{"key"="2";"subkey"="";"value"="x"}; +{"key"="2";"subkey"="";"value"="y"}; +{"key"="2";"subkey"="";"value"="x"}; +{"key"="2";"subkey"="";"value"="u"}; 
+{"key"="2";"subkey"="";"value"="v"}; +{"key"="2";"subkey"="";"value"="w"}; +{"key"="2";"subkey"="";"value"="x"}; +{"key"="2";"subkey"="";"value"="w"}; +{"key"="3";"subkey"="";"value"="m"}; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql new file mode 100644 index 00000000000..439f05776ee --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctLimit.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + AGGREGATE_LIST_DISTINCT(value, 3) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql new file mode 100644 index 00000000000..1887ec61804 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctSingular.sql @@ -0,0 +1,16 @@ +/* syntax version 1 */ +select AGGREGATE_LIST_DISTINCT(x) from +(select [null, null] as x) +flatten list by x; + +select AGGREGATE_LIST_DISTINCT(x) from +(select [void(), void()] as x) +flatten list by x; + +select AGGREGATE_LIST_DISTINCT(x) from +(select [[], []] as x) +flatten list by x; + +select AGGREGATE_LIST_DISTINCT(x) from +(select [{}, {}] as x) +flatten list by x; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql new 
file mode 100644 index 00000000000..88acaede014 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctStructInDict.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +select AGGREGATE_LIST_DISTINCT(x) from +(select [{<|a:1,b:2|>},{<|a:1,b:3|>},{<|a:1,b:2|>}] as x) +flatten list by x; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in new file mode 100644 index 00000000000..7c638071c18 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="1";"value"="a"}; +{"key"="1";"subkey"="1";"value"="b"}; +{"key"="1";"subkey"="1";"value"="c"}; +{"key"="1";"subkey"="2";"value"="a"}; +{"key"="1";"subkey"="1";"value"="e"}; +{"key"="1";"subkey"="1";"value"="b"}; +{"key"="2";"subkey"="2";"value"="x"}; +{"key"="2";"subkey"="3";"value"="y"}; +{"key"="2";"subkey"="4";"value"="x"}; +{"key"="2";"subkey"="5";"value"="u"}; +{"key"="2";"subkey"="6";"value"="v"}; +{"key"="2";"subkey"="7";"value"="w"}; +{"key"="2";"subkey"="8";"value"="x"}; +{"key"="2";"subkey"="9";"value"="w"}; +{"key"="3";"subkey"="0";"value"="m"}; diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql new file mode 100644 index 00000000000..8c3472bb858 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctTuple.sql @@ 
-0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + AGGREGATE_LIST_DISTINCT(AsTuple(subkey, value)) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql b/yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql new file mode 100644 index 00000000000..29469d01b1e --- /dev/null +++ b/yql/essentials/udfs/common/set/test/cases/ListDistinctVariant.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +$vt1 = Variant<Int32,String>; +select AGGREGATE_LIST_DISTINCT(x) from +(select [Variant(1,"0",$vt1),Variant("str","1",$vt1),Variant(1,"0",$vt1)] as x) +flatten list by x; + +$vt2 = Variant<x:Int32,y:String>; +select AGGREGATE_LIST_DISTINCT(x) from +(select [Variant(1,"x",$vt2),Variant("str","y",$vt2),Variant(1,"x",$vt2)] as x) +flatten list by x; + diff --git a/yql/essentials/udfs/common/set/test/ya.make b/yql/essentials/udfs/common/set/test/ya.make new file mode 100644 index 00000000000..e31115a00d8 --- /dev/null +++ b/yql/essentials/udfs/common/set/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/set) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/set/ya.make b/yql/essentials/udfs/common/set/ya.make new file mode 100644 index 00000000000..1303267409d --- /dev/null +++ b/yql/essentials/udfs/common/set/ya.make @@ -0,0 +1,25 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319903801 OUT_NOAUTO libset_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(set_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + set_udf.cpp + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/stat/stat_udf.cpp b/yql/essentials/udfs/common/stat/stat_udf.cpp new file mode 100644 index 00000000000..64c2bb4a698 --- /dev/null +++ b/yql/essentials/udfs/common/stat/stat_udf.cpp @@ -0,0 +1,3 @@ +#include 
"static/stat_udf.h" + +REGISTER_MODULES(TStatModule) diff --git a/yql/essentials/udfs/common/stat/stat_udf_ut.cpp b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp new file mode 100644 index 00000000000..2a033ff31e4 --- /dev/null +++ b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp @@ -0,0 +1,363 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <yql/essentials/minikql/mkql_function_registry.h> +#include <yql/essentials/minikql/mkql_program_builder.h> +#include <yql/essentials/minikql/computation/mkql_computation_node.h> +#include <yql/essentials/minikql/comp_nodes/mkql_factories.h> +#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h> +#include <util/random/random.h> +#include <util/system/sanitizers.h> +#include <array> + +namespace NYql { +using namespace NKikimr::NMiniKQL; + + namespace NUdf { + extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule(); + } + + Y_UNIT_TEST_SUITE(TUDFStatTest) { + Y_UNIT_TEST(SimplePercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(10000000); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } + + for (int n = 1; n < 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral((double)n); + TVector<TRuntimeNode> params = 
{pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); + } + + Y_UNIT_TEST(SimplePercentileSpecific) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(75.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } + + TVector<double> vals = {800, 20, 150}; + for (auto val : vals) { + auto param2 = pgmBuilder.NewDataLiteral(val); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } 
+ + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.5); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + Cerr << value.Get<double>() << Endl; + //~ UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 9.0, 0.001); + } + + Y_UNIT_TEST(SerializedPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); + auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); + + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } + + for (int n = 1; n < 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral((double)n); + TVector<TRuntimeNode> params = {pgmDigest, 
param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } + + TRuntimeNode pgmSerializedData; + { + TVector<TRuntimeNode> params = {pgmDigest}; + pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); + } + + TRuntimeNode pgmDigest2; + { + TVector<TRuntimeNode> params = {pgmSerializedData}; + pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest2, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); + } + + Y_UNIT_TEST(SerializedMergedPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); + auto udfTDigest_Deserialize = 
pgmBuilder.Udf("Stat.TDigest_Deserialize"); + auto udfTDigest_Merge = pgmBuilder.Udf("Stat.TDigest_Merge"); + + TVector<TRuntimeNode> pgmSerializedDataVector; + + for (int i = 0; i < 100; i += 10) { + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral(double(i) / 10); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } + + for (int n = i + 1; n < i + 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral(double(n) / 10); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } + + TRuntimeNode pgmSerializedData; + { + TVector<TRuntimeNode> params = {pgmDigest}; + pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); + } + pgmSerializedDataVector.push_back(pgmSerializedData); + } + + TRuntimeNode pgmDigest; + for (size_t i = 0; i < pgmSerializedDataVector.size(); ++i) { + TRuntimeNode pgmDigest2; + { + TVector<TRuntimeNode> params = {pgmSerializedDataVector[i]}; + pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); + } + if (!pgmDigest) { + pgmDigest = pgmDigest2; + } else { + TVector<TRuntimeNode> params = {pgmDigest, pgmDigest2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Merge, params); + } + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 
8.95, 0.001); + } + + static double GetParetoRandomNumber(double a) { + return 1 / pow(RandomNumber<double>(), double(1) / a); + } + + Y_UNIT_TEST(BigPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + const size_t NUMBERS = 100000; + const double PERCENTILE = 0.99; + const double THRESHOLD = 0.0004; // at q=0.99 threshold is 4*delta*0.0099 + TVector<double> randomNumbers1; + TVector<TRuntimeNode> randomNumbers2; + randomNumbers1.reserve(NUMBERS); + randomNumbers2.reserve(NUMBERS); + for (size_t n = 0; n < NUMBERS; ++n) { + double randomNumber = GetParetoRandomNumber(10); + randomNumbers1.push_back(randomNumber); + randomNumbers2.push_back(pgmBuilder.NewDataLiteral(randomNumber)); + } + TRuntimeNode bigList = pgmBuilder.AsList(randomNumbers2); + auto pgmDigest = + pgmBuilder.Fold1(bigList, + [&](TRuntimeNode item) { + std::array<TRuntimeNode, 1> args; + args[0] = item; + return pgmBuilder.Apply(udfTDigest_Create, args); + }, + [&](TRuntimeNode item, TRuntimeNode state) { + std::array<TRuntimeNode, 2> args; + args[0] = state; + args[1] = item; + return pgmBuilder.Apply(udfTDigest_AddValue, args); + }); + TRuntimeNode pgmReturn = + pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { + auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); + std::array<TRuntimeNode, 2> args; + args[0] = item; + args[1] = param2; + return 
pgmBuilder.Apply(udfTDigest_GetPercentile, args); + }); + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT(value); + double digestValue = value.Get<double>(); + std::sort(randomNumbers1.begin(), randomNumbers1.end()); + // This gives us a 1-based index of the last value <= digestValue + auto index = std::upper_bound(randomNumbers1.begin(), randomNumbers1.end(), digestValue) - randomNumbers1.begin(); + // See https://en.wikipedia.org/wiki/Percentile#First_Variant.2C + double p = (index - 0.5) / double(randomNumbers1.size()); + UNIT_ASSERT_DOUBLES_EQUAL(p, PERCENTILE, THRESHOLD); + } + + Y_UNIT_TEST(CentroidPrecision) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + const size_t NUMBERS = 100000; + const double PERCENTILE = 0.25; + const double minValue = 1.0; + const double maxValue = 100.0; + const double majorityValue = 50.0; + TVector<TRuntimeNode> numbers; + numbers.reserve(NUMBERS); + for (size_t n = 0; 
n < NUMBERS - 2; ++n) { + numbers.push_back(pgmBuilder.NewDataLiteral(majorityValue)); + } + numbers.push_back(pgmBuilder.NewDataLiteral(minValue)); + numbers.push_back(pgmBuilder.NewDataLiteral(maxValue)); + TRuntimeNode bigList = pgmBuilder.AsList(numbers); + auto pgmDigest = + pgmBuilder.Fold1(bigList, + [&](TRuntimeNode item) { + std::array<TRuntimeNode, 1> args; + args[0] = item; + return pgmBuilder.Apply(udfTDigest_Create, args); + }, + [&](TRuntimeNode item, TRuntimeNode state) { + std::array<TRuntimeNode, 2> args; + args[0] = state; + args[1] = item; + return pgmBuilder.Apply(udfTDigest_AddValue, args); + }); + TRuntimeNode pgmReturn = + pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { + auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); + std::array<TRuntimeNode, 2> args; + args[0] = item; + args[1] = param2; + return pgmBuilder.Apply(udfTDigest_GetPercentile, args); + }); + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT(value); + double digestValue = value.Get<double>(); + UNIT_ASSERT_EQUAL(digestValue, majorityValue); + } + } +} diff --git a/yql/essentials/udfs/common/stat/static/stat_udf.h b/yql/essentials/udfs/common/stat/static/stat_udf.h new file mode 100644 index 00000000000..f0c11a6812d --- /dev/null +++ b/yql/essentials/udfs/common/stat/static/stat_udf.h @@ -0,0 +1,75 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +#include <library/cpp/tdigest/tdigest.h> + +using namespace NYql; +using namespace NUdf; + +namespace { + extern const char 
DigestResourceName[] = "Stat.TDigestResource"; + + typedef TBoxedResource<TDigest, DigestResourceName> TDigestResource; + typedef TRefCountedPtr<TDigestResource> TDigestResourcePtr; + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) { + Y_UNUSED(valueBuilder); + const double delta = args[1].GetOrDefault<double>(0.01); + const double K = args[2].GetOrDefault<double>(25.0); + if (delta == 0 || K / delta < 1) { + UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").data()); + } + + return TUnboxedValuePod(new TDigestResource(delta, K, args[0].Get<double>())); + } + + SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + TDigestResource* resource = static_cast<TDigestResource*>(args[0].AsBoxed().Get()); + resource->Get()->AddValue(args[1].Get<double>()); + return TUnboxedValuePod(resource); + } + + SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + return TUnboxedValuePod(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->GetPercentile(args[1].Get<double>())); + } + + SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) { + TDigestResource::Validate(args[0]); + return valueBuilder->NewString(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->Serialize()); + } + + SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(new TDigestResource(TString(args[0].AsStringRef()))); + } + + SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + TDigestResource::Validate(args[1]); + return 
TUnboxedValuePod(new TDigestResource(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get(), static_cast<TDigestResource*>(args[1].AsBoxed().Get())->Get())); + } + + /* + * + * TODO: Memory tracking + * + * + * + */ + + SIMPLE_MODULE(TStatModule, + TTDigest_Create, + TTDigest_AddValue, + TTDigest_GetPercentile, + TTDigest_Serialize, + TTDigest_Deserialize, + TTDigest_Merge) + +} diff --git a/yql/essentials/udfs/common/stat/static/static_udf.cpp b/yql/essentials/udfs/common/stat/static/static_udf.cpp new file mode 100644 index 00000000000..3cb1d88a1c8 --- /dev/null +++ b/yql/essentials/udfs/common/stat/static/static_udf.cpp @@ -0,0 +1,10 @@ +#include "stat_udf.h" + +namespace NYql { + namespace NUdf { + NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule() { + return new TStatModule(); + } + + } +} diff --git a/yql/essentials/udfs/common/stat/static/ya.make b/yql/essentials/udfs/common/stat/static/ya.make new file mode 100644 index 00000000000..f3cc7842eea --- /dev/null +++ b/yql/essentials/udfs/common/stat/static/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 28 + 0 +) + +SRCS( + static_udf.cpp + stat_udf.h +) + +PEERDIR( + yql/essentials/public/udf + library/cpp/tdigest +) + +END() diff --git a/yql/essentials/udfs/common/stat/ut/ya.make b/yql/essentials/udfs/common/stat/ut/ya.make new file mode 100644 index 00000000000..e7c32bb7ec4 --- /dev/null +++ b/yql/essentials/udfs/common/stat/ut/ya.make @@ -0,0 +1,19 @@ +UNITTEST_FOR(yql/essentials/udfs/common/stat/static) + +SRCS( + ../stat_udf_ut.cpp +) + +PEERDIR( + yql/essentials/minikql/comp_nodes/llvm14 + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql/pg_dummy +) + +YQL_LAST_ABI_VERSION() + +TIMEOUT(300) + +SIZE(MEDIUM) + +END() diff --git a/yql/essentials/udfs/common/stat/ya.make b/yql/essentials/udfs/common/stat/ya.make new file mode 100644 index 00000000000..cbc0f71c032 --- /dev/null +++ b/yql/essentials/udfs/common/stat/ya.make @@ -0,0 +1,30 @@ +IF (YQL_PACKAGED) + 
PACKAGE() + FROM_SANDBOX(FILE 7319904307 OUT_NOAUTO libstat_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(stat_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + stat_udf.cpp + ) + + PEERDIR( + yql/essentials/udfs/common/stat/static + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + ut +) + diff --git a/yql/essentials/udfs/common/streaming/streaming_udf.cpp b/yql/essentials/udfs/common/streaming/streaming_udf.cpp new file mode 100644 index 00000000000..bd01935321e --- /dev/null +++ b/yql/essentials/udfs/common/streaming/streaming_udf.cpp @@ -0,0 +1,829 @@ +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_registrator.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_terminator.h> + +#include <util/generic/buffer.h> +#include <util/generic/mem_copy.h> +#include <util/generic/maybe.h> +#include <util/generic/ptr.h> +#include <util/string/builder.h> +#include <util/stream/mem.h> +#include <library/cpp/deprecated/kmp/kmp.h> +#include <util/string/strip.h> +#include <util/system/condvar.h> +#include <util/system/shellcommand.h> +#include <util/system/tempfile.h> +#include <util/system/sysstat.h> + +#include <functional> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + // Cyclic Read-Write buffer. + // Not thread safe, synchronization between reader and writer threads + // should be managed externally. 
+ class TCyclicRWBuffer { + public: + TCyclicRWBuffer(size_t capacity) + : Buffer(capacity) + , Finished(false) + , DataStart(0) + , DataSize(0) + { + Buffer.Resize(capacity); + } + + bool IsFinished() const { + return Finished; + } + + void Finish() { + Finished = true; + } + + bool HasData() const { + return DataSize > 0; + } + + size_t GetDataSize() const { + return DataSize; + } + + void GetData(const char*& ptr, size_t& len) const { + size_t readSize = GetDataRegionSize(DataStart, DataSize); + ptr = Buffer.Data() + DataStart; + len = readSize; + } + + void CommitRead(size_t len) { + Y_DEBUG_ABORT_UNLESS(len <= GetDataRegionSize(DataStart, DataSize)); + + DataStart = GetBufferPosition(DataStart + len); + DataSize -= len; + } + + bool CanWrite() const { + return WriteSize() > 0; + } + + size_t WriteSize() const { + return Buffer.Size() - DataSize; + } + + size_t Write(const char*& ptr, size_t& len) { + if (!CanWrite()) { + return 0; + } + + size_t bytesWritten = 0; + size_t bytesToWrite = std::min(len, WriteSize()); + while (bytesToWrite > 0) { + size_t writeStart = GetWriteStart(); + size_t writeSize = GetDataRegionSize(writeStart, bytesToWrite); + + MemCopy(Data(writeStart), ptr, writeSize); + + DataSize += writeSize; + bytesWritten += writeSize; + bytesToWrite -= writeSize; + + ptr += writeSize; + len -= writeSize; + } + + return bytesWritten; + } + + size_t Write(IZeroCopyInput& input) { + const void* ptr; + size_t dataLen = input.Next(&ptr, WriteSize()); + const char* dataPtr = reinterpret_cast<const char*>(ptr); + return Write(dataPtr, dataLen); + } + + private: + size_t GetBufferPosition(size_t pos) const { + return pos % Buffer.Size(); + } + + size_t GetDataRegionSize(size_t start, size_t size) const { + Y_DEBUG_ABORT_UNLESS(start < Buffer.Size()); + + return std::min(size, Buffer.Size() - start); + } + + size_t GetWriteStart() const { + return GetBufferPosition(DataStart + DataSize); + } + + char* Data(size_t pos) { + Y_DEBUG_ABORT_UNLESS(pos < 
Buffer.Size()); + + return (Buffer.Data() + pos); + } + + private: + TBuffer Buffer; + + bool Finished; + + size_t DataStart; + size_t DataSize; + }; + + struct TStreamingParams { + public: + const size_t DefaultProcessPollLatencyMs = 5 * 1000; // 5 seconds + const size_t DefaultInputBufferSizeBytes = 4 * 1024 * 1024; // 4MB + const size_t DefaultOutputBufferSizeBytes = 16 * 1024 * 1024; // 16MB + const char* DefaultInputDelimiter = "\n"; + const char* DefaultOutputDelimiter = "\n"; + + public: + TUnboxedValue InputStreamObj; + TString CommandLine; + TUnboxedValue ArgumentsList; + TString InputDelimiter; + TString OutputDelimiter; + size_t InputBufferSizeBytes; + size_t OutputBufferSizeBytes; + size_t ProcessPollLatencyMs; + + TStreamingParams() + : InputDelimiter(DefaultInputDelimiter) + , OutputDelimiter(DefaultOutputDelimiter) + , InputBufferSizeBytes(DefaultInputBufferSizeBytes) + , OutputBufferSizeBytes(DefaultOutputBufferSizeBytes) + , ProcessPollLatencyMs(DefaultProcessPollLatencyMs) + { + } + }; + + struct TThreadSyncData { + TMutex BuffersMutex; + TCondVar InputBufferCanReadCond; + TCondVar MainThreadHasWorkCond; + TCondVar OutputBufferCanWriteCond; + }; + + class TStringListBufferedInputStream: public IInputStream { + public: + TStringListBufferedInputStream(TUnboxedValue rowsStream, const TString& delimiter, size_t bufferSizeBytes, + TThreadSyncData& syncData, TSourcePosition pos) + : RowsStream(rowsStream) + , Delimiter(delimiter) + , SyncData(syncData) + , Pos_(pos) + , DelimiterMatcher(delimiter) + , DelimiterInput(delimiter) + , Buffer(bufferSizeBytes) + , CurReadMode(ReadMode::Start) + { + } + + TStringListBufferedInputStream(const TStringListBufferedInputStream&) = delete; + TStringListBufferedInputStream& operator=(const TStringListBufferedInputStream&) = delete; + + TCyclicRWBuffer& GetBuffer() { + return Buffer; + } + + // Fetch input from upstream list iterator to the buffer. + // Called from Main thread. 
+ EFetchStatus FetchInput() { + with_lock (SyncData.BuffersMutex) { + Y_DEBUG_ABORT_UNLESS(!Buffer.HasData()); + Y_DEBUG_ABORT_UNLESS(Buffer.CanWrite()); + + bool receivedYield = false; + + while (Buffer.CanWrite() && CurReadMode != ReadMode::Done && !receivedYield) { + switch (CurReadMode) { + case ReadMode::Start: { + auto status = ReadNextString(); + if (status == EFetchStatus::Yield) { + receivedYield = true; + break; + } + + CurReadMode = (status == EFetchStatus::Ok) + ? ReadMode::String + : ReadMode::Done; + + break; + } + + case ReadMode::String: + if (CurStringInput.Exhausted()) { + DelimiterInput.Reset(Delimiter.data(), Delimiter.size()); + CurReadMode = ReadMode::Delimiter; + break; + } + + Buffer.Write(CurStringInput); + break; + + case ReadMode::Delimiter: + if (DelimiterInput.Exhausted()) { + CurReadMode = ReadMode::Start; + break; + } + + Buffer.Write(DelimiterInput); + break; + + default: + break; + } + } + + if (CurReadMode == ReadMode::Done) { + Buffer.Finish(); + } + + SyncData.InputBufferCanReadCond.Signal(); + return receivedYield ? EFetchStatus::Yield : EFetchStatus::Ok; + } + } + + private: + // Read data to pass into the child process input pipe. + // Called from Communicate thread. 
+ size_t DoRead(void* buf, size_t len) override { + try { + with_lock (SyncData.BuffersMutex) { + while (!Buffer.HasData() && !Buffer.IsFinished()) { + SyncData.MainThreadHasWorkCond.Signal(); + SyncData.InputBufferCanReadCond.WaitI(SyncData.BuffersMutex); + } + + if (!Buffer.HasData()) { + Y_DEBUG_ABORT_UNLESS(Buffer.IsFinished()); + return 0; + } + + const char* dataPtr; + size_t dataLen; + Buffer.GetData(dataPtr, dataLen); + + size_t bytesRead = std::min(dataLen, len); + Y_DEBUG_ABORT_UNLESS(bytesRead > 0); + memcpy(buf, dataPtr, bytesRead); + Buffer.CommitRead(bytesRead); + return bytesRead; + } + + ythrow yexception(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + EFetchStatus ReadNextString() { + TUnboxedValue item; + EFetchStatus status = RowsStream.Fetch(item); + switch (status) { + case EFetchStatus::Yield: + case EFetchStatus::Finish: + return status; + default: + break; + } + + CurString = item.GetElement(0); + CurStringInput.Reset(CurString.AsStringRef().Data(), CurString.AsStringRef().Size()); + + // Check that input string doesn't contain delimiters + const char* match; + Y_UNUSED(match); + if (DelimiterMatcher.SubStr( + CurString.AsStringRef().Data(), + CurString.AsStringRef().Data() + CurString.AsStringRef().Size(), + match)) + { + ythrow yexception() << "Delimiter found in input string."; + } + + return EFetchStatus::Ok; + } + + private: + enum class ReadMode { + Start, + String, + Delimiter, + Done + }; + + TUnboxedValue RowsStream; + TString Delimiter; + TThreadSyncData& SyncData; + TSourcePosition Pos_; + + TKMPMatcher DelimiterMatcher; + TUnboxedValue CurString; + TMemoryInput CurStringInput; + TMemoryInput DelimiterInput; + + TCyclicRWBuffer Buffer; + + ReadMode CurReadMode; + }; + + class TStringListBufferedOutputStream: public IOutputStream { + public: + TStringListBufferedOutputStream(const TString& delimiter, size_t stringBufferSizeBytes, + 
TStringListBufferedInputStream& inputStream, TThreadSyncData& syncData) + : Delimiter(delimiter) + , InputStream(inputStream) + , SyncData(syncData) + , HasDelimiterMatch(false) + , DelimiterMatcherCallback(HasDelimiterMatch) + , DelimiterMatcher(delimiter.data(), delimiter.data() + delimiter.size(), &DelimiterMatcherCallback) + , Buffer(stringBufferSizeBytes) + { + } + + TStringListBufferedOutputStream(const TStringListBufferedOutputStream&) = delete; + TStringListBufferedOutputStream& operator=(const TStringListBufferedOutputStream&) = delete; + + // Get string record from buffer. + // Called from Main thread. + EFetchStatus FetchNextString(TString& str) { + while (!HasDelimiterMatch) { + with_lock (SyncData.BuffersMutex) { + bool inputHasData; + bool bufferNeedsData; + + do { + inputHasData = InputStream.GetBuffer().HasData() || InputStream.GetBuffer().IsFinished(); + bufferNeedsData = !Buffer.HasData() && !Buffer.IsFinished(); + + if (inputHasData && bufferNeedsData) { + SyncData.MainThreadHasWorkCond.WaitI(SyncData.BuffersMutex); + } + } while (inputHasData && bufferNeedsData); + + if (!inputHasData) { + auto status = InputStream.FetchInput(); + if (status == EFetchStatus::Yield) { + return EFetchStatus::Yield; + } + } + + if (bufferNeedsData) { + continue; + } + + if (!Buffer.HasData()) { + Y_DEBUG_ABORT_UNLESS(Buffer.IsFinished()); + str = TString(TStringBuf(CurrentString.Data(), CurrentString.Size())); + CurrentString.Clear(); + return str.empty() ? 
EFetchStatus::Finish : EFetchStatus::Ok; + } + + const char* data; + size_t size; + Buffer.GetData(data, size); + + size_t read = 0; + while (!HasDelimiterMatch && read < size) { + DelimiterMatcher.Push(data[read]); + ++read; + } + + Y_DEBUG_ABORT_UNLESS(read > 0); + CurrentString.Append(data, read); + bool signalCanWrite = !Buffer.CanWrite(); + Buffer.CommitRead(read); + + if (signalCanWrite) { + SyncData.OutputBufferCanWriteCond.Signal(); + } + } + } + + Y_DEBUG_ABORT_UNLESS(CurrentString.Size() >= Delimiter.size()); + str = TString(TStringBuf(CurrentString.Data(), CurrentString.Size() - Delimiter.size())); + CurrentString.Clear(); + HasDelimiterMatch = false; + + return EFetchStatus::Ok; + } + + TCyclicRWBuffer& GetBuffer() { + return Buffer; + } + + private: + // Write data from child process output to buffer. + // Called from Communicate thread. + void DoWrite(const void* buf, size_t len) override { + const char* curStrPos = reinterpret_cast<const char*>(buf); + size_t curStrLen = len; + + while (curStrLen > 0) { + with_lock (SyncData.BuffersMutex) { + while (!Buffer.CanWrite() && !Buffer.IsFinished()) { + SyncData.OutputBufferCanWriteCond.WaitI(SyncData.BuffersMutex); + } + + if (Buffer.IsFinished()) { + return; + } + + bool signalCanRead = !Buffer.HasData(); + Buffer.Write(curStrPos, curStrLen); + + if (signalCanRead) { + SyncData.MainThreadHasWorkCond.Signal(); + } + } + } + } + + void DoFinish() override { + IOutputStream::DoFinish(); + + with_lock (SyncData.BuffersMutex) { + Buffer.Finish(); + SyncData.MainThreadHasWorkCond.Signal(); + } + } + + private: + class MatcherCallback: public TKMPStreamMatcher<char>::ICallback { + public: + MatcherCallback(bool& hasMatch) + : HasMatch(hasMatch) + { + } + + void OnMatch(const char* begin, const char* end) override { + Y_UNUSED(begin); + Y_UNUSED(end); + + HasMatch = true; + } + + private: + bool& HasMatch; + }; + + private: + TString Delimiter; + TStringListBufferedInputStream& InputStream; + TThreadSyncData& 
SyncData; + + bool HasDelimiterMatch; + MatcherCallback DelimiterMatcherCallback; + TKMPStreamMatcher<char> DelimiterMatcher; + + TBuffer CurrentString; + + TCyclicRWBuffer Buffer; + };

    // Pulls delimiter-separated records from the standard output of a child
    // process. The process is launched lazily by the first Fetch() call; the
    // input and output stream adapters created for it share ThreadSyncData.
    // The destructor terminates the process and waits for it.
    class TStreamingOutputListIterator {
    public:
        TStreamingOutputListIterator(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos)
            : StreamingParams(params)
            , ValueBuilder(valueBuilder)
            , Pos_(pos)
        {
        }

        TStreamingOutputListIterator(const TStreamingOutputListIterator&) = delete;
        TStreamingOutputListIterator& operator=(const TStreamingOutputListIterator&) = delete;

        ~TStreamingOutputListIterator() {
            // Nothing to tear down if Fetch() never started the process.
            if (!ShellCommand) {
                return;
            }

            Y_DEBUG_ABORT_UNLESS(InputStream && OutputStream);

            // Best effort: a failure to terminate is reported to stderr and
            // must not escape the destructor.
            try {
                ShellCommand->Terminate();
            } catch (const std::exception&) {
                Cerr << CurrentExceptionMessage();
            }

            // Let Communicate thread finish.
            with_lock (ThreadSyncData.BuffersMutex) {
                InputStream->GetBuffer().Finish();
                OutputStream->GetBuffer().Finish();
                ThreadSyncData.InputBufferCanReadCond.Signal();
                ThreadSyncData.OutputBufferCanWriteCond.Signal();
            }

            ShellCommand->Wait();
        }

        // Produces the next output record.
        //
        // On EFetchStatus::Ok, `result` is a one-element array whose item is the
        // record as a string. On EFetchStatus::Finish the final process status
        // is checked and anything other than SHELL_FINISHED is raised as an
        // error. Every std::exception is turned into UdfTerminate() with the
        // source position prepended to the message.
        EFetchStatus Fetch(TUnboxedValue& result) {
            try {
                EFetchStatus status = EFetchStatus::Ok;

                if (!ProcessStarted()) {
                    StartProcess();

                    // Don't try to fetch data if there was a problem starting the process,
                    // this causes infinite wait on Windows system due to incorrect ShellCommand behavior.
                    if (ShellCommand->GetStatus() != TShellCommand::SHELL_RUNNING && ShellCommand->GetStatus() != TShellCommand::SHELL_FINISHED) {
                        status = EFetchStatus::Finish;
                    }
                }

                if (status == EFetchStatus::Ok) {
                    status = OutputStream->FetchNextString(CurrentRecord);
                }

                if (status == EFetchStatus::Finish) {
                    // ythrow never returns, so the error cases need no break.
                    switch (ShellCommand->GetStatus()) {
                        case TShellCommand::SHELL_FINISHED:
                            break;
                        case TShellCommand::SHELL_INTERNAL_ERROR:
                            ythrow yexception() << "Internal error running process: " << ShellCommand->GetInternalError();
                        case TShellCommand::SHELL_ERROR:
                            ythrow yexception() << "Error running user process: " << ShellCommand->GetError();
                        default:
                            ythrow yexception() << "Unexpected shell command status: " << static_cast<int>(ShellCommand->GetStatus());
                    }
                    return EFetchStatus::Finish;
                }

                if (status == EFetchStatus::Ok) {
                    TUnboxedValue* items = nullptr;
                    result = ValueBuilder->NewArray(1, items);
                    *items = ValueBuilder->NewString(TStringRef(CurrentRecord.data(), CurrentRecord.size()));
                }

                return status;
            } catch (const std::exception& e) {
                UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
            }
        }

    private:
        // Creates the stream adapters, then builds and runs the shell command
        // asynchronously with the configured command line and arguments.
        void StartProcess() {
            InputStream.Reset(new TStringListBufferedInputStream(
                StreamingParams.InputStreamObj, StreamingParams.InputDelimiter,
                StreamingParams.InputBufferSizeBytes, ThreadSyncData, Pos_));

            OutputStream.Reset(new TStringListBufferedOutputStream(
                StreamingParams.OutputDelimiter, StreamingParams.OutputBufferSizeBytes, *InputStream,
                ThreadSyncData));

            TShellCommandOptions opt;
            opt.SetAsync(true)
                .SetUseShell(false)
                .SetLatency(StreamingParams.ProcessPollLatencyMs)
                .SetInputStream(InputStream.Get())
                .SetOutputStream(OutputStream.Get())
                .SetCloseStreams(true)
                .SetCloseAllFdsOnExec(true);

            TList<TString> commandArguments;
            auto argumentsIterator = StreamingParams.ArgumentsList.GetListIterator();
            for (TUnboxedValue item; argumentsIterator.Next(item);) {
                commandArguments.emplace_back(TStringBuf(item.AsStringRef()));
            }

            ShellCommand.Reset(new TShellCommand(StreamingParams.CommandLine, commandArguments, opt));
            ShellCommand->Run();
        }

        // True once StartProcess() has created the shell command object.
        bool ProcessStarted() const {
            return !!ShellCommand;
        }

    private:
        TStreamingParams StreamingParams;
        const IValueBuilder* ValueBuilder;
        TSourcePosition Pos_;

        TThreadSyncData ThreadSyncData;

        THolder<TShellCommand> ShellCommand;
        THolder<TStringListBufferedInputStream> InputStream;
        THolder<TStringListBufferedOutputStream> OutputStream;

        // Reused between Fetch() calls to hold the most recent record.
        TString CurrentRecord;
    };

    // Boxed stream value returned by the UDF. It creates the iterator on the
    // first Fetch() and destroys it as soon as the stream reports Finish, so
    // the child process does not outlive the data it produced.
    class TStreamingOutput: public TBoxedValue {
    public:
        TStreamingOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos)
            : StreamingParams(params)
            , ValueBuilder(valueBuilder)
            , Pos_(pos)
        {
        }

        TStreamingOutput(const TStreamingOutput&) = delete;
        TStreamingOutput& operator=(const TStreamingOutput&) = delete;

    private:
        EFetchStatus Fetch(TUnboxedValue& result) override {
            // Once finished, keep answering Finish without touching the process.
            if (IsFinished) {
                return EFetchStatus::Finish;
            }

            if (!Iterator) {
                Iterator.Reset(new TStreamingOutputListIterator(StreamingParams, ValueBuilder, Pos_));
            }

            const auto status = Iterator->Fetch(result);
            if (status == EFetchStatus::Finish) {
                IsFinished = true;
                Iterator.Reset();
            }

            return status;
        }

        TStreamingParams StreamingParams;
        const IValueBuilder* ValueBuilder;
        TSourcePosition Pos_;
        bool IsFinished = false;
        THolder<TStreamingOutputListIterator> Iterator;
    };

    // Streaming output for an inline script: the constructor writes the script
    // text to `scriptFilename` and sets mode 0755 on it. The file is held by a
    // TTempFileHandle member for the lifetime of this object.
    class TStreamingScriptOutput: public TStreamingOutput {
    public:
        TStreamingScriptOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder,
                               TSourcePosition pos, const TString& script, const TString& scriptFilename)
            : TStreamingOutput(params, valueBuilder, pos)
            , ScriptFileHandle(scriptFilename)
        {
            const auto scriptStripped = StripBeforeShebang(script);
            ScriptFileHandle.Write(scriptStripped.data(), scriptStripped.size());
            ScriptFileHandle.Close();

            if (Chmod(ScriptFileHandle.Name().c_str(), MODE0755) != 0) {
                ythrow yexception() << "Chmod failed for script file:" << ScriptFileHandle.Name()
                                    << " with error: " << LastSystemErrorText();
            }
        }

    private:
        // Drops leading whitespace only when the first "#!" is exactly what
        // follows it, i.e. the shebang is the first non-blank text of the
        // script. Otherwise the script is returned unchanged.
        static TString StripBeforeShebang(const TString& script) {
            const auto shebangIndex = script.find("#!");
            if (shebangIndex == TString::npos) {
                return script;
            }

            auto scriptStripped = StripStringLeft(script);
            if (scriptStripped.size() == script.size() - shebangIndex) {
                return scriptStripped;
            }

            return script;
        }

        TTempFileHandle ScriptFileHandle;
    };

    // Builds the TStreamingParams fields common to Process and ProcessInline
    // from the UDF arguments: args[0] is the input stream, args[2] the optional
    // argument list, args[3] and args[4] the optional input and output
    // delimiters. An empty optional leaves the corresponding field as
    // default-constructed (an empty list for the arguments).
    static TStreamingParams MakeStreamingParams(const IValueBuilder* valueBuilder,
                                                const TUnboxedValuePod* args,
                                                const TString& commandLine) {
        const auto argumentsArg = args[2];
        const auto inputDelimiterArg = args[3];
        const auto outputDelimiterArg = args[4];

        TStreamingParams params;
        params.InputStreamObj = TUnboxedValuePod(args[0]);
        params.CommandLine = commandLine;
        params.ArgumentsList = !argumentsArg
                                   ? valueBuilder->NewEmptyList()
                                   : TUnboxedValue(argumentsArg.GetOptionalValue());

        if (inputDelimiterArg) {
            params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef()));
        }
        if (outputDelimiterArg) {
            params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef()));
        }

        return params;
    }

    // Implementation of Streaming.Process: runs the command named by args[1]
    // and returns its output as a stream.
    class TStreamingProcess: public TBoxedValue {
    public:
        explicit TStreamingProcess(TSourcePosition pos)
            : Pos_(pos)
        {}

    private:
        TUnboxedValue Run(const IValueBuilder* valueBuilder,
                          const TUnboxedValuePod* args) const override {
            Y_DEBUG_ABORT_UNLESS(args[0].IsBoxed());

            const auto commandLineArg = args[1].AsStringRef();
            const TStreamingParams params =
                MakeStreamingParams(valueBuilder, args, TString(TStringBuf(commandLineArg)));

            return TUnboxedValuePod(new TStreamingOutput(params, valueBuilder, Pos_));
        }

    public:
        static TStringRef Name() {
            static auto name = TStringRef::Of("Process");
            return name;
        }

    private:
        TSourcePosition Pos_;
    };

    // Implementation of Streaming.ProcessInline: args[1] is the script text.
    // It is stored in a temporary file named by MakeTempName("."), and that
    // file name is used as the command line.
    class TStreamingProcessInline: public TBoxedValue {
    public:
        explicit TStreamingProcessInline(TSourcePosition pos)
            : Pos_(pos)
        {}

    private:
        TUnboxedValue Run(const IValueBuilder* valueBuilder,
                          const TUnboxedValuePod* args) const override {
            const auto scriptArg = args[1].AsStringRef();

            TString script(scriptArg);
            TString scriptFilename = MakeTempName(".");

            const TStreamingParams params = MakeStreamingParams(valueBuilder, args, scriptFilename);

            return TUnboxedValuePod(new TStreamingScriptOutput(params, valueBuilder, Pos_, script, scriptFilename));
        }

    public:
        static TStringRef Name() {
            static auto name = TStringRef::Of("ProcessInline");
            return name;
        }

    private:
        TSourcePosition Pos_;
    };

    // UDF module "Streaming", exposing the Process and ProcessInline functions.
    class TStreamingModule: public IUdfModule {
    public:
        TStringRef Name() const {
            return TStringRef::Of("Streaming");
        }

        void CleanupOnTerminate() const final {
        }

        void GetAllFunctions(IFunctionsSink& sink) const final {
            sink.Add(TStreamingProcess::Name());
            sink.Add(TStreamingProcessInline::Name());
        }

        // Describes the signature of the requested function and, unless only
        // types were requested, attaches its implementation. Both functions
        // share one signature: (Stream<Struct<Data:String>>, String,
        // Optional<List<String>>, Optional<String>, Optional<String>) ->
        // Stream<Struct<Data:String>>, declared with OptionalArgs(3).
        // Unknown names are ignored; exceptions are reported via SetError().
        void BuildFunctionTypeInfo(
            const TStringRef& name,
            NUdf::TType* userType,
            const TStringRef& typeConfig,
            ui32 flags,
            IFunctionTypeInfoBuilder& builder) const override {
            try {
                Y_UNUSED(userType);
                Y_UNUSED(typeConfig);

                const bool typesOnly = (flags & TFlags::TypesOnly);

                auto optionalStringType = builder.Optional()->Item<char*>().Build();
                auto rowType = builder.Struct(1)->AddField("Data", TDataType<char*>::Id, nullptr).Build();
                auto rowsType = builder.Stream()->Item(rowType).Build();
                auto stringListType = builder.List()->Item(TDataType<char*>::Id).Build();
                auto optionalStringListType = builder.Optional()->Item(stringListType).Build();

                const bool isProcess = TStreamingProcess::Name() == name;
                const bool isProcessInline = TStreamingProcessInline::Name() == name;
                if (!isProcess && !isProcessInline) {
                    return;
                }

                builder.Returns(rowsType)
                    .Args()
                    ->Add(rowsType)
                    .Add<char*>()
                    .Add(optionalStringListType)
                    .Add(optionalStringType)
                    .Add(optionalStringType)
                    .Done()
                    .OptionalArgs(3);

                if (!typesOnly) {
                    if (isProcess) {
                        builder.Implementation(new TStreamingProcess(builder.GetSourcePosition()));
                    } else {
                        builder.Implementation(new TStreamingProcessInline(builder.GetSourcePosition()));
                    }
                }
            } catch (const std::exception&) {
                builder.SetError(CurrentExceptionMessage());
            }
        }
    };

}

REGISTER_MODULES(TStreamingModule)
diff --git a/yql/essentials/udfs/common/streaming/test/canondata/result.json b/yql/essentials/udfs/common/streaming/test/canondata/result.json new file mode 100644 index 00000000000..311aa7bb59d --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/canondata/result.json @@ -0,0 +1,44 @@ +{ + "test.test[Big]": [ + { + "checksum": "6ea4faa20341a15cc8ff132ede9be694", + "size": 1909, + "uri": "https://{canondata_backend}/1130705/0ba4949d004901679d526dece7802426a6bb3667/resource.tar#test.test_Big_/results.txt" + } + ], + "test.test[Empty]": [ + { + "checksum": "7a81241874ebe3aaa437d8e6abe6af55", + "size": 574, + "uri": "https://{canondata_backend}/212715/eeb78aadf48e5da34543a0dd89f2554c391a4ad5/resource.tar#test.test_Empty_/results.txt" + } + ], + "test.test[File]": [ + { + "checksum": "a784abbfe20172c03bed177628a71c79", + "size": 10454, + "uri": "https://{canondata_backend}/212715/eeb78aadf48e5da34543a0dd89f2554c391a4ad5/resource.tar#test.test_File_/results.txt" + } + ], + "test.test[Simple]": [ + { + "checksum": "18a4d9c3e1efd491be7844e09066ebe4", + "size": 2331, + "uri": "https://{canondata_backend}/1130705/0ba4949d004901679d526dece7802426a6bb3667/resource.tar#test.test_Simple_/results.txt" + } + ], + "test.test[YieldSwitchEmpty]": [ + { + "checksum": "3f1707fb49aabaaadbb283a3aa34ea5b", + "size": 2447, + "uri": "https://{canondata_backend}/1130705/0ba4949d004901679d526dece7802426a6bb3667/resource.tar#test.test_YieldSwitchEmpty_/results.txt" + } + ], + "test.test[Yield]": [ + { + "checksum": "3a6152c18e813c6be8fe23308ba05fc7", + "size": 1085, + "uri":
"https://{canondata_backend}/212715/eeb78aadf48e5da34543a0dd89f2554c391a4ad5/resource.tar#test.test_Yield_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/streaming/test/cases/Big.sql b/yql/essentials/udfs/common/streaming/test/cases/Big.sql new file mode 100644 index 00000000000..29a08c74ff7 --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/Big.sql @@ -0,0 +1,48 @@ +/* syntax version 1 */ +SELECT YQL::@@(block '( + (let times16 (lambda '(x) (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x (Concat x x))))))))))))))))) + (let s2_8 (Apply times16 (String '"1000000000000007"))) + (let s2_12 (Apply times16 s2_8)) + (let s2_16 (Apply times16 s2_12)) + (let s2_20 (Apply times16 s2_16)) + (let s2_24 (Apply times16 s2_20)) + + (let s2_12_1 (Concat s2_12 (String '"762"))) + (let vt (VariantType (TupleType (DataType 'Int32) (DataType 'String)))) + (let inputRows (AsList + (Variant s2_24 '1 vt) + (Variant s2_8 '1 vt) + (Variant s2_12 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_16 '1 vt) + (Variant s2_8 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_24 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_24 '1 vt) + (Variant s2_24 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_24 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_12_1 '1 vt) + (Variant s2_16 '1 vt) + )) + + (let udf (Udf '"Streaming.Process")) + + (let pr (lambda '(x) (block '( + (let res (AsStruct '('Data x))) + (return res) + )))) + + (let tr1 (lambda '(x) (block '( + (let y (OrderedMap x pr)) + (return (Apply udf y (String '"cat")))) + ))) + + (let hugeResult (Switch (Iterator inputRows (DependsOn (String 'A))) '1 '('1) tr1)) + (let md5Udf (Udf '"Digest.Md5Hex")) + (let shortResult (OrderedMap hugeResult (lambda '(x) (Apply md5Udf (Member x 'Data))))) + (return (Collect shortResult)) +))@@; diff --git a/yql/essentials/udfs/common/streaming/test/cases/Empty.in 
b/yql/essentials/udfs/common/streaming/test/cases/Empty.in new file mode 100644 index 00000000000..0f7f64882e5 --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/Empty.in @@ -0,0 +1,100 @@ +{"key"="1";"subkey"="1";"value"="Input line #13"}; +{"key"="1";"subkey"="1";"value"="Input line #35"}; +{"key"="1";"subkey"="1";"value"="Input line #76"}; +{"key"="1";"subkey"="1";"value"="Input line #70"}; +{"key"="1";"subkey"="1";"value"="Input line #9"}; +{"key"="1";"subkey"="1";"value"="Input line #63"}; +{"key"="1";"subkey"="1";"value"="Input line #53"}; +{"key"="1";"subkey"="1";"value"="Input line #89"}; +{"key"="1";"subkey"="1";"value"="Input line #31"}; +{"key"="1";"subkey"="1";"value"="Input line #4"}; +{"key"="1";"subkey"="1";"value"="Input line #65"}; +{"key"="1";"subkey"="1";"value"="Input line #64"}; +{"key"="1";"subkey"="1";"value"="Input line #37"}; +{"key"="1";"subkey"="1";"value"="Input line #79"}; +{"key"="1";"subkey"="1";"value"="Input line #51"}; +{"key"="1";"subkey"="1";"value"="Input line #59"}; +{"key"="1";"subkey"="1";"value"="Input line #67"}; +{"key"="1";"subkey"="1";"value"="Input line #98"}; +{"key"="1";"subkey"="1";"value"="Input line #94"}; +{"key"="1";"subkey"="1";"value"="Input line #55"}; +{"key"="1";"subkey"="1";"value"="Input line #80"}; +{"key"="1";"subkey"="1";"value"="Input line #96"}; +{"key"="1";"subkey"="1";"value"="Input line #27"}; +{"key"="1";"subkey"="1";"value"="Input line #29"}; +{"key"="1";"subkey"="1";"value"="Input line #84"}; +{"key"="1";"subkey"="1";"value"="Input line #77"}; +{"key"="1";"subkey"="1";"value"="Input line #19"}; +{"key"="1";"subkey"="1";"value"="Input line #22"}; +{"key"="1";"subkey"="1";"value"="Input line #21"}; +{"key"="1";"subkey"="1";"value"="Input line #49"}; +{"key"="1";"subkey"="1";"value"="Input line #93"}; +{"key"="1";"subkey"="1";"value"="Input line #61"}; +{"key"="1";"subkey"="1";"value"="Input line #71"}; +{"key"="1";"subkey"="1";"value"="Input line #15"}; 
+{"key"="1";"subkey"="1";"value"="Input line #92"}; +{"key"="1";"subkey"="1";"value"="Input line #50"}; +{"key"="1";"subkey"="1";"value"="Input line #14"}; +{"key"="1";"subkey"="1";"value"="Input line #99"}; +{"key"="1";"subkey"="1";"value"="Input line #57"}; +{"key"="1";"subkey"="1";"value"="Input line #10"}; +{"key"="1";"subkey"="1";"value"="Input line #73"}; +{"key"="1";"subkey"="1";"value"="Input line #54"}; +{"key"="1";"subkey"="1";"value"="Input line #43"}; +{"key"="1";"subkey"="1";"value"="Input line #17"}; +{"key"="1";"subkey"="1";"value"="Input line #34"}; +{"key"="1";"subkey"="1";"value"="Input line #36"}; +{"key"="1";"subkey"="1";"value"="Input line #45"}; +{"key"="1";"subkey"="1";"value"="Input line #30"}; +{"key"="1";"subkey"="1";"value"="Input line #72"}; +{"key"="1";"subkey"="1";"value"="Input line #90"}; +{"key"="1";"subkey"="1";"value"="Input line #47"}; +{"key"="1";"subkey"="1";"value"="Input line #86"}; +{"key"="1";"subkey"="1";"value"="Input line #56"}; +{"key"="1";"subkey"="1";"value"="Input line #38"}; +{"key"="1";"subkey"="1";"value"="Input line #52"}; +{"key"="1";"subkey"="1";"value"="Input line #42"}; +{"key"="1";"subkey"="1";"value"="Input line #1"}; +{"key"="1";"subkey"="1";"value"="Input line #82"}; +{"key"="1";"subkey"="1";"value"="Input line #48"}; +{"key"="1";"subkey"="1";"value"="Input line #75"}; +{"key"="1";"subkey"="1";"value"="Input line #40"}; +{"key"="1";"subkey"="1";"value"="Input line #85"}; +{"key"="1";"subkey"="1";"value"="Input line #58"}; +{"key"="1";"subkey"="1";"value"="Input line #33"}; +{"key"="1";"subkey"="1";"value"="Input line #12"}; +{"key"="1";"subkey"="1";"value"="Input line #46"}; +{"key"="1";"subkey"="1";"value"="Input line #8"}; +{"key"="1";"subkey"="1";"value"="Input line #44"}; +{"key"="1";"subkey"="1";"value"="Input line #18"}; +{"key"="1";"subkey"="1";"value"="Input line #25"}; +{"key"="1";"subkey"="1";"value"="Input line #11"}; +{"key"="1";"subkey"="1";"value"="Input line #2"}; 
+{"key"="1";"subkey"="1";"value"="Input line #5"}; +{"key"="1";"subkey"="1";"value"="Input line #3"}; +{"key"="1";"subkey"="1";"value"="Input line #23"}; +{"key"="1";"subkey"="1";"value"="Input line #20"}; +{"key"="1";"subkey"="1";"value"="Input line #83"}; +{"key"="1";"subkey"="1";"value"="Input line #6"}; +{"key"="1";"subkey"="1";"value"="Input line #78"}; +{"key"="1";"subkey"="1";"value"="Input line #95"}; +{"key"="1";"subkey"="1";"value"="Input line #0"}; +{"key"="1";"subkey"="1";"value"="Input line #16"}; +{"key"="1";"subkey"="1";"value"="Input line #88"}; +{"key"="1";"subkey"="1";"value"="Input line #28"}; +{"key"="1";"subkey"="1";"value"="Input line #81"}; +{"key"="1";"subkey"="1";"value"="Input line #60"}; +{"key"="1";"subkey"="1";"value"="Input line #41"}; +{"key"="1";"subkey"="1";"value"="Input line #24"}; +{"key"="1";"subkey"="1";"value"="Input line #87"}; +{"key"="1";"subkey"="1";"value"="Input line #26"}; +{"key"="1";"subkey"="1";"value"="Input line #97"}; +{"key"="1";"subkey"="1";"value"="Input line #91"}; +{"key"="1";"subkey"="1";"value"="Input line #66"}; +{"key"="1";"subkey"="1";"value"="Input line #69"}; +{"key"="1";"subkey"="1";"value"="Input line #74"}; +{"key"="1";"subkey"="1";"value"="Input line #7"}; +{"key"="1";"subkey"="1";"value"="Input line #68"}; +{"key"="1";"subkey"="1";"value"="Input line #39"}; +{"key"="1";"subkey"="1";"value"="Input line #32"}; +{"key"="1";"subkey"="1";"value"="Input line #62"}; diff --git a/yql/essentials/udfs/common/streaming/test/cases/Empty.sql b/yql/essentials/udfs/common/streaming/test/cases/Empty.sql new file mode 100644 index 00000000000..21ed9da180b --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/Empty.sql @@ -0,0 +1,3 @@ +/* syntax version 1 */ +$in = (SELECT value AS Data FROM Input); +PROCESS $in USING Streaming::Process(TableRows(), "tail", AsList("-n+101"));
\ No newline at end of file diff --git a/yql/essentials/udfs/common/streaming/test/cases/File.in b/yql/essentials/udfs/common/streaming/test/cases/File.in new file mode 100644 index 00000000000..045bc6bd14c --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/File.in @@ -0,0 +1,200 @@ +{"key"="180";"subkey"="7";"value"="Value #44"}; +{"key"="178";"subkey"="17";"value"="Value #7"}; +{"key"="6";"subkey"="4";"value"="Value #1"}; +{"key"="53";"subkey"="19";"value"="Value #41"}; +{"key"="112";"subkey"="15";"value"="Value #14"}; +{"key"="121";"subkey"="11";"value"="Value #58"}; +{"key"="69";"subkey"="5";"value"="Value #49"}; +{"key"="142";"subkey"="3";"value"="Value #5"}; +{"key"="73";"subkey"="8";"value"="Value #33"}; +{"key"="52";"subkey"="14";"value"="Value #45"}; +{"key"="18";"subkey"="6";"value"="Value #40"}; +{"key"="141";"subkey"="16";"value"="Value #35"}; +{"key"="63";"subkey"="18";"value"="Value #12"}; +{"key"="5";"subkey"="13";"value"="Value #28"}; +{"key"="128";"subkey"="2";"value"="Value #56"}; +{"key"="48";"subkey"="12";"value"="Value #13"}; +{"key"="93";"subkey"="9";"value"="Value #20"}; +{"key"="49";"subkey"="0";"value"="Value #30"}; +{"key"="95";"subkey"="1";"value"="Value #34"}; +{"key"="159";"subkey"="10";"value"="Value #52"}; +{"key"="55";"subkey"="7";"value"="Value #15"}; +{"key"="7";"subkey"="17";"value"="Value #24"}; +{"key"="35";"subkey"="4";"value"="Value #51"}; +{"key"="82";"subkey"="19";"value"="Value #0"}; +{"key"="170";"subkey"="15";"value"="Value #26"}; +{"key"="150";"subkey"="11";"value"="Value #27"}; +{"key"="26";"subkey"="5";"value"="Value #54"}; +{"key"="58";"subkey"="3";"value"="Value #37"}; +{"key"="16";"subkey"="8";"value"="Value #46"}; +{"key"="166";"subkey"="14";"value"="Value #4"}; +{"key"="86";"subkey"="6";"value"="Value #16"}; +{"key"="101";"subkey"="16";"value"="Value #32"}; +{"key"="160";"subkey"="18";"value"="Value #39"}; +{"key"="199";"subkey"="13";"value"="Value #25"}; +{"key"="138";"subkey"="2";"value"="Value 
#6"}; +{"key"="96";"subkey"="12";"value"="Value #57"}; +{"key"="33";"subkey"="9";"value"="Value #21"}; +{"key"="9";"subkey"="0";"value"="Value #42"}; +{"key"="21";"subkey"="1";"value"="Value #55"}; +{"key"="176";"subkey"="10";"value"="Value #23"}; +{"key"="0";"subkey"="7";"value"="Value #18"}; +{"key"="66";"subkey"="17";"value"="Value #3"}; +{"key"="198";"subkey"="4";"value"="Value #22"}; +{"key"="186";"subkey"="19";"value"="Value #17"}; +{"key"="83";"subkey"="15";"value"="Value #2"}; +{"key"="179";"subkey"="11";"value"="Value #19"}; +{"key"="64";"subkey"="5";"value"="Value #38"}; +{"key"="56";"subkey"="3";"value"="Value #50"}; +{"key"="155";"subkey"="8";"value"="Value #43"}; +{"key"="143";"subkey"="14";"value"="Value #9"}; +{"key"="188";"subkey"="6";"value"="Value #8"}; +{"key"="172";"subkey"="16";"value"="Value #53"}; +{"key"="103";"subkey"="18";"value"="Value #11"}; +{"key"="44";"subkey"="13";"value"="Value #36"}; +{"key"="173";"subkey"="2";"value"="Value #10"}; +{"key"="133";"subkey"="12";"value"="Value #48"}; +{"key"="168";"subkey"="9";"value"="Value #29"}; +{"key"="157";"subkey"="0";"value"="Value #31"}; +{"key"="152";"subkey"="1";"value"="Value #47"}; +{"key"="74";"subkey"="10";"value"="Value #59"}; +{"key"="154";"subkey"="7";"value"="Value #44"}; +{"key"="40";"subkey"="17";"value"="Value #7"}; +{"key"="89";"subkey"="4";"value"="Value #1"}; +{"key"="41";"subkey"="19";"value"="Value #41"}; +{"key"="24";"subkey"="15";"value"="Value #14"}; +{"key"="182";"subkey"="11";"value"="Value #58"}; +{"key"="80";"subkey"="5";"value"="Value #49"}; +{"key"="196";"subkey"="3";"value"="Value #5"}; +{"key"="43";"subkey"="8";"value"="Value #33"}; +{"key"="156";"subkey"="14";"value"="Value #45"}; +{"key"="34";"subkey"="6";"value"="Value #40"}; +{"key"="88";"subkey"="16";"value"="Value #35"}; +{"key"="22";"subkey"="18";"value"="Value #12"}; +{"key"="27";"subkey"="13";"value"="Value #28"}; +{"key"="84";"subkey"="2";"value"="Value #56"}; +{"key"="12";"subkey"="12";"value"="Value 
#13"}; +{"key"="98";"subkey"="9";"value"="Value #20"}; +{"key"="140";"subkey"="0";"value"="Value #30"}; +{"key"="31";"subkey"="1";"value"="Value #34"}; +{"key"="105";"subkey"="10";"value"="Value #52"}; +{"key"="149";"subkey"="7";"value"="Value #15"}; +{"key"="153";"subkey"="17";"value"="Value #24"}; +{"key"="177";"subkey"="4";"value"="Value #51"}; +{"key"="14";"subkey"="19";"value"="Value #0"}; +{"key"="190";"subkey"="15";"value"="Value #26"}; +{"key"="118";"subkey"="11";"value"="Value #27"}; +{"key"="174";"subkey"="5";"value"="Value #54"}; +{"key"="104";"subkey"="3";"value"="Value #37"}; +{"key"="47";"subkey"="8";"value"="Value #46"}; +{"key"="46";"subkey"="14";"value"="Value #4"}; +{"key"="124";"subkey"="6";"value"="Value #16"}; +{"key"="70";"subkey"="16";"value"="Value #32"}; +{"key"="110";"subkey"="18";"value"="Value #39"}; +{"key"="91";"subkey"="13";"value"="Value #25"}; +{"key"="192";"subkey"="2";"value"="Value #6"}; +{"key"="183";"subkey"="12";"value"="Value #57"}; +{"key"="100";"subkey"="9";"value"="Value #21"}; +{"key"="38";"subkey"="0";"value"="Value #42"}; +{"key"="71";"subkey"="1";"value"="Value #55"}; +{"key"="29";"subkey"="10";"value"="Value #23"}; +{"key"="51";"subkey"="7";"value"="Value #18"}; +{"key"="32";"subkey"="17";"value"="Value #3"}; +{"key"="130";"subkey"="4";"value"="Value #22"}; +{"key"="77";"subkey"="19";"value"="Value #17"}; +{"key"="4";"subkey"="15";"value"="Value #2"}; +{"key"="97";"subkey"="11";"value"="Value #19"}; +{"key"="67";"subkey"="5";"value"="Value #38"}; +{"key"="158";"subkey"="3";"value"="Value #50"}; +{"key"="25";"subkey"="8";"value"="Value #43"}; +{"key"="119";"subkey"="14";"value"="Value #9"}; +{"key"="2";"subkey"="6";"value"="Value #8"}; +{"key"="167";"subkey"="16";"value"="Value #53"}; +{"key"="193";"subkey"="18";"value"="Value #11"}; +{"key"="11";"subkey"="13";"value"="Value #36"}; +{"key"="129";"subkey"="2";"value"="Value #10"}; +{"key"="187";"subkey"="12";"value"="Value #48"}; +{"key"="20";"subkey"="9";"value"="Value 
#29"}; +{"key"="134";"subkey"="0";"value"="Value #31"}; +{"key"="115";"subkey"="1";"value"="Value #47"}; +{"key"="94";"subkey"="10";"value"="Value #59"}; +{"key"="30";"subkey"="7";"value"="Value #44"}; +{"key"="175";"subkey"="17";"value"="Value #7"}; +{"key"="62";"subkey"="4";"value"="Value #1"}; +{"key"="147";"subkey"="19";"value"="Value #41"}; +{"key"="87";"subkey"="15";"value"="Value #14"}; +{"key"="99";"subkey"="11";"value"="Value #58"}; +{"key"="114";"subkey"="5";"value"="Value #49"}; +{"key"="117";"subkey"="3";"value"="Value #5"}; +{"key"="10";"subkey"="8";"value"="Value #33"}; +{"key"="162";"subkey"="14";"value"="Value #45"}; +{"key"="171";"subkey"="6";"value"="Value #40"}; +{"key"="108";"subkey"="16";"value"="Value #35"}; +{"key"="60";"subkey"="18";"value"="Value #12"}; +{"key"="144";"subkey"="13";"value"="Value #28"}; +{"key"="113";"subkey"="2";"value"="Value #56"}; +{"key"="102";"subkey"="12";"value"="Value #13"}; +{"key"="194";"subkey"="9";"value"="Value #20"}; +{"key"="76";"subkey"="0";"value"="Value #30"}; +{"key"="189";"subkey"="1";"value"="Value #34"}; +{"key"="164";"subkey"="10";"value"="Value #52"}; +{"key"="23";"subkey"="7";"value"="Value #15"}; +{"key"="65";"subkey"="17";"value"="Value #24"}; +{"key"="54";"subkey"="4";"value"="Value #51"}; +{"key"="148";"subkey"="19";"value"="Value #0"}; +{"key"="123";"subkey"="15";"value"="Value #26"}; +{"key"="185";"subkey"="11";"value"="Value #27"}; +{"key"="28";"subkey"="5";"value"="Value #54"}; +{"key"="13";"subkey"="3";"value"="Value #37"}; +{"key"="136";"subkey"="8";"value"="Value #46"}; +{"key"="57";"subkey"="14";"value"="Value #4"}; +{"key"="184";"subkey"="6";"value"="Value #16"}; +{"key"="36";"subkey"="16";"value"="Value #32"}; +{"key"="132";"subkey"="18";"value"="Value #39"}; +{"key"="120";"subkey"="13";"value"="Value #25"}; +{"key"="50";"subkey"="2";"value"="Value #6"}; +{"key"="195";"subkey"="12";"value"="Value #57"}; +{"key"="135";"subkey"="9";"value"="Value #21"}; 
+{"key"="92";"subkey"="0";"value"="Value #42"}; +{"key"="151";"subkey"="1";"value"="Value #55"}; +{"key"="125";"subkey"="10";"value"="Value #23"}; +{"key"="146";"subkey"="7";"value"="Value #18"}; +{"key"="45";"subkey"="17";"value"="Value #3"}; +{"key"="90";"subkey"="4";"value"="Value #22"}; +{"key"="126";"subkey"="19";"value"="Value #17"}; +{"key"="145";"subkey"="15";"value"="Value #2"}; +{"key"="19";"subkey"="11";"value"="Value #19"}; +{"key"="127";"subkey"="5";"value"="Value #38"}; +{"key"="79";"subkey"="3";"value"="Value #50"}; +{"key"="131";"subkey"="8";"value"="Value #43"}; +{"key"="111";"subkey"="14";"value"="Value #9"}; +{"key"="75";"subkey"="6";"value"="Value #8"}; +{"key"="191";"subkey"="16";"value"="Value #53"}; +{"key"="3";"subkey"="18";"value"="Value #11"}; +{"key"="165";"subkey"="13";"value"="Value #36"}; +{"key"="85";"subkey"="2";"value"="Value #10"}; +{"key"="1";"subkey"="12";"value"="Value #48"}; +{"key"="161";"subkey"="9";"value"="Value #29"}; +{"key"="37";"subkey"="0";"value"="Value #31"}; +{"key"="107";"subkey"="1";"value"="Value #47"}; +{"key"="122";"subkey"="10";"value"="Value #59"}; +{"key"="139";"subkey"="7";"value"="Value #44"}; +{"key"="15";"subkey"="17";"value"="Value #7"}; +{"key"="106";"subkey"="4";"value"="Value #1"}; +{"key"="59";"subkey"="19";"value"="Value #41"}; +{"key"="61";"subkey"="15";"value"="Value #14"}; +{"key"="17";"subkey"="11";"value"="Value #58"}; +{"key"="68";"subkey"="5";"value"="Value #49"}; +{"key"="163";"subkey"="3";"value"="Value #5"}; +{"key"="197";"subkey"="8";"value"="Value #33"}; +{"key"="81";"subkey"="14";"value"="Value #45"}; +{"key"="169";"subkey"="6";"value"="Value #40"}; +{"key"="8";"subkey"="16";"value"="Value #35"}; +{"key"="109";"subkey"="18";"value"="Value #12"}; +{"key"="78";"subkey"="13";"value"="Value #28"}; +{"key"="181";"subkey"="2";"value"="Value #56"}; +{"key"="116";"subkey"="12";"value"="Value #13"}; +{"key"="137";"subkey"="9";"value"="Value #20"}; +{"key"="39";"subkey"="0";"value"="Value #30"}; 
+{"key"="72";"subkey"="1";"value"="Value #34"}; +{"key"="42";"subkey"="10";"value"="Value #52"}; diff --git a/yql/essentials/udfs/common/streaming/test/cases/File.sql b/yql/essentials/udfs/common/streaming/test/cases/File.sql new file mode 100644 index 00000000000..a4b0faed03b --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/File.sql @@ -0,0 +1,24 @@ +/* syntax version 1 */ +SELECT YQL::@@(block '( + (let x (Read! world (DataSource '"yt" '"plato") (Key '('table (String '"Input"))) (Void) '())) + + (let world (Left! x)) + (let table0 (Right! x)) + + (let data (FlatMap table0 (lambda '(row) (block '( + (let res (Struct)) + (let res (AddMember res '"Data" ("Apply" ("Udf" '"String.JoinFromList") ("AsList" (Member row '"key") (Member row '"subkey") (Member row '"value")) (String '",")))) + (let res (AsList res)) + (return res) + )))) + ) + + (let udf (Udf '"Streaming.Process")) + (let args1 (List (ListType (DataType 'String)) (String '"[13]"))) + (let res1 (LMap data (lambda '(stream) (Apply udf stream (String '"grep") args1)))) + + (let args2 (List (ListType (DataType 'String)) (String '"2"))) + (let res2 (LMap res1 (lambda '(stream) (Apply udf stream (String '"grep") args2)))) + + (return res2) +))@@; diff --git a/yql/essentials/udfs/common/streaming/test/cases/Simple.sql b/yql/essentials/udfs/common/streaming/test/cases/Simple.sql new file mode 100644 index 00000000000..0ea920cc2e1 --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/Simple.sql @@ -0,0 +1,116 @@ +/* syntax version 1 */ +SELECT YQL::@@(block '( + (let inputRows (AsList + (AsStruct '('Data (String '"Input line #13"))) + (AsStruct '('Data (String '"Input line #35"))) + (AsStruct '('Data (String '"Input line #76"))) + (AsStruct '('Data (String '"Input line #70"))) + (AsStruct '('Data (String '"Input line #9"))) + (AsStruct '('Data (String '"Input line #63"))) + (AsStruct '('Data (String '"Input line #53"))) + (AsStruct '('Data (String '"Input line #89"))) + (AsStruct 
'('Data (String '"Input line #31"))) + (AsStruct '('Data (String '"Input line #4"))) + (AsStruct '('Data (String '"Input line #65"))) + (AsStruct '('Data (String '"Input line #64"))) + (AsStruct '('Data (String '"Input line #37"))) + (AsStruct '('Data (String '"Input line #79"))) + (AsStruct '('Data (String '"Input line #51"))) + (AsStruct '('Data (String '"Input line #59"))) + (AsStruct '('Data (String '"Input line #67"))) + (AsStruct '('Data (String '"Input line #98"))) + (AsStruct '('Data (String '"Input line #94"))) + (AsStruct '('Data (String '"Input line #55"))) + (AsStruct '('Data (String '"Input line #80"))) + (AsStruct '('Data (String '"Input line #96"))) + (AsStruct '('Data (String '"Input line #27"))) + (AsStruct '('Data (String '"Input line #29"))) + (AsStruct '('Data (String '"Input line #84"))) + (AsStruct '('Data (String '"Input line #77"))) + (AsStruct '('Data (String '"Input line #19"))) + (AsStruct '('Data (String '"Input line #22"))) + (AsStruct '('Data (String '"Input line #21"))) + (AsStruct '('Data (String '"Input line #49"))) + (AsStruct '('Data (String '"Input line #93"))) + (AsStruct '('Data (String '"Input line #61"))) + (AsStruct '('Data (String '"Input line #71"))) + (AsStruct '('Data (String '"Input line #15"))) + (AsStruct '('Data (String '"Input line #92"))) + (AsStruct '('Data (String '"Input line #50"))) + (AsStruct '('Data (String '"Input line #14"))) + (AsStruct '('Data (String '"Input line #99"))) + (AsStruct '('Data (String '"Input line #57"))) + (AsStruct '('Data (String '"Input line #10"))) + (AsStruct '('Data (String '"Input line #73"))) + (AsStruct '('Data (String '"Input line #54"))) + (AsStruct '('Data (String '"Input line #43"))) + (AsStruct '('Data (String '"Input line #17"))) + (AsStruct '('Data (String '"Input line #34"))) + (AsStruct '('Data (String '"Input line #36"))) + (AsStruct '('Data (String '"Input line #45"))) + (AsStruct '('Data (String '"Input line #30"))) + (AsStruct '('Data (String '"Input line #72"))) + 
(AsStruct '('Data (String '"Input line #90"))) + (AsStruct '('Data (String '"Input line #47"))) + (AsStruct '('Data (String '"Input line #86"))) + (AsStruct '('Data (String '"Input line #56"))) + (AsStruct '('Data (String '"Input line #38"))) + (AsStruct '('Data (String '"Input line #52"))) + (AsStruct '('Data (String '"Input line #42"))) + (AsStruct '('Data (String '"Input line #1"))) + (AsStruct '('Data (String '"Input line #82"))) + (AsStruct '('Data (String '"Input line #48"))) + (AsStruct '('Data (String '"Input line #75"))) + (AsStruct '('Data (String '"Input line #40"))) + (AsStruct '('Data (String '"Input line #85"))) + (AsStruct '('Data (String '"Input line #58"))) + (AsStruct '('Data (String '"Input line #33"))) + (AsStruct '('Data (String '"Input line #12"))) + (AsStruct '('Data (String '"Input line #46"))) + (AsStruct '('Data (String '"Input line #8"))) + (AsStruct '('Data (String '"Input line #44"))) + (AsStruct '('Data (String '"Input line #18"))) + (AsStruct '('Data (String '"Input line #25"))) + (AsStruct '('Data (String '"Input line #11"))) + (AsStruct '('Data (String '"Input line #2"))) + (AsStruct '('Data (String '"Input line #5"))) + (AsStruct '('Data (String '"Input line #3"))) + (AsStruct '('Data (String '"Input line #23"))) + (AsStruct '('Data (String '"Input line #20"))) + (AsStruct '('Data (String '"Input line #83"))) + (AsStruct '('Data (String '"Input line #6"))) + (AsStruct '('Data (String '"Input line #78"))) + (AsStruct '('Data (String '"Input line #95"))) + (AsStruct '('Data (String '"Input line #0"))) + (AsStruct '('Data (String '"Input line #16"))) + (AsStruct '('Data (String '"Input line #88"))) + (AsStruct '('Data (String '"Input line #28"))) + (AsStruct '('Data (String '"Input line #81"))) + (AsStruct '('Data (String '"Input line #60"))) + (AsStruct '('Data (String '"Input line #41"))) + (AsStruct '('Data (String '"Input line #24"))) + (AsStruct '('Data (String '"Input line #87"))) + (AsStruct '('Data (String '"Input line #26"))) 
+ (AsStruct '('Data (String '"Input line #97"))) + (AsStruct '('Data (String '"Input line #91"))) + (AsStruct '('Data (String '"Input line #66"))) + (AsStruct '('Data (String '"Input line #69"))) + (AsStruct '('Data (String '"Input line #74"))) + (AsStruct '('Data (String '"Input line #7"))) + (AsStruct '('Data (String '"Input line #68"))) + (AsStruct '('Data (String '"Input line #39"))) + (AsStruct '('Data (String '"Input line #32"))) + (AsStruct '('Data (String '"Input line #62"))) + )) + + (let udf (Udf '"Streaming.Process")) + (let args1 (AsList (String '"[123679]"))) + (let res1 (Apply udf (Iterator inputRows) (String '"grep") args1)) + + (let args2 (AsList (String '"4"))) + (let res2 (Apply udf res1 (String '"grep") args2)) + + (let res3 (Apply udf res2 (String '"head"))) + + (return (Collect res3)) +))@@; diff --git a/yql/essentials/udfs/common/streaming/test/cases/Yield.sql b/yql/essentials/udfs/common/streaming/test/cases/Yield.sql new file mode 100644 index 00000000000..bd8cce86a8f --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/Yield.sql @@ -0,0 +1,44 @@ +/* syntax version 1 */ +SELECT YQL::@@(block '( + (let vt (VariantType (TupleType (DataType 'String) (DataType 'String)))) + (let inputRows (AsList + (Variant (String 'abbbbd111) '1 vt) + (Variant (String 'btzzzzzzzzzz) '0 vt) + (Variant (String 'kaziiaakkakaka) '1 vt) + (Variant (String 'bufffffffff) '0 vt) + (Variant (String 'aaaaa11111qqqqd) '1 vt) + (Variant (String 'zoppppppppp) '0 vt) + (Variant (String 'arrrrrrrr) '0 vt) + (Variant (String 'zzzzzzzzzzzzzzz) '0 vt) + (Variant (String 'wwwwwwwwwwwwwww1) '0 vt) + (Variant (String 'baaaaaaaaaaaaaaa) '1 vt) + )) + + (let udf (Udf '"Streaming.Process")) + (let args1 (AsList (String '"[ab1]"))) + (let args2 (AsList (String '"[rpd]"))) + + (let pr (lambda '(x) (block '( + (let res (AsStruct '('Data x))) + (return res) + )))) + + (let tr1 (lambda '(x) (block '( + (let y (OrderedMap x pr)) + (return (Apply udf y (String '"grep") 
args1))) + ))) + + (let tr2 (lambda '(x) (block '( + (let y (OrderedMap x pr)) + (return (Apply udf y (String '"grep") args2))) + ))) + + (let id (lambda '(x) x)) + (let res1 (Switch (Iterator inputRows (DependsOn (String 'A))) '1 '('0) tr1 '('1) tr2)) + (let pr2 (lambda '(x) (Member x 'Data))) + (let pr3 (lambda '(x) (Visit x '0 pr2 '1 pr2))) + (let res2 (OrderedMap (Collect res1) pr3)) + (let res3 (Sort res2 (Bool 'true) id)) + + (return res3) +))@@; diff --git a/yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql b/yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql new file mode 100644 index 00000000000..015c82217ec --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/cases/YieldSwitchEmpty.sql @@ -0,0 +1,44 @@ +/* syntax version 1 */ +SELECT YQL::@@(block '( + (let vt (VariantType (TupleType (DataType 'String) (DataType 'String)))) + (let inputRows (AsList + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + (Variant (String 'aaaaaa) '0 vt) + (Variant (String 'bbbbbb) '1 vt) + )) + + (let udf (Udf '"Streaming.Process")) + (let args (AsList (String '"-c") (String '"grep missing || true"))) + + (let pr (lambda '(x) (block '( + (let res (AsStruct '('Data x))) + (return res) + )))) + + (let tr1 (lambda '(x) (block '( + (let y (Map x pr)) + (return (Apply udf y (String '"bash") args))) + ))) + + (let tr2 (lambda '(x) (block '( + (let y (Map x pr)) + (return (Apply udf y (String '"bash") args))) + ))) + + (let input2 (Switch (Iterator inputRows (DependsOn (String 'A))) '1 '('0) tr1 '('1) tr2)) 
+ + (return (Collect input2)) +))@@; diff --git a/yql/essentials/udfs/common/streaming/test/ya.make b/yql/essentials/udfs/common/streaming/test/ya.make new file mode 100644 index 00000000000..08e2048adc0 --- /dev/null +++ b/yql/essentials/udfs/common/streaming/test/ya.make @@ -0,0 +1,16 @@ +IF (OS_LINUX) +YQL_UDF_TEST_CONTRIB() + DEPENDS( + yql/essentials/udfs/common/digest + yql/essentials/udfs/common/string + yql/essentials/udfs/common/streaming + ) + TIMEOUT(300) + SIZE(MEDIUM) + + IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 + ENDIF() + END() + +ENDIF() diff --git a/yql/essentials/udfs/common/streaming/ya.make b/yql/essentials/udfs/common/streaming/ya.make new file mode 100644 index 00000000000..9b080a7f86f --- /dev/null +++ b/yql/essentials/udfs/common/streaming/ya.make @@ -0,0 +1,21 @@ +YQL_UDF_CONTRIB(streaming_udf) + +YQL_ABI_VERSION( + 2 + 27 + 0 +) + +SRCS( + streaming_udf.cpp +) + +PEERDIR( + library/cpp/deprecated/kmp +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp new file mode 100644 index 00000000000..d621e92582d --- /dev/null +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -0,0 +1,926 @@ +#include <yql/essentials/public/udf/udf_allocator.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_value_builder.h> + +#include <library/cpp/charset/codepage.h> +#include <library/cpp/deprecated/split/split_iterator.h> +#include <library/cpp/html/pcdata/pcdata.h> +#include <library/cpp/string_utils/base32/base32.h> +#include <library/cpp/string_utils/base64/base64.h> +#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h> +#include <library/cpp/string_utils/quote/quote.h> + +#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h> + +#include <util/charset/wide.h> +#include <util/generic/vector.h> +#include <util/stream/format.h> +#include 
<util/string/ascii.h> +#include <util/string/escape.h> +#include <util/string/hex.h> +#include <util/string/join.h> +#include <util/string/reverse.h> +#include <util/string/split.h> +#include <util/string/strip.h> +#include <util/string/subst.h> +#include <util/string/util.h> +#include <util/string/vector.h> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + +#define STRING_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \ + const TString input(args[0].AsStringRef()); \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec<T##udfName##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + const TString input(arg1.AsStringRef()); \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \ + + +// 'unsafe' udf is actually strict - it returns null on any exception +#define STRING_UNSAFE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const TString input(args[0].AsStringRef()); \ + try { \ + const auto& result = function(input); \ + return valueBuilder->NewString(result); \ + } catch (yexception&) { \ + return TUnboxedValue(); \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec<T##udfName##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + if (!arg1) { \ + return sink(TBlockItem()); \ + } \ + \ + const TString input(arg1.AsStringRef()); \ + try { \ + const auto& result = function(input); \ + sink(TBlockItem(result)); \ + } catch (yexception&) { \ + return sink(TBlockItem()); \ + } \ + } \ + }; \ + \ + 
END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + +#define STROKA_UDF(udfName, function) \ + SIMPLE_STRICT_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0) \ + const TString input(args[0].AsStringRef()); \ + try { \ + TUtf16String wide = UTF8ToWide(input); \ + function(wide); \ + return valueBuilder->NewString(WideToUTF8(wide)); \ + } catch (yexception&) { \ + return TUnboxedValue(); \ + } \ + } + +#define STROKA_CASE_UDF(udfName, function) \ + SIMPLE_STRICT_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0) \ + const TString input(args[0].AsStringRef()); \ + try { \ + TUtf16String wide = UTF8ToWide(input); \ + function(wide.begin(), wide.size()); \ + return valueBuilder->NewString(WideToUTF8(wide)); \ + } catch (yexception&) { \ + return TUnboxedValue(); \ + } \ + } + +#define STROKA_ASCII_CASE_UDF(udfName, function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \ + TString input(args[0].AsStringRef()); \ + if (input.function()) { \ + return valueBuilder->NewString(input); \ + } else { \ + return args[0]; \ + } \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec<T##udfName##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TString input(arg1.AsStringRef()); \ + if (input.function()) { \ + sink(TBlockItem(input)); \ + } else { \ + sink(arg1); \ + } \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + + +#define STROKA_FIND_UDF(udfName, function) \ + SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TString haystack(args[0].AsStringRef()); \ + const TString needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(haystack.function(needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } + +#define STRING_TWO_ARGS_UDF(udfName, 
function) \ + SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TString haystack(args[0].AsStringRef()); \ + const TString needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(function(haystack, needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } + +#define IS_ASCII_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf input(args[0].AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + return TUnboxedValuePod(result); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec<T##function##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + if (arg1) { \ + const TStringBuf input(arg1.AsStringRef()); \ + bool result = true; \ + for (auto c : input) { \ + if (!function(c)) { \ + result = false; \ + break; \ + } \ + } \ + sink(TBlockItem(result)); \ + } else { \ + sink(TBlockItem(false)); \ + } \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) + + + +#define STRING_STREAM_PAD_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \ + char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) \ + { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + char paddingSymbol = ' '; \ + if (args[2]) { \ + if (args[2].AsStringRef().Size() != 1) { \ + ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ + } \ + paddingSymbol = TString(args[2].AsStringRef())[0]; \ + } \ + const ui64 padLen = args[1].Get<ui64>(); \ + if (padLen > padLim) { \ + ythrow yexception() << "Padding length (" << padLen << ") exceeds maximum: " << padLim; \ + } \ + result << function(input, 
padLen, paddingSymbol); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TGenericKernelExec<T##function##KernelExec, 3> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ + TStringStream result; \ + const TStringBuf input(args.GetElement(0).AsStringRef()); \ + char paddingSymbol = ' '; \ + if (args.GetElement(2)) { \ + if (args.GetElement(2).AsStringRef().Size() != 1) { \ + ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ + } \ + paddingSymbol = TString(args.GetElement(2).AsStringRef())[0]; \ + } \ + const ui64 padLen = args.GetElement(1).Get<ui64>(); \ + if (padLen > padLim) { \ + ythrow yexception() << "Padding length (" << padLen \ + << ") exceeds maximum: " << padLim; \ + } \ + result << function(input, padLen, paddingSymbol); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) + +#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<argType>)) { \ + TStringStream result; \ + result << function(args[0].Get<argType>()); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec<T##function##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + result << function(arg1.Get<argType>()); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) + +#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<char*>)) { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + result 
<< function(input); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TUnaryKernelExec<T##function##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + const TStringBuf input(arg1.AsStringRef()); \ + result << function(input); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) + + +#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ + BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<ui64>)) { \ + TStringStream result; \ + result << HumanReadableSize(args[0].Get<ui64>(), hrSize); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##udfName##KernelExec \ + : public TUnaryKernelExec<T##udfName##KernelExec> \ + { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ + TStringStream result; \ + result << HumanReadableSize(arg1.Get<ui64>(), hrSize); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ + END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) + +#define STRING_UDF_MAP(XX) \ + XX(Base32Encode, Base32Encode) \ + XX(Base64Encode, Base64Encode) \ + XX(Base64EncodeUrl, Base64EncodeUrl) \ + XX(EscapeC, EscapeC) \ + XX(UnescapeC, UnescapeC) \ + XX(HexEncode, HexEncode) \ + XX(EncodeHtml, EncodeHtmlPcdata) \ + XX(DecodeHtml, DecodeHtmlPcdata) \ + XX(CgiEscape, CGIEscapeRet) \ + XX(CgiUnescape, CGIUnescapeRet) \ + XX(Strip, Strip) \ + XX(Collapse, Collapse) + +#define STRING_UNSAFE_UDF_MAP(XX) \ + XX(Base32Decode, Base32Decode) \ + XX(Base32StrictDecode, Base32StrictDecode) \ + XX(Base64Decode, Base64Decode) \ + XX(Base64StrictDecode, Base64StrictDecode) \ + XX(HexDecode, HexDecode) + +// NOTE: The functions below are 
marked as deprecated, so block implementation +// is not required for them. Hence, STROKA_CASE_UDF provides only the scalar +// one at the moment. +#define STROKA_CASE_UDF_MAP(XX) \ + XX(ToLower, ToLower) \ + XX(ToUpper, ToUpper) \ + XX(ToTitle, ToTitle) + +#define STROKA_ASCII_CASE_UDF_MAP(XX) \ + XX(AsciiToLower, to_lower) \ + XX(AsciiToUpper, to_upper) \ + XX(AsciiToTitle, to_title) + +// NOTE: The functions below are marked as deprecated, so block implementation +// is not required for them. Hence, STROKA_FIND_UDF provides only the scalar +// one at the moment. +#define STROKA_FIND_UDF_MAP(XX) \ + XX(StartsWith, StartsWith) \ + XX(EndsWith, EndsWith) \ + XX(HasPrefix, StartsWith) \ + XX(HasSuffix, EndsWith) + +// NOTE: The functions below are marked as deprecated, so block implementation +// is not required for them. Hence, STRING_TWO_ARGS_UDF provides only the +// scalar one at the moment. +#define STRING_TWO_ARGS_UDF_MAP(XX) \ + XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \ + XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \ + XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \ + XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase) + +// NOTE: The functions below are marked as deprecated, so block implementation +// is not required for them. Hence, STROKA_UDF provides only the scalar one at +// the moment. 
+#define STROKA_UDF_MAP(XX) \ + XX(Reverse, ReverseInPlace) + +#define IS_ASCII_UDF_MAP(XX) \ + XX(IsAscii) \ + XX(IsAsciiSpace) \ + XX(IsAsciiUpper) \ + XX(IsAsciiLower) \ + XX(IsAsciiDigit) \ + XX(IsAsciiAlpha) \ + XX(IsAsciiAlnum) \ + XX(IsAsciiHex) + +#define STRING_STREAM_PAD_FORMATTER_UDF_MAP(XX) \ + XX(LeftPad) \ + XX(RightPad) + +#define STRING_STREAM_NUM_FORMATTER_UDF_MAP(XX) \ + XX(Hex, ui64) \ + XX(SHex, i64) \ + XX(Bin, ui64) \ + XX(SBin, i64) + +#define STRING_STREAM_TEXT_FORMATTER_UDF_MAP(XX) \ + XX(HexText) \ + XX(BinText) + +#define STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(XX) \ + XX(HumanReadableQuantity, SF_QUANTITY) \ + XX(HumanReadableBytes, SF_BYTES) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, char*(TAutoMap<char*>, ui64)) { + TString input(args[0].AsStringRef()); + ui64 maxLength = args[1].Get<ui64>(); + CollapseText(input, maxLength); + return valueBuilder->NewString(input); + } + + struct TCollapseTextKernelExec + : public TBinaryKernelExec<TCollapseTextKernelExec> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + TString input(arg1.AsStringRef()); + ui64 maxLength = arg2.Get<ui64>(); + CollapseText(input, maxLength); + return sink(TBlockItem(input)); + } + }; + + END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do); + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) { + Y_UNUSED(valueBuilder); + if (!args[0]) + return TUnboxedValuePod(false); + + const TString haystack(args[0].AsStringRef()); + const TString needle(args[1].AsStringRef()); + return TUnboxedValuePod(haystack.Contains(needle)); + } + + struct TContainsKernelExec : public TBinaryKernelExec<TContainsKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) + return sink(TBlockItem(false)); + + const TString haystack(arg1.AsStringRef()); + const TString 
needle(arg2.AsStringRef()); + sink(TBlockItem(haystack.Contains(needle))); + } + }; + + END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do); + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) { + if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) + return valueBuilder->NewString(result); + else + return args[0]; + } + + struct TReplaceAllKernelExec + : public TGenericKernelExec<TReplaceAllKernelExec, 3> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + TString result(args.GetElement(0).AsStringRef()); + const TStringBuf what(args.GetElement(1).AsStringRef()); + const TStringBuf with(args.GetElement(2).AsStringRef()); + if (SubstGlobal(result, what, with)) { + return sink(TBlockItem(result)); + } else { + return sink(args.GetElement(0)); + } + } + }; + + END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; + } + + struct TReplaceFirstKernelExec + : public TGenericKernelExec<TReplaceFirstKernelExec, 3> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + std::string result(args.GetElement(0).AsStringRef()); + const std::string_view what(args.GetElement(1).AsStringRef()); + const std::string_view with(args.GetElement(2).AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), with); + return sink(TBlockItem(result)); + } + return sink(args.GetElement(0)); + } + }; + + 
END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; + } + + struct TReplaceLastKernelExec + : public TGenericKernelExec<TReplaceLastKernelExec, 3> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + std::string result(args.GetElement(0).AsStringRef()); + const std::string_view what(args.GetElement(1).AsStringRef()); + const std::string_view with(args.GetElement(2).AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), with); + return sink(TBlockItem(result)); + } + return sink(args.GetElement(0)); + } + }; + + END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + size_t tpos = 0; + for (const ui8 c : input) { + if (!chars[c]) { + input[tpos++] = c; + } + } + if (tpos != input.size()) { + input.resize(tpos); + return valueBuilder->NewString(input); + } + return args[0]; + } + + struct TRemoveAllKernelExec + : public TBinaryKernelExec<TRemoveAllKernelExec> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + 
chars[c] = true; + } + size_t tpos = 0; + for (const ui8 c : input) { + if (!chars[c]) { + input[tpos++] = c; + } + } + if (tpos != input.size()) { + input.resize(tpos); + return sink(TBlockItem(input)); + } + sink(arg1); + } + }; + + END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(it); + return valueBuilder->NewString(input); + } + } + return args[0]; + } + + struct TRemoveFirstKernelExec + : public TBinaryKernelExec<TRemoveFirstKernelExec> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(it); + return sink(TBlockItem(input)); + } + } + sink(arg1); + } + }; + + END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(input.crend() - it - 1, 1); + return valueBuilder->NewString(input); + } + } + return args[0]; + } + + struct TRemoveLastKernelExec + : public TBinaryKernelExec<TRemoveLastKernelExec> + { + template <typename 
TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(input.crend() - it - 1, 1); + return sink(TBlockItem(input)); + } + } + sink(arg1); + } + }; + + END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do) + + + // NOTE: String::Find is marked as deprecated, so block implementation is + // not required for them. Hence, only the scalar one is provided. + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const TString haystack(args[0].AsStringRef()); + const TString needle(args[1].AsStringRef()); + const ui64 pos = args[2].GetOrDefault<ui64>(0); + return TUnboxedValuePod(haystack.find(needle, pos)); + } + + // NOTE: String::ReverseFind is marked as deprecated, so block + // implementation is not required for them. Hence, only the scalar one is + // provided. + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const TString haystack(args[0].AsStringRef()); + const TString needle(args[1].AsStringRef()); + const ui64 pos = args[2].GetOrDefault<ui64>(TString::npos); + return TUnboxedValuePod(haystack.rfind(needle, pos)); + } + + // NOTE: String::Substring is marked as deprecated, so block implementation + // is not required for them. Hence, only the scalar one is provided. 
+ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) { + const TString input(args[0].AsStringRef()); + const ui64 from = args[1].GetOrDefault<ui64>(0); + const ui64 count = args[2].GetOrDefault<ui64>(TString::npos); + return valueBuilder->NewString(input.substr(from, count)); + } + + using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; + + template <typename TIt> + static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + const TIt& it, + TTmpVector& result) { + for (const auto& elem : it) { + result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); + } + } + template <typename TIt> + static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + TIt& it, + bool skipEmpty, + TTmpVector& result) { + if (skipEmpty) { + SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); + } else { + SplitToListImpl(valueBuilder, input, from, it, result); + } + } + + constexpr char delimeterStringName[] = "DelimeterString"; + constexpr char skipEmptyName[] = "SkipEmpty"; + constexpr char limitName[] = "Limit"; + using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; + using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; + using TLimitArg = TNamedArg<ui64, limitName>; + + + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>( + TOptional<char*>, + char*, + TDelimeterStringArg, + TSkipEmptyArg, + TLimitArg + ), + 3) { + TTmpVector result; + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view delimeter(args[1].AsStringRef()); + const bool delimiterString = args[2].GetOrDefault<bool>(true); + const bool skipEmpty = args[3].GetOrDefault<bool>(false); + const auto limit = 
args[4].GetOrDefault<ui64>(0); + if (delimiterString) { + if (limit) { + auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = StringSplitter(input).SplitByString(delimeter); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } else { + if (limit) { + auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } + } + return valueBuilder->NewList(result.data(), result.size()); + } + + SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) { + auto input = args[0].GetListIterator(); + const TString delimeter(args[1].AsStringRef()); + TVector<TString> items; + + for (TUnboxedValue current; input.Next(current);) { + if (current) { + TString item(current.AsStringRef()); + items.push_back(std::move(item)); + } + } + + return valueBuilder->NewString(JoinSeq(delimeter, items)); + } + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const TStringBuf left(args[0].AsStringRef()); + const TStringBuf right(args[1].AsStringRef()); + const ui64 result = NLevenshtein::Distance(left, right); + return TUnboxedValuePod(result); + } + + struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + const std::string_view left(arg1.AsStringRef()); + const std::string_view right(arg2.AsStringRef()); + const ui64 result = NLevenshtein::Distance(left, right); + sink(TBlockItem(result)); + } 
+ }; + + END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do); + + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) { + TStringStream result; + result << HumanReadable(TDuration::MicroSeconds(args[0].Get<ui64>())); + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); + } + + struct THumanReadableDurationKernelExec + : public TUnaryKernelExec<THumanReadableDurationKernelExec> + { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { + TStringStream result; + result << HumanReadable(TDuration::MicroSeconds(arg1.Get<ui64>())); + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); + } + }; + + END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do) + + + BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) { + TStringStream result; + result << Prec(args[0].Get<double>(), args[1].Get<ui64>()); + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); + } + + struct TPrecKernelExec : public TBinaryKernelExec<TPrecKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + TStringStream result; + result << Prec(arg1.Get<double>(), arg2.Get<ui64>()); + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); + } + }; + + END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do) + + + SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) { + const TStringBuf input(args[0].AsStringRef()); + TUnboxedValue* items = nullptr; + TUnboxedValue result = valueBuilder->NewArray(input.size(), items); + for (const unsigned char c : input) { + *items++ = TUnboxedValuePod(c); + } + return result; + } + + SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) { + auto input = args[0]; + + if (auto elems = input.GetElements()) { + const auto elemCount = input.GetListLength(); + TUnboxedValue result = 
valueBuilder->NewStringNotFilled(input.GetListLength()); + auto bufferPtr = result.AsStringRef().Data(); + for (ui64 i = 0; i != elemCount; ++i) { + *(bufferPtr++) = elems[i].Get<ui8>(); + } + return result; + } + + std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> buffer; + buffer.reserve(TUnboxedValuePod::InternalBufferSize); + + const auto& iter = input.GetListIterator(); + for (NUdf::TUnboxedValue item; iter.Next(item); ) { + buffer.push_back(item.Get<ui8>()); + } + + return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); + } + +#define STRING_REGISTER_UDF(udfName, ...) T##udfName, + + STRING_UDF_MAP(STRING_UDF) + STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF) + STROKA_UDF_MAP(STROKA_UDF) + STROKA_CASE_UDF_MAP(STROKA_CASE_UDF) + STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF) + STROKA_FIND_UDF_MAP(STROKA_FIND_UDF) + STRING_TWO_ARGS_UDF_MAP(STRING_TWO_ARGS_UDF) + IS_ASCII_UDF_MAP(IS_ASCII_UDF) + + static constexpr ui64 padLim = 1000000; + STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF) + STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF) + STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF) + STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF) + + SIMPLE_MODULE(TStringModule, + STRING_UDF_MAP(STRING_REGISTER_UDF) + STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_UDF_MAP(STRING_REGISTER_UDF) + STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) + STRING_TWO_ARGS_UDF_MAP(STRING_REGISTER_UDF) + IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + TCollapseText, + TReplaceAll, + TReplaceFirst, + TReplaceLast, + TRemoveAll, + TRemoveFirst, + TRemoveLast, + 
TContains, + TFind, + TReverseFind, + TSubstring, + TSplitToList, + TJoinFromList, + TLevensteinDistance, + THumanReadableDuration, + TPrec, + TToByteList, + TFromByteList) +} + +REGISTER_MODULES(TStringModule) diff --git a/yql/essentials/udfs/common/string/test/canondata/result.json b/yql/essentials/udfs/common/string/test/canondata/result.json new file mode 100644 index 00000000000..f9e3a670c2c --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/result.json @@ -0,0 +1,112 @@ +{ + "test.test[AsciiChecks]": [ + { + "uri": "file://test.test_AsciiChecks_/results.txt" + } + ], + "test.test[Base32Decode]": [ + { + "uri": "file://test.test_Base32Decode_/results.txt" + } + ], + "test.test[Base32Encode]": [ + { + "uri": "file://test.test_Base32Encode_/results.txt" + } + ], + "test.test[BlockAsciiChecks]": [ + { + "uri": "file://test.test_BlockAsciiChecks_/results.txt" + } + ], + "test.test[BlockFind]": [ + { + "uri": "file://test.test_BlockFind_/results.txt" + } + ], + "test.test[BlockRemove]": [ + { + "uri": "file://test.test_BlockRemove_/results.txt" + } + ], + "test.test[BlockReplace]": [ + { + "uri": "file://test.test_BlockReplace_/results.txt" + } + ], + "test.test[BlockStreamFormat]": [ + { + "uri": "file://test.test_BlockStreamFormat_/results.txt" + } + ], + "test.test[BlockStringUDF]": [ + { + "uri": "file://test.test_BlockStringUDF_/results.txt" + } + ], + "test.test[BlockStringUnsafeUDF]": [ + { + "uri": "file://test.test_BlockStringUnsafeUDF_/results.txt" + } + ], + "test.test[BlockTo]": [ + { + "uri": "file://test.test_BlockTo_/results.txt" + } + ], + "test.test[ExtendAndTake]": [ + { + "uri": "file://test.test_ExtendAndTake_/results.txt" + } + ], + "test.test[Find]": [ + { + "uri": "file://test.test_Find_/results.txt" + } + ], + "test.test[List]": [ + { + "uri": "file://test.test_List_/results.txt" + } + ], + "test.test[List_v0]": [ + { + "uri": "file://test.test_List_v0_/results.txt" + } + ], + "test.test[Remove]": [ + { + "uri": 
"file://test.test_Remove_/results.txt" + } + ], + "test.test[ReplaceFirstLast]": [ + { + "uri": "file://test.test_ReplaceFirstLast_/results.txt" + } + ], + "test.test[Replace]": [ + { + "uri": "file://test.test_Replace_/results.txt" + } + ], + "test.test[StreamFormat]": [ + { + "uri": "file://test.test_StreamFormat_/results.txt" + } + ], + "test.test[StringUDF]": [ + { + "uri": "file://test.test_StringUDF_/results.txt" + } + ], + "test.test[StringUnsafeUDF]": [ + { + "uri": "file://test.test_StringUnsafeUDF_/results.txt" + } + ], + "test.test[To]": [ + { + "uri": "file://test.test_To_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt new file mode 100644 index 00000000000..944b17d4c1e --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_AsciiChecks_/results.txt @@ -0,0 +1,124 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "isascii"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isspace"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isupper"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "islower"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isdigit"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isalpha"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isalnum"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "ishex"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + %true; + %false; + %false; + %true; + %false; + %true; + %true; + %false + ]; + [ + %true; + %false; + %false; + %false; + %true; + %false; + %true; + %true + ]; + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt new file mode 100644 index 00000000000..bf4aa56fa93 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Decode_/results.txt @@ -0,0 +1,79 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "strict_decoded"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "decoded"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "ORSXG5A="; + [ + "test" + ]; + [ + "test" + ] + ]; + [ + "KRSXG5CUMVZXI==="; + [ + "TestTest" + ]; + [ + "TestTest" + ] + ]; + [ + "MFYHA3DF"; + [ + "apple" + ]; + [ + "apple" + ] + ]; + [ + "hmmmm===hmmmm"; + #; + [ + "\0\0\0" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt new file mode 100644 index 00000000000..51c74759fc7 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Base32Encode_/results.txt @@ -0,0 +1,44 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "encoded"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "test"; + "ORSXG5A=" + ]; + [ + "TestTest"; + "KRSXG5CUMVZXI===" + ]; + [ + "apple"; + "MFYHA3DF" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt new file mode 100644 index 00000000000..944b17d4c1e --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiChecks_/results.txt @@ -0,0 +1,124 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "isascii"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isspace"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isupper"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "islower"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isdigit"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isalpha"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "isalnum"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "ishex"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + %true; + %false; + %false; + %true; + %false; + %true; + %true; + %false + ]; + [ + %true; + %false; + %false; + %false; + %true; + %false; + %true; + %true + ]; + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt new file mode 100644 index 00000000000..f6374e682e5 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt @@ -0,0 +1,69 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "contains"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "levenstein"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + %false; + "3" + ]; + [ + "aswedfg"; + %true; + "5" + ]; + [ + "asdadsaasd"; + %true; + "8" + ]; + [ + "gdsfsassas"; + %true; + "8" + ]; + [ + ""; + %false; + "2" + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + %false; + "23" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt new file mode 100644 index 00000000000..6fbf37a9f9b --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockRemove_/results.txt @@ -0,0 +1,173 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "all"; + [ + "DataType"; + "String" + ] + ]; + [ + "first"; + [ + "DataType"; + "String" + ] + ]; + [ + "last"; + [ + "DataType"; + "String" + ] + ]; + [ + "first2"; + [ + "DataType"; + "String" + ] + ]; + [ + "last2"; + [ + "DataType"; + "String" + ] + ]; + [ + "first3"; + [ + "DataType"; + "String" + ] + ]; + [ + "last3"; + [ + "DataType"; + "String" + ] + ]; + [ + "hwruall"; + [ + "DataType"; + "String" + ] + ]; + [ + "hwrufirst"; + [ + "DataType"; + "String" + ] + ]; + [ + "hwrulast"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + "fd"; + "fds"; + "fds"; + "fda"; + "fds"; + "fdsa"; + "fdsa"; + "fdsa"; + "fdsa"; + "fdsa" + ]; + [ + "aswedfg"; + "wedfg"; + "swedfg"; + "swedfg"; + "swedfg"; + "awedfg"; + "aswedfg"; + "aswedfg"; + "aswedfg"; + "aswedfg"; + "aswedfg" + ]; + [ + "asdadsaasd"; + "ddd"; + "sdadsaasd"; + "asdadsasd"; + "sdadsaasd"; + "asdadsaad"; + "asdadsaasd"; + "asdadsaasd"; + "asdadsaasd"; + "asdadsaasd"; + "asdadsaasd" + ]; + [ + "gdsfsassas"; + "gdf"; + "gdsfsssas"; + "gdsfsasss"; + "gdfsassas"; + "gdsfsassa"; + "gdsfsassas"; + "gdsfsassas"; + "gdsfsassas"; + "gdsfsassas"; + "gdsfsassas" + ]; + [ + ""; + ""; + ""; + ""; + ""; + ""; + ""; + ""; + ""; + ""; + "" + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, 
\xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"; + "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt new file mode 100644 index 00000000000..2ac3566c61d --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockReplace_/results.txt @@ -0,0 +1,134 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "all"; + [ + "DataType"; + "String" + ] + ]; + [ + "first"; + [ + "DataType"; + "String" + ] + ]; + [ + "last"; + [ + "DataType"; + "String" + ] + ]; + [ + "first2"; + [ + "DataType"; + "String" + ] + ]; + [ + "last2"; + [ + "DataType"; + "String" + ] + ]; + [ + "first3"; + [ + "DataType"; + "String" + ] + ]; + [ + "last3"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + "fdsa"; + "fdsz"; + "fdsz"; + "fdszz"; + "fdszz"; + "fds"; + "fds" + ]; + [ + "aswedfg"; + "zzzwedfg"; + "zswedfg"; + "zswedfg"; + "zzswedfg"; + "zzswedfg"; + "swedfg"; + "swedfg" + ]; + [ + "asdadsaasd"; + "zzzdadsazzzd"; + "zsdadsaasd"; + "asdadsazsd"; + "zzsdadsaasd"; + "asdadsazzsd"; + "sdadsaasd"; + "asdadsasd" + ]; + [ + "gdsfsassas"; + "gdsfszzzszzz"; + "gdsfszssas"; + "gdsfsasszs"; + "gdsfszzssas"; + "gdsfsasszzs"; + "gdsfsssas"; + "gdsfsasss" + ]; + [ + ""; + ""; + ""; + ""; + ""; + ""; + ""; + "" + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + 
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt new file mode 100644 index 00000000000..b1bff8a57b8 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStreamFormat_/results.txt @@ -0,0 +1,208 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "right_pad"; + [ + "DataType"; + "String" + ] + ]; + [ + "left_pad"; + [ + "DataType"; + "String" + ] + ]; + [ + "right_pad_zero"; + [ + "DataType"; + "String" + ] + ]; + [ + "left_pad_zero"; + [ + "DataType"; + "String" + ] + ]; + [ + "hex"; + [ + "DataType"; + "String" + ] + ]; + [ + "shex"; + [ + "DataType"; + "String" + ] + ]; + [ + "bin"; + [ + "DataType"; + "String" + ] + ]; + [ + "sbin"; + [ + "DataType"; + "String" + ] + ]; + [ + "hex_text"; + [ + "DataType"; + "String" + ] + ]; + [ + "bin_text"; + [ + "DataType"; + "String" + ] + ]; + [ + "duration"; + [ + "DataType"; + "String" + ] + ]; + [ + "quantity"; + [ + "DataType"; + "String" + ] + ]; + [ + "bytes"; + [ + "DataType"; + "String" + ] + ]; + [ + "prec"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "qwertyui"; + "qwertyui "; + " qwertyui"; + "qwertyui000000000000"; + "000000000000qwertyui"; + "0x00000000499602D2"; + "-0x000000000000007B"; + "0b0000000000000000000000000000000001001001100101100000001011010010"; + "-0b0000000000000000000000000000000000000000000000000000000001111011"; + "71 77 65 72 74 79 75 69"; + "01110001 01110111 01100101 01110010 01110100 01111001 01110101 01101001"; + "20m 34s"; + "1.23G"; + "1.15GiB"; + "-0.009963" + ]; + [ + "asdfghjl"; + "asdfghjl "; + " asdfghjl"; + "asdfghjl000000000000"; + "000000000000asdfghjl"; + "0x000000024CB016EA"; + "-0x00000000000001C8"; + 
"0b0000000000000000000000000000001001001100101100000001011011101010"; + "-0b0000000000000000000000000000000000000000000000000000000111001000"; + "61 73 64 66 67 68 6A 6C"; + "01100001 01110011 01100100 01100110 01100111 01101000 01101010 01101100"; + "2h 44m 36s"; + "9.88G"; + "9.2GiB"; + "-0.03694" + ]; + [ + "zxcvbnm?"; + "zxcvbnm? "; + " zxcvbnm?"; + "zxcvbnm?000000000000"; + "000000000000zxcvbnm?"; + "0x00000002540BE3FF"; + "-0x0000000000000315"; + "0b0000000000000000000000000000001001010100000010111110001111111111"; + "-0b0000000000000000000000000000000000000000000000000000001100010101"; + "7A 78 63 76 62 6E 6D 3F"; + "01111010 01111000 01100011 01110110 01100010 01101110 01101101 00111111"; + "2h 46m 40s"; + "10G"; + "9.31GiB"; + "-0.06391" + ]; + [ + "12345678"; + "12345678 "; + " 12345678"; + "12345678000000000000"; + "00000000000012345678"; + "0x0000000000000000"; + "0x0000000000000000"; + "0b0000000000000000000000000000000000000000000000000000000000000000"; + "0b0000000000000000000000000000000000000000000000000000000000000000"; + "31 32 33 34 35 36 37 38"; + "00110001 00110010 00110011 00110100 00110101 00110110 00110111 00111000"; + "0us"; + "0"; + "0B"; + "0" + ]; + [ + "!@#$%^&*"; + "!@#$%^&* "; + " !@#$%^&*"; + "!@#$%^&*000000000000"; + "000000000000!@#$%^&*"; + "0x0000000223557439"; + "-0x00000000000003E7"; + "0b0000000000000000000000000000001000100011010101010111010000111001"; + "-0b0000000000000000000000000000000000000000000000000000001111100111"; + "21 40 23 24 25 5E 26 2A"; + "00100001 01000000 00100011 00100100 00100101 01011110 00100110 00101010"; + "2h 33m 2s"; + "9.18G"; + "8.55GiB"; + "-0.08092" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt new file mode 100644 index 00000000000..a665105224f --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUDF_/results.txt @@ -0,0 +1,169 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "b32enc"; + [ + "DataType"; + "String" + ] + ]; + [ + "b64enc"; + [ + "DataType"; + "String" + ] + ]; + [ + "b64encu"; + [ + "DataType"; + "String" + ] + ]; + [ + "cesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "cunesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "xenc"; + [ + "DataType"; + "String" + ] + ]; + [ + "henc"; + [ + "DataType"; + "String" + ] + ]; + [ + "hdec"; + [ + "DataType"; + "String" + ] + ]; + [ + "cgesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "cgunesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "clps"; + [ + "DataType"; + "String" + ] + ]; + [ + "strp"; + [ + "DataType"; + "String" + ] + ]; + [ + "clpst"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="; + "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="; + "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ,"; + " !qwe rty uiop [ ]$"; + " !qwe rty uiop [ ]$"; + "202020217177652072747920202075696F70205B205D24"; + " !qwe rty uiop [ ]$"; + " !qwe rty uiop [ ]$"; + "+++!qwe+rty+++uiop+%5B+%5D$"; + " !qwe rty uiop [ ]$"; + " !qwe rty uiop [ ]$"; + "!qwe rty uiop [ ]$"; + "!qwe ..." 
+ ]; + [ + "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"; + "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA=="; + "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"; + "@as dfgh jkl\\\\n;'% "; + "@as dfgh jkl\n;'% "; + "4061732020202020202064666768206A6B6C5C6E3B27252020"; + "@as dfgh jkl\\n;'% "; + "@as dfgh jkl\\n;'% "; + "@as+++++++dfgh+jkl%5Cn;%27%25++"; + "@as dfgh jkl\\n;'% "; + "@as dfgh jkl\\n;'% "; + "@as dfgh jkl\\n;'%"; + "@as ..." + ]; + [ + "EAQCAI32PBRQS5TCNYQASCQIEBWSYLRPH5PCAIBA"; + "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8/XiAgIA=="; + "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8_XiAgIA,,"; + " #zxc\\tvbn \\t\\n\\x08 m,./?^ "; + " #zxc\tvbn \t\n\x08 m,./?^ "; + "202020237A78630976626E20090A08206D2C2E2F3F5E202020"; + " #zxc\tvbn \t\n\x08 m,./?^ "; + " #zxc\tvbn \t\n\x08 m,./?^ "; + "+++%23zxc%09vbn+%09%0A%08+m%2C./%3F%5E+++"; + " #zxc\tvbn \t\n\x08 m,./?^ "; + " #zxc vbn \x08 m,./?^ "; + "#zxc\tvbn \t\n\x08 m,./?^"; + "#zxc ..." + ]; + [ + "GEQTEQBTEM2CINJFGZPDOJRYFI4SQMBJFVPT2KZMHQXD4==="; + "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg=="; + "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg,,"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "31213240332334243525365E3726382A392830292D5F3D2B2C3C2E3E"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3%234$5%256%5E7%268*9%280%29-_%3D%2B%2C%3C.%3E"; + "1!2@3#4$5%6^7&8*9(0)-_= ,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@ ..." + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt new file mode 100644 index 00000000000..26b182f9343 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockStringUnsafeUDF_/results.txt @@ -0,0 +1,158 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "b32dec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "b32sdec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "b64dec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "b64sdec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "xdec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + " !qwe rty uiop [ ]$" + ]; + [ + " !qwe rty uiop [ ]$" + ]; + [ + [ + "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOS" + ] + ]; + #; + # + ]; + [ + [ + [ + "QIAEXLvMggAcAECCAFgAQUALyg==" + ] + ]; + #; + [ + " !qwe rty uiop [ ]$" + ]; + [ + " !qwe rty uiop [ ]$" + ]; + # + ]; + [ + [ + [ + "0DQNA0D4P/93QP6/z4NA0DQP98Dxfg0DodA6PQ==" + ] + ]; + #; + #; + #; + [ + " !qwe rty uiop [ ]$" + ] + ]; + [ + [ + "@as dfgh jkl\\n;'% " + ]; + [ + "@as dfgh jkl\\n;'% " + ]; + [ + [ + "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA" + ] + ]; + [ + [ + "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA" + ] + ]; + # + ]; + [ + [ + [ + "gYoECABAgAQaIM6AAAAAubn0goBAAA==" + ] + ]; + #; + [ + "@as dfgh jkl\\n;'% " + ]; + [ + "@as dfgh jkl\\n;'% " + ]; + # + ]; + [ + [ + [ + "4DwP70DQNA0DQNA0D3Pe9/wNA8DwfC6LxNh1/XdA0A==" + ] + ]; + #; + #; + #; + [ + "@as dfgh jkl\\n;'% " + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt new file mode 100644 index 00000000000..143cfb76417 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_BlockTo_/results.txt @@ -0,0 +1,88 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "ascii_lower"; + [ + "DataType"; + "String" + ] + ]; + [ + "ascii_upper"; + [ + "DataType"; + "String" + ] + ]; + [ + "ascii_title"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "test"; + "test"; + "TEST"; + "Test" + ]; + [ + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82" + ]; + [ + "TeSt"; + "test"; + "TEST"; + "Test" + ]; + [ + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2" + ]; + [ + "Eyl\xC3\xBCl"; + "eyl\xC3\xBCl"; + "EYL\xC3\xBCL"; + "Eyl\xC3\xBCl" + ]; + [ + "6"; + "6"; + "6"; + "6" + ]; + [ + ""; + ""; + ""; + "" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt new file mode 100644 index 00000000000..81269c68153 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ExtendAndTake_/results.txt @@ -0,0 +1,60 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "a"; + "b"; + "c" + ]; + [ + "b" + ] + ]; + [ + [ + "d" + ]; + [ + "d" + ] + ]; + [ + []; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt new file mode 100644 index 00000000000..cec53212501 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt @@ -0,0 +1,147 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "contains"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "prefix"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "starts"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "suffix"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "ends"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "find"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "rfind"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "levenstein"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + %false; + %false; + %false; + %false; + %false; + "-1"; + "-1"; + "3" + ]; + [ + "aswedfg"; + %true; + %true; + %true; + %false; + %false; + "0"; + "0"; + "5" + ]; + [ + "asdadsaasd"; + %true; + %true; + %true; + %false; + %false; + "0"; + "7"; + "8" + ]; + [ + "gdsfsassas"; + %true; + %false; + %false; + %true; + %true; + "5"; + "8"; + "8" + ]; + [ + ""; + %false; + %false; + %false; + %false; + %false; + "-1"; + "-1"; + "2" + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + %false; + %false; + %false; + %false; + %false; + "-1"; + "-1"; + "23" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt new file mode 100644 index 00000000000..dac9a135756 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_List_/results.txt @@ -0,0 +1,265 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "equals_to_original"; + [ + "DataType"; + "String" + ] + ]; + [ + "replace_delimeter"; + [ + "DataType"; + "String" + ] + ]; + [ + "just_split"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "first"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "skip_empty"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "multichar_delim_set"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "multichar_delim_string"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "limited"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "a@b@c"; + "a@b@c"; + "a#b#c"; + [ + "a"; + "b"; + "c" + ]; + [ + "a" + ]; + [ + "a"; + "b"; + "c" + ]; + [ + "a"; + ""; + ""; + "c" + ]; + [ + "a@"; + "c" + ]; + [ + "a"; + "b@c" + ] + ]; + [ + "@a@b@c"; + "@a@b@c"; + "#a#b#c"; + [ + ""; + "a"; + "b"; + "c" + ]; + [ + "" + ]; + [ + "a"; + "b"; + "c" + ]; + [ + ""; + "a"; + ""; + ""; + "c" + ]; + [ + "@a@"; + "c" + ]; + [ + ""; + "a@b@c" + ] + ]; + [ + "@@@a@a"; + "@@@a@a"; + "###a#a"; + [ + ""; + ""; + ""; + "a"; + "a" + ]; + [ + "" + ]; + [ + "a"; + "a" + ]; + [ + ""; + ""; + ""; + "a"; + "a" + ]; + [ + "@@@a@a" + ]; + [ + ""; + "@@a@a" + ] + ]; + [ + "d#e#f"; + "d#e#f"; + "d#e#f"; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ] + ]; + [ + "d"; + "d"; + "d"; + [ + "d" + ]; + [ + "d" + ]; + [ + "d" + ]; + [ + "d" + ]; + 
[ + "d" + ]; + [ + "d" + ] + ]; + [ + ""; + ""; + ""; + [ + "" + ]; + [ + "" + ]; + []; + [ + "" + ]; + [ + "" + ]; + [ + "" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt new file mode 100644 index 00000000000..b149ad38a60 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_List_v0_/results.txt @@ -0,0 +1,125 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "not_equals_to_original"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "not_equals_to_original_skip_empty"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "equals_to_original"; + [ + "DataType"; + "String" + ] + ]; + [ + "multichar"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "a@b@c"; + #; + #; + "a@b@c"; + [ + "a"; + "b"; + "c" + ] + ]; + [ + "@a@b@c"; + #; + #; + "@a@b@c"; + [ + "a"; + "b"; + "c" + ] + ]; + [ + "@@@a@a"; + [ + "@@@a@a" + ]; + [ + "@@@a@a" + ]; + "@@@a@a"; + [ + "a"; + "a" + ] + ]; + [ + "d#e#f"; + #; + #; + "d#e#f"; + [ + "d"; + "e"; + "f" + ] + ]; + [ + "d"; + #; + #; + "d"; + [ + "d" + ] + ]; + [ + ""; + #; + #; + ""; + [] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt new file mode 100644 index 00000000000..6fbf37a9f9b --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Remove_/results.txt @@ -0,0 +1,173 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "all"; + [ + "DataType"; + "String" + ] + ]; + [ + "first"; + [ + "DataType"; + "String" + ] + ]; + [ + "last"; + [ + "DataType"; + "String" + ] + ]; + [ + "first2"; + [ + "DataType"; + "String" + ] + ]; + [ + "last2"; + [ + "DataType"; + "String" + ] + ]; + [ + "first3"; + [ + "DataType"; + "String" + ] + ]; + [ + "last3"; + [ + "DataType"; + "String" + ] + ]; + [ + "hwruall"; + [ + "DataType"; + "String" + ] + ]; + [ + "hwrufirst"; + [ + "DataType"; + "String" + ] + ]; + [ + "hwrulast"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + "fd"; + "fds"; + "fds"; + "fda"; + "fds"; + "fdsa"; + "fdsa"; + "fdsa"; + "fdsa"; + "fdsa" + ]; + [ + "aswedfg"; + "wedfg"; + "swedfg"; + "swedfg"; + "swedfg"; + "awedfg"; + "aswedfg"; + "aswedfg"; + "aswedfg"; + "aswedfg"; + "aswedfg" + ]; + [ + "asdadsaasd"; + "ddd"; + "sdadsaasd"; + "asdadsasd"; + "sdadsaasd"; + "asdadsaad"; + "asdadsaasd"; + "asdadsaasd"; + "asdadsaasd"; + "asdadsaasd"; + "asdadsaasd" + ]; + [ + "gdsfsassas"; + "gdf"; + "gdsfsssas"; + "gdsfsasss"; + "gdfsassas"; + "gdsfsassa"; + "gdsfsassas"; + "gdsfsassas"; + "gdsfsassas"; + "gdsfsassas"; + "gdsfsassas" + ]; + [ + ""; + ""; + ""; + ""; + ""; + ""; + ""; + ""; + ""; + ""; + "" + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + 
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"; + "\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt new file mode 100644 index 00000000000..9320ac1c18a --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_ReplaceFirstLast_/results.txt @@ -0,0 +1,84 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "DataType"; + "String" + ] + ]; + [ + "column2"; + [ + "DataType"; + "String" + ] + ]; + [ + "column3"; + [ + "DataType"; + "String" + ] + ]; + [ + "column4"; + [ + "DataType"; + "String" + ] + ]; + [ + "column5"; + [ + "DataType"; + "String" + ] + ]; + [ + "column6"; + [ + "DataType"; + "String" + ] + ]; + [ + "column7"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "gzas"; + "gzzzsas"; + "gsas"; + "gasas"; + "gasz"; + "gaszzzs"; + "gass"; + "gasas" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt new file mode 100644 index 00000000000..2ac3566c61d --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_Replace_/results.txt @@ -0,0 +1,134 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "all"; + [ + "DataType"; + "String" + ] + ]; + [ + "first"; + [ + "DataType"; + "String" + ] + ]; + [ + "last"; + [ + "DataType"; + "String" + ] + ]; + [ + "first2"; + [ + "DataType"; + "String" + ] + ]; + [ + "last2"; + [ + "DataType"; + "String" + ] + ]; + [ + "first3"; + [ + "DataType"; + "String" + ] + ]; + [ + "last3"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "fdsa"; + "fdsa"; + "fdsz"; + "fdsz"; + "fdszz"; + "fdszz"; + "fds"; + "fds" + ]; + [ + "aswedfg"; + "zzzwedfg"; + "zswedfg"; + "zswedfg"; + "zzswedfg"; + "zzswedfg"; + "swedfg"; + "swedfg" + ]; + [ + "asdadsaasd"; + "zzzdadsazzzd"; + "zsdadsaasd"; + "asdadsazsd"; + "zzsdadsaasd"; + "asdadsazzsd"; + "sdadsaasd"; + "asdadsasd" + ]; + [ + "gdsfsassas"; + "gdsfszzzszzz"; + "gdsfszssas"; + "gdsfsasszs"; + "gdsfszzssas"; + "gdsfsasszzs"; + "gdsfsssas"; + "gdsfsasss" + ]; + [ + ""; + ""; + ""; + ""; + ""; + ""; + ""; + "" + ]; + [ + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, 
\xD0\xBC\xD0\xB8\xD1\x80!`"; + "`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt new file mode 100644 index 00000000000..b1bff8a57b8 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StreamFormat_/results.txt @@ -0,0 +1,208 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "right_pad"; + [ + "DataType"; + "String" + ] + ]; + [ + "left_pad"; + [ + "DataType"; + "String" + ] + ]; + [ + "right_pad_zero"; + [ + "DataType"; + "String" + ] + ]; + [ + "left_pad_zero"; + [ + "DataType"; + "String" + ] + ]; + [ + "hex"; + [ + "DataType"; + "String" + ] + ]; + [ + "shex"; + [ + "DataType"; + "String" + ] + ]; + [ + "bin"; + [ + "DataType"; + "String" + ] + ]; + [ + "sbin"; + [ + "DataType"; + "String" + ] + ]; + [ + "hex_text"; + [ + "DataType"; + "String" + ] + ]; + [ + "bin_text"; + [ + "DataType"; + "String" + ] + ]; + [ + "duration"; + [ + "DataType"; + "String" + ] + ]; + [ + "quantity"; + [ + "DataType"; + "String" + ] + ]; + [ + "bytes"; + [ + "DataType"; + "String" + ] + ]; + [ + "prec"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "qwertyui"; + "qwertyui "; + " qwertyui"; + "qwertyui000000000000"; + "000000000000qwertyui"; + "0x00000000499602D2"; + "-0x000000000000007B"; + "0b0000000000000000000000000000000001001001100101100000001011010010"; + "-0b0000000000000000000000000000000000000000000000000000000001111011"; + "71 77 65 72 74 79 75 69"; + "01110001 01110111 01100101 01110010 01110100 01111001 01110101 01101001"; + "20m 34s"; + "1.23G"; + "1.15GiB"; + "-0.009963" + ]; + [ + "asdfghjl"; + "asdfghjl "; + " asdfghjl"; + "asdfghjl000000000000"; + "000000000000asdfghjl"; + "0x000000024CB016EA"; + "-0x00000000000001C8"; + "0b0000000000000000000000000000001001001100101100000001011011101010"; + 
"-0b0000000000000000000000000000000000000000000000000000000111001000"; + "61 73 64 66 67 68 6A 6C"; + "01100001 01110011 01100100 01100110 01100111 01101000 01101010 01101100"; + "2h 44m 36s"; + "9.88G"; + "9.2GiB"; + "-0.03694" + ]; + [ + "zxcvbnm?"; + "zxcvbnm? "; + " zxcvbnm?"; + "zxcvbnm?000000000000"; + "000000000000zxcvbnm?"; + "0x00000002540BE3FF"; + "-0x0000000000000315"; + "0b0000000000000000000000000000001001010100000010111110001111111111"; + "-0b0000000000000000000000000000000000000000000000000000001100010101"; + "7A 78 63 76 62 6E 6D 3F"; + "01111010 01111000 01100011 01110110 01100010 01101110 01101101 00111111"; + "2h 46m 40s"; + "10G"; + "9.31GiB"; + "-0.06391" + ]; + [ + "12345678"; + "12345678 "; + " 12345678"; + "12345678000000000000"; + "00000000000012345678"; + "0x0000000000000000"; + "0x0000000000000000"; + "0b0000000000000000000000000000000000000000000000000000000000000000"; + "0b0000000000000000000000000000000000000000000000000000000000000000"; + "31 32 33 34 35 36 37 38"; + "00110001 00110010 00110011 00110100 00110101 00110110 00110111 00111000"; + "0us"; + "0"; + "0B"; + "0" + ]; + [ + "!@#$%^&*"; + "!@#$%^&* "; + " !@#$%^&*"; + "!@#$%^&*000000000000"; + "000000000000!@#$%^&*"; + "0x0000000223557439"; + "-0x00000000000003E7"; + "0b0000000000000000000000000000001000100011010101010111010000111001"; + "-0b0000000000000000000000000000000000000000000000000000001111100111"; + "21 40 23 24 25 5E 26 2A"; + "00100001 01000000 00100011 00100100 00100101 01011110 00100110 00101010"; + "2h 33m 2s"; + "9.18G"; + "8.55GiB"; + "-0.08092" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt new file mode 100644 index 00000000000..a665105224f --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUDF_/results.txt @@ -0,0 +1,169 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "b32enc"; + [ + "DataType"; + "String" + ] + ]; + [ + "b64enc"; + [ + "DataType"; + "String" + ] + ]; + [ + "b64encu"; + [ + "DataType"; + "String" + ] + ]; + [ + "cesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "cunesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "xenc"; + [ + "DataType"; + "String" + ] + ]; + [ + "henc"; + [ + "DataType"; + "String" + ] + ]; + [ + "hdec"; + [ + "DataType"; + "String" + ] + ]; + [ + "cgesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "cgunesc"; + [ + "DataType"; + "String" + ] + ]; + [ + "clps"; + [ + "DataType"; + "String" + ] + ]; + [ + "strp"; + [ + "DataType"; + "String" + ] + ]; + [ + "clpst"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="; + "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="; + "ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ,"; + " !qwe rty uiop [ ]$"; + " !qwe rty uiop [ ]$"; + "202020217177652072747920202075696F70205B205D24"; + " !qwe rty uiop [ ]$"; + " !qwe rty uiop [ ]$"; + "+++!qwe+rty+++uiop+%5B+%5D$"; + " !qwe rty uiop [ ]$"; + " !qwe rty uiop [ ]$"; + "!qwe rty uiop [ ]$"; + "!qwe ..." + ]; + [ + "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"; + "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA=="; + "QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"; + "@as dfgh jkl\\\\n;'% "; + "@as dfgh jkl\n;'% "; + "4061732020202020202064666768206A6B6C5C6E3B27252020"; + "@as dfgh jkl\\n;'% "; + "@as dfgh jkl\\n;'% "; + "@as+++++++dfgh+jkl%5Cn;%27%25++"; + "@as dfgh jkl\\n;'% "; + "@as dfgh jkl\\n;'% "; + "@as dfgh jkl\\n;'%"; + "@as ..." 
+ ]; + [ + "EAQCAI32PBRQS5TCNYQASCQIEBWSYLRPH5PCAIBA"; + "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8/XiAgIA=="; + "ICAgI3p4Ywl2Ym4gCQoIIG0sLi8_XiAgIA,,"; + " #zxc\\tvbn \\t\\n\\x08 m,./?^ "; + " #zxc\tvbn \t\n\x08 m,./?^ "; + "202020237A78630976626E20090A08206D2C2E2F3F5E202020"; + " #zxc\tvbn \t\n\x08 m,./?^ "; + " #zxc\tvbn \t\n\x08 m,./?^ "; + "+++%23zxc%09vbn+%09%0A%08+m%2C./%3F%5E+++"; + " #zxc\tvbn \t\n\x08 m,./?^ "; + " #zxc vbn \x08 m,./?^ "; + "#zxc\tvbn \t\n\x08 m,./?^"; + "#zxc ..." + ]; + [ + "GEQTEQBTEM2CINJFGZPDOJRYFI4SQMBJFVPT2KZMHQXD4==="; + "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg=="; + "MSEyQDMjNCQ1JTZeNyY4KjkoMCktXz0rLDwuPg,,"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "31213240332334243525365E3726382A392830292D5F3D2B2C3C2E3E"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3%234$5%256%5E7%268*9%280%29-_%3D%2B%2C%3C.%3E"; + "1!2@3#4$5%6^7&8*9(0)-_= ,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@3#4$5%6^7&8*9(0)-_=+,<.>"; + "1!2@ ..." + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt new file mode 100644 index 00000000000..26b182f9343 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_StringUnsafeUDF_/results.txt @@ -0,0 +1,158 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "b32dec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "b32sdec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "b64dec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "b64sdec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "xdec"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + " !qwe rty uiop [ ]$" + ]; + [ + " !qwe rty uiop [ ]$" + ]; + [ + [ + "EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOS" + ] + ]; + #; + # + ]; + [ + [ + [ + "QIAEXLvMggAcAECCAFgAQUALyg==" + ] + ]; + #; + [ + " !qwe rty uiop [ ]$" + ]; + [ + " !qwe rty uiop [ ]$" + ]; + # + ]; + [ + [ + [ + "0DQNA0D4P/93QP6/z4NA0DQP98Dxfg0DodA6PQ==" + ] + ]; + #; + #; + #; + [ + " !qwe rty uiop [ ]$" + ] + ]; + [ + [ + "@as dfgh jkl\\n;'% " + ]; + [ + "@as dfgh jkl\\n;'% " + ]; + [ + [ + "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA" + ] + ]; + [ + [ + "IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA" + ] + ]; + # + ]; + [ + [ + [ + "gYoECABAgAQaIM6AAAAAubn0goBAAA==" + ] + ]; + #; + [ + "@as dfgh jkl\\n;'% " + ]; + [ + "@as dfgh jkl\\n;'% " + ]; + # + ]; + [ + [ + [ + "4DwP70DQNA0DQNA0D3Pe9/wNA8DwfC6LxNh1/XdA0A==" + ] + ]; + #; + #; + #; + [ + "@as dfgh jkl\\n;'% " + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt new file mode 100644 index 00000000000..441e62fd21b --- /dev/null +++ b/yql/essentials/udfs/common/string/test/canondata/test.test_To_/results.txt @@ -0,0 +1,294 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "ascii_lower"; + [ + "DataType"; + "String" + ] + ]; + [ + "ascii_upper"; + [ + "DataType"; + "String" + ] + ]; + [ + "ascii_title"; + [ + "DataType"; + "String" + ] + ]; + [ + "lower"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "upper"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "title"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "reverse"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "byte_list"; + [ + "ListType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "from_byte_list"; + [ + "DataType"; + "String" + ] + ]; + [ + "from_lazy_byte_list"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "test"; + "test"; + "TEST"; + "Test"; + [ + "test" + ]; + [ + "TEST" + ]; + [ + "Test" + ]; + [ + "tset" + ]; + [ + "116"; + "101"; + "115"; + "116" + ]; + "test"; + "test" + ]; + [ + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + [ + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82" + ]; + [ + "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2" + ]; + [ + "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82" + ]; + [ + "\xD1\x82\xD1\x81\xD0\xB5\xD1\x82" + ]; + [ + "209"; + "130"; + "208"; + "181"; + "209"; + "129"; + "209"; + "130" + ]; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82" + ]; + [ + "TeSt"; + "test"; + "TEST"; + "Test"; + [ + "test" + ]; + [ + "TEST" + ]; + [ + "Test" + ]; + [ 
+ "tSeT" + ]; + [ + "84"; + "101"; + "83"; + "116" + ]; + "TeSt"; + "TeSt" + ]; + [ + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + [ + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82" + ]; + [ + "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2" + ]; + [ + "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82" + ]; + [ + "\xD0\xA2\xD1\x81\xD0\x95\xD1\x82" + ]; + [ + "209"; + "130"; + "208"; + "149"; + "209"; + "129"; + "208"; + "162" + ]; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2" + ]; + [ + "Eyl\xC3\xBCl"; + "eyl\xC3\xBCl"; + "EYL\xC3\xBCL"; + "Eyl\xC3\xBCl"; + [ + "eyl\xC3\xBCl" + ]; + [ + "EYL\xC3\x9CL" + ]; + [ + "Eyl\xC3\xBCl" + ]; + [ + "l\xC3\xBClyE" + ]; + [ + "69"; + "121"; + "108"; + "195"; + "188"; + "108" + ]; + "Eyl\xC3\xBCl"; + "Eyl\xC3\xBCl" + ]; + [ + "6"; + "6"; + "6"; + "6"; + [ + "6" + ]; + [ + "6" + ]; + [ + "6" + ]; + [ + "6" + ]; + [ + "54" + ]; + "6"; + "6" + ]; + [ + ""; + ""; + ""; + ""; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + []; + ""; + "" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in new file mode 100644 index 00000000000..26a46b0f6c6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.in @@ -0,0 +1,5 @@ +{"value"="qweRTY123$%?"}; +{"value"="asdFGHjkl:'|"}; +{"value"="zxcvbnm"}; +{"value"="1234567890"}; +{"value"="!@#$%^&*()_+{}"}; diff --git a/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql new file mode 100644 index 00000000000..f6e74d87462 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/AsciiChecks.sql @@ -0,0 +1,10 @@ +SELECT + String::IsAscii(value) as isascii, + String::IsAsciiSpace(value) as isspace, + String::IsAsciiUpper(value) as isupper, + String::IsAsciiLower(value) as islower, + String::IsAsciiDigit(value) as isdigit, + String::IsAsciiAlpha(value) as isalpha, + String::IsAsciiAlnum(value) as isalnum, + String::IsAsciiHex(value) as ishex +FROM Input diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Decode.in b/yql/essentials/udfs/common/string/test/cases/Base32Decode.in new file mode 100644 index 00000000000..34af8b23d47 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Base32Decode.in @@ -0,0 +1,4 @@ +{"key"="1";subkey="";"value"="ORSXG5A="}; +{"key"="2";subkey="";"value"="KRSXG5CUMVZXI==="}; +{"key"="3";subkey="";"value"="MFYHA3DF"}; +{"key"="4";subkey="";"value"="hmmmm===hmmmm"}; diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql b/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql new file mode 100644 index 00000000000..51b47ec1665 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Base32Decode.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +SELECT + value, + String::Base32StrictDecode(value) AS strict_decoded, + String::Base32Decode(value) AS decoded +FROM Input; diff --git 
a/yql/essentials/udfs/common/string/test/cases/Base32Encode.in b/yql/essentials/udfs/common/string/test/cases/Base32Encode.in new file mode 100644 index 00000000000..c0051d04efd --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Base32Encode.in @@ -0,0 +1,3 @@ +{"key"="1";subkey="";"value"="test"}; +{"key"="2";subkey="";"value"="TestTest"}; +{"key"="3";subkey="";"value"="apple"}; diff --git a/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql b/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql new file mode 100644 index 00000000000..1ff9e3e4078 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Base32Encode.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +SELECT + value, + String::Base32Encode(value) AS encoded +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in new file mode 100644 index 00000000000..26a46b0f6c6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.in @@ -0,0 +1,5 @@ +{"value"="qweRTY123$%?"}; +{"value"="asdFGHjkl:'|"}; +{"value"="zxcvbnm"}; +{"value"="1234567890"}; +{"value"="!@#$%^&*()_+{}"}; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql new file mode 100644 index 00000000000..d8bf9e942be --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockAsciiChecks.sql @@ -0,0 +1,13 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. 
*/ +PRAGMA UseBlocks; + +SELECT + String::IsAscii(value) as isascii, + String::IsAsciiSpace(value) as isspace, + String::IsAsciiUpper(value) as isupper, + String::IsAsciiLower(value) as islower, + String::IsAsciiDigit(value) as isdigit, + String::IsAsciiAlpha(value) as isalpha, + String::IsAsciiAlnum(value) as isalnum, + String::IsAsciiHex(value) as ishex +FROM Input diff --git a/yql/essentials/udfs/common/string/test/cases/BlockFind.sql b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql new file mode 100644 index 00000000000..f1c855bcc11 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockFind.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +pragma UseBlocks; +SELECT + value, + String::Contains(value, "as") AS contains, + String::LevensteinDistance(value, "as") AS levenstein +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql b/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql new file mode 100644 index 00000000000..4c285b78d07 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockRemove.sql @@ -0,0 +1,16 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. 
*/ +PRAGMA UseBlocks; + +SELECT + value, + String::RemoveAll(value, "as") AS all, + String::RemoveFirst(value, "a") AS first, + String::RemoveLast(value, "a") AS last, + String::RemoveFirst(value, "as") AS first2, + String::RemoveLast(value, "as") AS last2, + String::RemoveFirst(value, "") AS first3, + String::RemoveLast(value, "") AS last3, + String::RemoveAll(value, "`") AS hwruall, + String::RemoveFirst(value, "`") AS hwrufirst, + String::RemoveLast(value, "`") AS hwrulast, +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql b/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql new file mode 100644 index 00000000000..030e36050cd --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockReplace.sql @@ -0,0 +1,13 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */ +PRAGMA UseBlocks; + +SELECT + value, + String::ReplaceAll(value, "as", "zzz") AS all, + String::ReplaceFirst(value, "a", "z") AS first, + String::ReplaceLast(value, "a", "z") AS last, + String::ReplaceFirst(value, "a", "zz") AS first2, + String::ReplaceLast(value, "a", "zz") AS last2, + String::ReplaceFirst(value, "a", "") AS first3, + String::ReplaceLast(value, "a", "") AS last3 +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in new file mode 100644 index 00000000000..1a446c4e488 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in @@ -0,0 +1,5 @@ +{"key"="1";"subkey"="1";"value"="qwertyui";"biguint"=1234567890u;"negint"=-123}; +{"key"="2";"subkey"="2";"value"="asdfghjl";"biguint"=9876543210u;"negint"=-456}; +{"key"="3";"subkey"="3";"value"="zxcvbnm?";"biguint"=9999999999u;"negint"=-789}; +{"key"="4";"subkey"="4";"value"="12345678";"biguint"=0000000000u;"negint"=-000}; +{"key"="5";"subkey"="5";"value"="!@#$%^&*";"biguint"=9182737465u;"negint"=-999}; diff --git 
a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr new file mode 100644 index 00000000000..bbc040040c8 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.in.attr @@ -0,0 +1,9 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"String"]]; + ["subkey";["DataType";"String"]]; + ["value";["DataType";"String"]]; + ["biguint";["DataType";"Uint64"]]; + ["negint";["DataType";"Int64"]] + ]]; +}} diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql new file mode 100644 index 00000000000..8b61758a964 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStreamFormat.sql @@ -0,0 +1,20 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */ +PRAGMA UseBlocks; + +SELECT + value, + String::RightPad(value, 20) AS right_pad, + String::LeftPad(value, 20) AS left_pad, + String::RightPad(value, 20, "0") AS right_pad_zero, + String::LeftPad(value, 20, "0") AS left_pad_zero, + String::Hex(biguint) AS hex, + String::SHex(negint) AS shex, + String::Bin(biguint) AS bin, + String::SBin(negint) AS sbin, + String::HexText(value) AS hex_text, + String::BinText(value) AS bin_text, + String::HumanReadableDuration(biguint) AS duration, + String::HumanReadableQuantity(biguint) AS quantity, + String::HumanReadableBytes(biguint) AS bytes, + String::Prec(negint / 12345.6789, 4) AS prec +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in new file mode 100644 index 00000000000..a9d378e0590 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.in @@ -0,0 +1,4 @@ +{"value"=" !qwe rty uiop [ ]$"}; +{"value"="@as dfgh jkl\\n;'\% "}; +{"value"=" #zxc\tvbn \t\n\b m,./?^ "}; 
+{"value"="1!2@3#4$5%6^7&8*9(0)-_=+,<.>"}; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql new file mode 100644 index 00000000000..1f96f5d62b0 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUDF.sql @@ -0,0 +1,18 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. */ +PRAGMA UseBlocks; + +SELECT + String::Base32Encode(value) as b32enc, + String::Base64Encode(value) as b64enc, + String::Base64EncodeUrl(value) as b64encu, + String::EscapeC(value) as cesc, + String::UnescapeC(value) as cunesc, + String::HexEncode(value) as xenc, + String::EncodeHtml(value) as henc, + String::DecodeHtml(value) as hdec, + String::CgiEscape(value) as cgesc, + String::CgiUnescape(value) as cgunesc, + String::Collapse(value) as clps, + String::Strip(value) as strp, + String::CollapseText(value, 9) as clpst, +FROM Input diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in new file mode 100644 index 00000000000..2c15dd67ac6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.in @@ -0,0 +1,6 @@ +{"value"="EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="}; +{"value"="ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="}; +{"value"="202020217177652072747920202075696F70205B205D24"}; +{"value"="IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"}; +{"value"="QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"}; +{"value"="4061732020202020202064666768206A6B6C5C6E3B27252020"}; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql new file mode 100644 index 00000000000..82f82f50d9d --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockStringUnsafeUDF.sql @@ -0,0 +1,10 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. 
*/ +PRAGMA UseBlocks; + +SELECT + String::Base32Decode(value) as b32dec, + String::Base32StrictDecode(value) AS b32sdec, + String::Base64Decode(value) as b64dec, + String::Base64StrictDecode(value) AS b64sdec, + String::HexDecode(value) as xdec, +FROM Input diff --git a/yql/essentials/udfs/common/string/test/cases/BlockTo.in b/yql/essentials/udfs/common/string/test/cases/BlockTo.in new file mode 100644 index 00000000000..93a00f7db8d --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockTo.in @@ -0,0 +1,7 @@ +{"key"="1";"subkey"="1";"value"="test"}; +{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"}; +{"key"="3";"subkey"="3";"value"="TeSt"}; +{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"}; +{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"}; +{"key"="6";"subkey"="6";"value"="6"}; +{"key"="4";"subkey"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/string/test/cases/BlockTo.sql b/yql/essentials/udfs/common/string/test/cases/BlockTo.sql new file mode 100644 index 00000000000..628febe899e --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/BlockTo.sql @@ -0,0 +1,9 @@ +/* XXX: Enable UseBlocks pragma and provide input to trigger block execution. 
*/ +PRAGMA UseBlocks; + +SELECT + value, + String::AsciiToLower(value) AS ascii_lower, + String::AsciiToUpper(value) AS ascii_upper, + String::AsciiToTitle(value) AS ascii_title, +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in new file mode 100644 index 00000000000..27fc322b1ae --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.in @@ -0,0 +1,3 @@ +{"key"="1";"subkey"="1";"value"="a b c"}; +{"key"="2";"subkey"="2";"value"="d"}; +{"key"="3";"subkey"="3";"value"=""}; diff --git a/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql new file mode 100644 index 00000000000..2dab551eb1c --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ExtendAndTake.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ + +$split = ($row) -> { + return String::SplitToList($row.value, " ", true AS SkipEmpty, false AS DelimeterString); +}; + +SELECT + $split(TableRow()), + ListExtend($split(TableRow()), $split(TableRow()))[1] +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/Find.sql b/yql/essentials/udfs/common/string/test/cases/Find.sql new file mode 100644 index 00000000000..273553dcf9e --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Find.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + value, + String::Contains(value, "as") AS contains, + String::HasPrefix(value, "as") AS prefix, + String::StartsWith(value, "as") AS starts, + String::HasSuffix(value, "as") AS suffix, + String::EndsWith(value, "as") AS ends, + String::Find(value, "as") AS find, + String::ReverseFind(value, "as") AS rfind, + String::LevensteinDistance(value, "as") AS levenstein +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/List.in b/yql/essentials/udfs/common/string/test/cases/List.in new file mode 100644 index 00000000000..949cf26c776 --- 
/dev/null +++ b/yql/essentials/udfs/common/string/test/cases/List.in @@ -0,0 +1,6 @@ +{"key"="1";"subkey"="1";"value"="a@b@c"}; +{"key"="1";"subkey"="1";"value"="@a@b@c"}; +{"key"="1";"subkey"="1";"value"="@@@a@a"}; +{"key"="2";"subkey"="2";"value"="d#e#f"}; +{"key"="3";"subkey"="3";"value"="d"}; +{"key"="4";"subkey"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/string/test/cases/List.sql b/yql/essentials/udfs/common/string/test/cases/List.sql new file mode 100644 index 00000000000..42b983074e5 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/List.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + value, + Ensure(value, String::JoinFromList(String::SplitToList(value, "@"), "@") == value) AS equals_to_original, + String::JoinFromList(String::SplitToList(value, "@"), "#") AS replace_delimeter, + String::SplitToList(value, "@") AS just_split, + String::SplitToList(value, "@")[0] as first, + String::SplitToList(value, "@", true AS SkipEmpty) AS skip_empty, + String::SplitToList(value, "b@", false AS DelimeterString) AS multichar_delim_set, + String::SplitToList(value, "b@", true AS DelimeterString) AS multichar_delim_string, + String::SplitToList(value, "@", 1 AS Limit) AS limited +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/List_v0.in b/yql/essentials/udfs/common/string/test/cases/List_v0.in new file mode 100644 index 00000000000..949cf26c776 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/List_v0.in @@ -0,0 +1,6 @@ +{"key"="1";"subkey"="1";"value"="a@b@c"}; +{"key"="1";"subkey"="1";"value"="@a@b@c"}; +{"key"="1";"subkey"="1";"value"="@@@a@a"}; +{"key"="2";"subkey"="2";"value"="d#e#f"}; +{"key"="3";"subkey"="3";"value"="d"}; +{"key"="4";"subkey"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/string/test/cases/List_v0.sql b/yql/essentials/udfs/common/string/test/cases/List_v0.sql new file mode 100644 index 00000000000..36d984dc6a8 --- /dev/null +++ 
b/yql/essentials/udfs/common/string/test/cases/List_v0.sql @@ -0,0 +1,27 @@ +/* syntax version 1 */ +-- use SplitToList settings which are used as defaults in v0 syntax +SELECT + value, + IF ( + String::Contains(value, "@@"), + Ensure( + value, + String::JoinFromList(String::SplitToList(value, "@", true AS SkipEmpty, false AS DelimeterString), "@") != value, + value + ) + ) AS not_equals_to_original, + IF ( + String::Contains(value, "@@"), + Ensure( + value, + String::JoinFromList(String::SplitToList(value, "@", true AS SkipEmpty, false AS DelimeterString), "@") != value, + value + ) + ) AS not_equals_to_original_skip_empty, + Ensure( + value, + String::JoinFromList(String::SplitToList(value, "@", false AS SkipEmpty, false AS DelimeterString), "@") == value, + value + ) AS equals_to_original, + String::SplitToList(value, "@#", true AS SkipEmpty, false AS DelimeterString) AS multichar +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/Remove.sql b/yql/essentials/udfs/common/string/test/cases/Remove.sql new file mode 100644 index 00000000000..8bfe2c92e26 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Remove.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +SELECT + value, + String::RemoveAll(value, "as") AS all, + String::RemoveFirst(value, "a") AS first, + String::RemoveLast(value, "a") AS last, + String::RemoveFirst(value, "as") AS first2, + String::RemoveLast(value, "as") AS last2, + String::RemoveFirst(value, "") AS first3, + String::RemoveLast(value, "") AS last3, + String::RemoveAll(value, "`") AS hwruall, + String::RemoveFirst(value, "`") AS hwrufirst, + String::RemoveLast(value, "`") AS hwrulast, +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/Replace.sql b/yql/essentials/udfs/common/string/test/cases/Replace.sql new file mode 100644 index 00000000000..0eea32a3e41 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/Replace.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +SELECT + value, + 
String::ReplaceAll(value, "as", "zzz") AS all, + String::ReplaceFirst(value, "a", "z") AS first, + String::ReplaceLast(value, "a", "z") AS last, + String::ReplaceFirst(value, "a", "zz") AS first2, + String::ReplaceLast(value, "a", "zz") AS last2, + String::ReplaceFirst(value, "a", "") AS first3, + String::ReplaceLast(value, "a", "") AS last3 +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql b/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql new file mode 100644 index 00000000000..6a83400d424 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/ReplaceFirstLast.sql @@ -0,0 +1,10 @@ +SELECT + String::ReplaceFirst("gasas", "as", "z"), + String::ReplaceFirst("gasas", "a", "zzz"), + String::ReplaceFirst("gasas", "a", ""), + String::ReplaceFirst("gasas", "e", "z"), + String::ReplaceLast("gasas", "as", "z"), + String::ReplaceLast("gasas", "a", "zzz"), + String::ReplaceLast("gasas", "a", ""), + String::ReplaceLast("gasas", "k", "ey"); + diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.in b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in new file mode 100644 index 00000000000..1a446c4e488 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in @@ -0,0 +1,5 @@ +{"key"="1";"subkey"="1";"value"="qwertyui";"biguint"=1234567890u;"negint"=-123}; +{"key"="2";"subkey"="2";"value"="asdfghjl";"biguint"=9876543210u;"negint"=-456}; +{"key"="3";"subkey"="3";"value"="zxcvbnm?";"biguint"=9999999999u;"negint"=-789}; +{"key"="4";"subkey"="4";"value"="12345678";"biguint"=0000000000u;"negint"=-000}; +{"key"="5";"subkey"="5";"value"="!@#$%^&*";"biguint"=9182737465u;"negint"=-999}; diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr new file mode 100644 index 00000000000..bbc040040c8 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.in.attr @@ 
-0,0 +1,9 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"String"]]; + ["subkey";["DataType";"String"]]; + ["value";["DataType";"String"]]; + ["biguint";["DataType";"Uint64"]]; + ["negint";["DataType";"Int64"]] + ]]; +}} diff --git a/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql b/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql new file mode 100644 index 00000000000..46ee9a7c688 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StreamFormat.sql @@ -0,0 +1,19 @@ +/* syntax version 1 */ + +SELECT + value, + String::RightPad(value, 20) AS right_pad, + String::LeftPad(value, 20) AS left_pad, + String::RightPad(value, 20, "0") AS right_pad_zero, + String::LeftPad(value, 20, "0") AS left_pad_zero, + String::Hex(biguint) AS hex, + String::SHex(negint) AS shex, + String::Bin(biguint) AS bin, + String::SBin(negint) AS sbin, + String::HexText(value) AS hex_text, + String::BinText(value) AS bin_text, + String::HumanReadableDuration(biguint) AS duration, + String::HumanReadableQuantity(biguint) AS quantity, + String::HumanReadableBytes(biguint) AS bytes, + String::Prec(negint / 12345.6789, 4) AS prec +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/StringUDF.in b/yql/essentials/udfs/common/string/test/cases/StringUDF.in new file mode 100644 index 00000000000..a9d378e0590 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StringUDF.in @@ -0,0 +1,4 @@ +{"value"=" !qwe rty uiop [ ]$"}; +{"value"="@as dfgh jkl\\n;'\% "}; +{"value"=" #zxc\tvbn \t\n\b m,./?^ "}; +{"value"="1!2@3#4$5%6^7&8*9(0)-_=+,<.>"}; diff --git a/yql/essentials/udfs/common/string/test/cases/StringUDF.sql b/yql/essentials/udfs/common/string/test/cases/StringUDF.sql new file mode 100644 index 00000000000..77af707acb0 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StringUDF.sql @@ -0,0 +1,15 @@ +SELECT + String::Base32Encode(value) as b32enc, + String::Base64Encode(value) as b64enc, + 
String::Base64EncodeUrl(value) as b64encu, + String::EscapeC(value) as cesc, + String::UnescapeC(value) as cunesc, + String::HexEncode(value) as xenc, + String::EncodeHtml(value) as henc, + String::DecodeHtml(value) as hdec, + String::CgiEscape(value) as cgesc, + String::CgiUnescape(value) as cgunesc, + String::Collapse(value) as clps, + String::Strip(value) as strp, + String::CollapseText(value, 9) as clpst, +FROM Input diff --git a/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in new file mode 100644 index 00000000000..2c15dd67ac6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.in @@ -0,0 +1,6 @@ +{"value"="EAQCAILRO5SSA4TUPEQCAIDVNFXXAIC3EBOSI==="}; +{"value"="ICAgIXF3ZSBydHkgICB1aW9wIFsgXSQ="}; +{"value"="202020217177652072747920202075696F70205B205D24"}; +{"value"="IBQXGIBAEAQCAIBAMRTGO2BANJVWYXDOHMTSKIBA"}; +{"value"="QGFzICAgICAgIGRmZ2ggamtsXG47JyUgIA,,"}; +{"value"="4061732020202020202064666768206A6B6C5C6E3B27252020"}; diff --git a/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql new file mode 100644 index 00000000000..dab39cbd391 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/StringUnsafeUDF.sql @@ -0,0 +1,7 @@ +SELECT + String::Base32Decode(value) as b32dec, + String::Base32StrictDecode(value) AS b32sdec, + String::Base64Decode(value) as b64dec, + String::Base64StrictDecode(value) AS b64sdec, + String::HexDecode(value) as xdec, +FROM Input diff --git a/yql/essentials/udfs/common/string/test/cases/To.in b/yql/essentials/udfs/common/string/test/cases/To.in new file mode 100644 index 00000000000..93a00f7db8d --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/To.in @@ -0,0 +1,7 @@ +{"key"="1";"subkey"="1";"value"="test"}; +{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"}; +{"key"="3";"subkey"="3";"value"="TeSt"}; 
+{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"}; +{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"}; +{"key"="6";"subkey"="6";"value"="6"}; +{"key"="4";"subkey"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/string/test/cases/To.sql b/yql/essentials/udfs/common/string/test/cases/To.sql new file mode 100644 index 00000000000..a7faf41efe6 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/To.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +SELECT + value, + String::AsciiToLower(value) AS ascii_lower, + String::AsciiToUpper(value) AS ascii_upper, + String::AsciiToTitle(value) AS ascii_title, + String::ToLower(value) AS lower, + String::ToUpper(value) AS upper, + String::ToTitle(value) AS title, + String::Reverse(value) AS reverse, + String::ToByteList(value) AS byte_list, + String::FromByteList(String::ToByteList(value)) AS from_byte_list, + String::FromByteList(YQL::LazyList(String::ToByteList(value))) AS from_lazy_byte_list +FROM Input; diff --git a/yql/essentials/udfs/common/string/test/cases/default.in b/yql/essentials/udfs/common/string/test/cases/default.in new file mode 100644 index 00000000000..182158fdf67 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/cases/default.in @@ -0,0 +1,6 @@ +{"key"="1";"subkey"="1";"value"="fdsa"}; +{"key"="2";"subkey"="2";"value"="aswedfg"}; +{"key"="3";"subkey"="3";"value"="asdadsaasd"}; +{"key"="4";"subkey"="4";"value"="gdsfsassas"}; +{"key"="5";"subkey"="5";"value"=""}; +{"key"="6";"subkey"="6";"value"="`Привет, мир!`"}; diff --git a/yql/essentials/udfs/common/string/test/ya.make b/yql/essentials/udfs/common/string/test/ya.make new file mode 100644 index 00000000000..87d8b667780 --- /dev/null +++ b/yql/essentials/udfs/common/string/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/string) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git 
a/yql/essentials/udfs/common/string/ya.make b/yql/essentials/udfs/common/string/ya.make new file mode 100644 index 00000000000..12ae827ad17 --- /dev/null +++ b/yql/essentials/udfs/common/string/ya.make @@ -0,0 +1,38 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319905679 OUT_NOAUTO libstring_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(string_udf) + + YQL_ABI_VERSION( + 2 + 37 + 0 + ) + + SRCS( + string_udf.cpp + ) + + PEERDIR( + yql/essentials/public/udf/arrow + library/cpp/charset + library/cpp/deprecated/split + library/cpp/html/pcdata + library/cpp/string_utils/base32 + library/cpp/string_utils/base64 + library/cpp/string_utils/levenshtein_diff + library/cpp/string_utils/quote + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) + + diff --git a/yql/essentials/udfs/common/top/test/canondata/result.json b/yql/essentials/udfs/common/top/test/canondata/result.json new file mode 100644 index 00000000000..c09d321f5a3 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/result.json @@ -0,0 +1,47 @@ +{ + "test.test[BottomByTuple]": [ + { + "uri": "file://test.test_BottomByTuple_/results.txt" + } + ], + "test.test[BottomBy]": [ + { + "uri": "file://test.test_BottomBy_/results.txt" + } + ], + "test.test[Bottom]": [ + { + "uri": "file://test.test_Bottom_/results.txt" + } + ], + "test.test[TopBy]": [ + { + "uri": "file://test.test_TopBy_/results.txt" + } + ], + "test.test[TopList]": [ + { + "uri": "file://test.test_TopList_/results.txt" + } + ], + "test.test[TopTuple]": [ + { + "uri": "file://test.test_TopTuple_/results.txt" + } + ], + "test.test[TopVariant]": [ + { + "uri": "file://test.test_TopVariant_/results.txt" + } + ], + "test.test[Top]": [ + { + "uri": "file://test.test_Top_/results.txt" + } + ], + "test.test[Window]": [ + { + "uri": "file://test.test_Window_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt 
b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt new file mode 100644 index 00000000000..2d0670bd687 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomByTuple_/results.txt @@ -0,0 +1,119 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + [ + "13"; + "f" + ]; + [ + "2"; + "b" + ]; + [ + "4"; + "d" + ]; + [ + "5"; + "a" + ]; + [ + "7"; + "c" + ]; + [ + "8"; + "e" + ] + ] + ]; + [ + "2"; + [ + [ + "1"; + "g" + ]; + [ + "2"; + "c" + ]; + [ + "3"; + "e" + ]; + [ + "4"; + "a" + ]; + [ + "6"; + "b" + ]; + [ + "9"; + "d" + ]; + [ + "9"; + "f" + ]; + [ + "9"; + "h" + ] + ] + ]; + [ + "3"; + [ + [ + "1"; + "a" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt new file mode 100644 index 00000000000..9a5eb425658 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_BottomBy_/results.txt @@ -0,0 +1,61 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + "2b"; + "4d"; + "5a"; + "7c"; + "8e" + ] + ]; + [ + "2"; + [ + "1g"; + "2c"; + "3e"; + "4a"; + "6b" + ] + ]; + [ + "3"; + [ + "1a" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt new file mode 100644 index 00000000000..4b34ad59ea8 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_Bottom_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + "13"; + "2"; + "4"; + "5" + ] + ]; + [ + "2"; + [ + "1"; + "2"; + "3"; + "4" + ] + ]; + [ + "3"; + [ + "1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt new file mode 100644 index 00000000000..32e6a4c374a --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopBy_/results.txt @@ -0,0 +1,63 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + "8e"; + "7c"; + "5a"; + "4d"; + "2b"; + "13f" + ] + ]; + [ + "2"; + [ + "9d"; + "9f"; + "9h"; + "6b"; + "4a"; + "3e" + ] + ]; + [ + "3"; + [ + "1a" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt new file mode 100644 index 00000000000..5997422bf79 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopList_/results.txt @@ -0,0 +1,57 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "1"; + "2"; + "3" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "2" + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "1" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt new file mode 100644 index 00000000000..6c5e14b7e28 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopTuple_/results.txt @@ -0,0 +1,103 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + [ + "8"; + "e" + ]; + [ + "7"; + "c" + ]; + [ + "5"; + "a" + ]; + [ + "4"; + "d" + ]; + [ + "2"; + "b" + ] + ] + ]; + [ + "2"; + [ + [ + "9"; + "h" + ]; + [ + "9"; + "f" + ]; + [ + "9"; + "d" + ]; + [ + "6"; + "b" + ]; + [ + "4"; + "a" + ] + ] + ]; + [ + "3"; + [ + [ + "1"; + "a" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt new file mode 100644 index 00000000000..39139793b1b --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_TopVariant_/results.txt @@ -0,0 +1,56 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "VariantType"; + [ + "TupleType"; + [ + [ + "DataType"; + "Int32" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "1"; + "str" + ]; + [ + "0"; + "1" + ]; + [ + "0"; + "1" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt new file mode 100644 index 00000000000..cc053f78cbb --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_Top_/results.txt @@ -0,0 +1,57 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "Float" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + "13"; + "8"; + "7" + ] + ]; + [ + "2"; + [ + "9"; + "9"; + "9" + ] + ]; + [ + "3"; + [ + "1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt b/yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt new file mode 100644 index 00000000000..2035b95bedd --- /dev/null +++ b/yql/essentials/udfs/common/top/test/canondata/test.test_Window_/results.txt @@ -0,0 +1,1030 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "upcr_top"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "1" + ] + ]; + [ + "2"; + "7"; + [ + "7"; + "1" + ] + ]; + [ + "3"; + "5"; + [ + "7"; + "5"; + "1" + ] + ]; + [ + "4"; + "4"; + [ + "7"; + "5"; + "4" + ] + ]; + [ + "5"; + "3"; + [ + "7"; + "5"; + "4" + ] + ]; + [ + "6"; + "11"; + [ + "11"; + "7"; + "5" + ] + ]; + [ + "7"; + "2"; + [ + "11"; + "7"; + "5" + ] + ]; + [ + "8"; + "11"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "9"; + "0"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "10"; + "6"; + [ + "11"; + "11"; + "7" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "upuf_top"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "2"; + "7"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "3"; + "5"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "4"; + "4"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "5"; + "3"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "6"; + "11"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "7"; + "2"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "8"; + "11"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "9"; + "0"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "10"; + "6"; + [ + "11"; + "11"; + "7" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + 
"Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "cruf_top"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "2"; + "7"; + [ + "11"; + "11"; + "7" + ] + ]; + [ + "3"; + "5"; + [ + "11"; + "11"; + "6" + ] + ]; + [ + "4"; + "4"; + [ + "11"; + "11"; + "6" + ] + ]; + [ + "5"; + "3"; + [ + "11"; + "11"; + "6" + ] + ]; + [ + "6"; + "11"; + [ + "11"; + "11"; + "6" + ] + ]; + [ + "7"; + "2"; + [ + "11"; + "6"; + "2" + ] + ]; + [ + "8"; + "11"; + [ + "11"; + "6"; + "0" + ] + ]; + [ + "9"; + "0"; + [ + "6"; + "0" + ] + ]; + [ + "10"; + "6"; + [ + "6" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "cr22_top"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "7"; + "5"; + "1" + ] + ]; + [ + "2"; + "7"; + [ + "7"; + "5"; + "4" + ] + ]; + [ + "3"; + "5"; + [ + "7"; + "5"; + "4" + ] + ]; + [ + "4"; + "4"; + [ + "11"; + "7"; + "5" + ] + ]; + [ + "5"; + "3"; + [ + "11"; + "5"; + "4" + ] + ]; + [ + "6"; + "11"; + [ + "11"; + "11"; + "4" + ] + ]; + [ + "7"; + "2"; + [ + "11"; + "11"; + "3" + ] + ]; + [ + "8"; + "11"; + [ + "11"; + "11"; + "6" + ] + ]; + [ + "9"; + "0"; + [ + "11"; + "6"; + "2" + ] + ]; + [ + "10"; + "6"; + [ + "11"; + "6"; + "0" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "upcr_bottom"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "1" + ] + ]; + [ + "2"; + "7"; + [ + "1"; + "7" + ] + ]; + [ + "3"; + "5"; + [ + "1"; + "5"; + "7" + ] + ]; + [ + 
"4"; + "4"; + [ + "1"; + "4"; + "5" + ] + ]; + [ + "5"; + "3"; + [ + "1"; + "3"; + "4" + ] + ]; + [ + "6"; + "11"; + [ + "1"; + "3"; + "4" + ] + ]; + [ + "7"; + "2"; + [ + "1"; + "2"; + "3" + ] + ]; + [ + "8"; + "11"; + [ + "1"; + "2"; + "3" + ] + ]; + [ + "9"; + "0"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "10"; + "6"; + [ + "0"; + "1"; + "2" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "upuf_bottom"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "2"; + "7"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "3"; + "5"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "4"; + "4"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "5"; + "3"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "6"; + "11"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "7"; + "2"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "8"; + "11"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "9"; + "0"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "10"; + "6"; + [ + "0"; + "1"; + "2" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "cruf_bottom"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "0"; + "1"; + "2" + ] + ]; + [ + "2"; + "7"; + [ + "0"; + "2"; + "3" + ] + ]; + [ + "3"; + "5"; + [ + "0"; + "2"; + "3" + ] + ]; + [ + "4"; + "4"; + [ + "0"; + "2"; + "3" + ] + ]; + [ + "5"; + "3"; + [ + "0"; + "2"; + "3" + ] + ]; + [ + "6"; + "11"; + [ + "0"; + "2"; + "6" + ] + ]; + [ + "7"; + "2"; + [ + "0"; + "2"; + "6" + ] + ]; + [ + "8"; + "11"; + [ + "0"; + "6"; + "11" + ] + ]; + [ + "9"; + "0"; + [ + "0"; + "6" + ] + ]; + [ + "10"; + "6"; + [ + "6" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + 
"ListType"; + [ + "StructType"; + [ + [ + "idx"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "x"; + [ + "DataType"; + "Int32" + ] + ]; + [ + "cr22_botto"; + [ + "ListType"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + "1"; + [ + "1"; + "5"; + "7" + ] + ]; + [ + "2"; + "7"; + [ + "1"; + "4"; + "5" + ] + ]; + [ + "3"; + "5"; + [ + "1"; + "3"; + "4" + ] + ]; + [ + "4"; + "4"; + [ + "3"; + "4"; + "5" + ] + ]; + [ + "5"; + "3"; + [ + "2"; + "3"; + "4" + ] + ]; + [ + "6"; + "11"; + [ + "2"; + "3"; + "4" + ] + ]; + [ + "7"; + "2"; + [ + "0"; + "2"; + "3" + ] + ]; + [ + "8"; + "11"; + [ + "0"; + "2"; + "6" + ] + ]; + [ + "9"; + "0"; + [ + "0"; + "2"; + "6" + ] + ]; + [ + "10"; + "6"; + [ + "0"; + "6"; + "11" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/cases/Bottom.in b/yql/essentials/udfs/common/top/test/cases/Bottom.in new file mode 100644 index 00000000000..1532ec03bb8 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Bottom.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="5";"value"="a"}; +{"key"="1";"subkey"="2";"value"="b"}; +{"key"="1";"subkey"="7";"value"="c"}; +{"key"="1";"subkey"="4";"value"="d"}; +{"key"="1";"subkey"="8";"value"="e"}; +{"key"="1";"subkey"="13";"value"="f"}; +{"key"="2";"subkey"="4";"value"="a"}; +{"key"="2";"subkey"="6";"value"="b"}; +{"key"="2";"subkey"="2";"value"="c"}; +{"key"="2";"subkey"="9";"value"="d"}; +{"key"="2";"subkey"="3";"value"="e"}; +{"key"="2";"subkey"="9";"value"="f"}; +{"key"="2";"subkey"="1";"value"="g"}; +{"key"="2";"subkey"="9";"value"="h"}; +{"key"="3";"subkey"="1";"value"="a"}; diff --git a/yql/essentials/udfs/common/top/test/cases/Bottom.in.attr b/yql/essentials/udfs/common/top/test/cases/Bottom.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Bottom.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/top/test/cases/Bottom.sql b/yql/essentials/udfs/common/top/test/cases/Bottom.sql new file mode 100644 index 00000000000..b04ba740c8e --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Bottom.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + BOTTOM(subkey, 4u) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/top/test/cases/BottomBy.in b/yql/essentials/udfs/common/top/test/cases/BottomBy.in new file mode 100644 index 00000000000..1532ec03bb8 --- /dev/null +++ 
b/yql/essentials/udfs/common/top/test/cases/BottomBy.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="5";"value"="a"}; +{"key"="1";"subkey"="2";"value"="b"}; +{"key"="1";"subkey"="7";"value"="c"}; +{"key"="1";"subkey"="4";"value"="d"}; +{"key"="1";"subkey"="8";"value"="e"}; +{"key"="1";"subkey"="13";"value"="f"}; +{"key"="2";"subkey"="4";"value"="a"}; +{"key"="2";"subkey"="6";"value"="b"}; +{"key"="2";"subkey"="2";"value"="c"}; +{"key"="2";"subkey"="9";"value"="d"}; +{"key"="2";"subkey"="3";"value"="e"}; +{"key"="2";"subkey"="9";"value"="f"}; +{"key"="2";"subkey"="1";"value"="g"}; +{"key"="2";"subkey"="9";"value"="h"}; +{"key"="3";"subkey"="1";"value"="a"}; diff --git a/yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr b/yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/BottomBy.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/top/test/cases/BottomBy.sql b/yql/essentials/udfs/common/top/test/cases/BottomBy.sql new file mode 100644 index 00000000000..2d0718da727 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/BottomBy.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + BOTTOM_BY(subkey || value, CAST(subkey AS Uint64), 5u) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in new file mode 100644 index 00000000000..1532ec03bb8 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="5";"value"="a"}; +{"key"="1";"subkey"="2";"value"="b"}; +{"key"="1";"subkey"="7";"value"="c"}; 
+{"key"="1";"subkey"="4";"value"="d"}; +{"key"="1";"subkey"="8";"value"="e"}; +{"key"="1";"subkey"="13";"value"="f"}; +{"key"="2";"subkey"="4";"value"="a"}; +{"key"="2";"subkey"="6";"value"="b"}; +{"key"="2";"subkey"="2";"value"="c"}; +{"key"="2";"subkey"="9";"value"="d"}; +{"key"="2";"subkey"="3";"value"="e"}; +{"key"="2";"subkey"="9";"value"="f"}; +{"key"="2";"subkey"="1";"value"="g"}; +{"key"="2";"subkey"="9";"value"="h"}; +{"key"="3";"subkey"="1";"value"="a"}; diff --git a/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql new file mode 100644 index 00000000000..cdbda066c72 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/BottomByTuple.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + BOTTOM_BY(AsTuple(subkey, value), AsTuple(subkey, value), 10u) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/top/test/cases/Top.in b/yql/essentials/udfs/common/top/test/cases/Top.in new file mode 100644 index 00000000000..1532ec03bb8 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Top.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="5";"value"="a"}; +{"key"="1";"subkey"="2";"value"="b"}; +{"key"="1";"subkey"="7";"value"="c"}; +{"key"="1";"subkey"="4";"value"="d"}; +{"key"="1";"subkey"="8";"value"="e"}; +{"key"="1";"subkey"="13";"value"="f"}; +{"key"="2";"subkey"="4";"value"="a"}; +{"key"="2";"subkey"="6";"value"="b"}; 
+{"key"="2";"subkey"="2";"value"="c"}; +{"key"="2";"subkey"="9";"value"="d"}; +{"key"="2";"subkey"="3";"value"="e"}; +{"key"="2";"subkey"="9";"value"="f"}; +{"key"="2";"subkey"="1";"value"="g"}; +{"key"="2";"subkey"="9";"value"="h"}; +{"key"="3";"subkey"="1";"value"="a"}; diff --git a/yql/essentials/udfs/common/top/test/cases/Top.in.attr b/yql/essentials/udfs/common/top/test/cases/Top.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Top.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/top/test/cases/Top.sql b/yql/essentials/udfs/common/top/test/cases/Top.sql new file mode 100644 index 00000000000..6a03eca4ee9 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Top.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + TOP(CAST(subkey AS Float), 3u) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/top/test/cases/TopBy.in b/yql/essentials/udfs/common/top/test/cases/TopBy.in new file mode 100644 index 00000000000..1532ec03bb8 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopBy.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="5";"value"="a"}; +{"key"="1";"subkey"="2";"value"="b"}; +{"key"="1";"subkey"="7";"value"="c"}; +{"key"="1";"subkey"="4";"value"="d"}; +{"key"="1";"subkey"="8";"value"="e"}; +{"key"="1";"subkey"="13";"value"="f"}; +{"key"="2";"subkey"="4";"value"="a"}; +{"key"="2";"subkey"="6";"value"="b"}; +{"key"="2";"subkey"="2";"value"="c"}; +{"key"="2";"subkey"="9";"value"="d"}; +{"key"="2";"subkey"="3";"value"="e"}; +{"key"="2";"subkey"="9";"value"="f"}; +{"key"="2";"subkey"="1";"value"="g"}; +{"key"="2";"subkey"="9";"value"="h"}; +{"key"="3";"subkey"="1";"value"="a"}; diff 
--git a/yql/essentials/udfs/common/top/test/cases/TopBy.in.attr b/yql/essentials/udfs/common/top/test/cases/TopBy.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopBy.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/top/test/cases/TopBy.sql b/yql/essentials/udfs/common/top/test/cases/TopBy.sql new file mode 100644 index 00000000000..b22309185f7 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopBy.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + TOP_BY(subkey || value, subkey, 6u) +FROM Input +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/top/test/cases/TopList.sql b/yql/essentials/udfs/common/top/test/cases/TopList.sql new file mode 100644 index 00000000000..2b06e919fde --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopList.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +select TOP(x,10) from +(select [[1,2],[1],[1,2,3],[1],[1,2],[1]] as x) +flatten list by x; diff --git a/yql/essentials/udfs/common/top/test/cases/TopTuple.in b/yql/essentials/udfs/common/top/test/cases/TopTuple.in new file mode 100644 index 00000000000..1532ec03bb8 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopTuple.in @@ -0,0 +1,15 @@ +{"key"="1";"subkey"="5";"value"="a"}; +{"key"="1";"subkey"="2";"value"="b"}; +{"key"="1";"subkey"="7";"value"="c"}; +{"key"="1";"subkey"="4";"value"="d"}; +{"key"="1";"subkey"="8";"value"="e"}; +{"key"="1";"subkey"="13";"value"="f"}; +{"key"="2";"subkey"="4";"value"="a"}; +{"key"="2";"subkey"="6";"value"="b"}; +{"key"="2";"subkey"="2";"value"="c"}; +{"key"="2";"subkey"="9";"value"="d"}; +{"key"="2";"subkey"="3";"value"="e"}; 
+{"key"="2";"subkey"="9";"value"="f"}; +{"key"="2";"subkey"="1";"value"="g"}; +{"key"="2";"subkey"="9";"value"="h"}; +{"key"="3";"subkey"="1";"value"="a"}; diff --git a/yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr b/yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopTuple.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/top/test/cases/TopTuple.sql b/yql/essentials/udfs/common/top/test/cases/TopTuple.sql new file mode 100644 index 00000000000..c9d77826967 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopTuple.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, + TOP(AsTuple(subkey, value), 5u) +FROM Input +GROUP BY key +ORDER BY key
\ No newline at end of file diff --git a/yql/essentials/udfs/common/top/test/cases/TopVariant.sql b/yql/essentials/udfs/common/top/test/cases/TopVariant.sql new file mode 100644 index 00000000000..7f74dfda347 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/TopVariant.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +$vt1 = Variant<Int32,String>; +select TOP(x,3) from +(select [Variant(1,"0",$vt1),Variant("str","1",$vt1),Variant(1,"0",$vt1)] as x) +flatten list by x; diff --git a/yql/essentials/udfs/common/top/test/cases/Window.sql b/yql/essentials/udfs/common/top/test/cases/Window.sql new file mode 100644 index 00000000000..657f765fa99 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/cases/Window.sql @@ -0,0 +1,30 @@ +/* syntax version 1 */ +USE plato; + +$src = [ + <|x:1, idx:1|>, + <|x:7, idx:2|>, + <|x:5, idx:3|>, + <|x:4, idx:4|>, + <|x:3, idx:5|>, + <|x:11, idx:6|>, + <|x:2, idx:7|>, + <|x:11, idx:8|>, + <|x:0, idx:9|>, + <|x:6, idx:10|>, +]; + +INSERT INTO @src +SELECT * FROM AS_TABLE($src) ORDER BY idx; + +COMMIT; + +SELECT idx, x, TOP(x, 3) OVER (ORDER BY idx ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as upcr_top FROM @src ORDER BY idx; +SELECT idx, x, TOP(x, 3) OVER () as upuf_top FROM @src ORDER BY idx; +SELECT idx, x, TOP(x, 3) OVER (ORDER BY idx ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as cruf_top FROM @src ORDER BY idx; +SELECT idx, x, TOP(x, 3) OVER (ORDER BY idx ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as cr22_top FROM @src ORDER BY idx; + +SELECT idx, x, BOTTOM(x, 3) OVER (ORDER BY idx ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as upcr_bottom FROM @src ORDER BY idx; +SELECT idx, x, BOTTOM(x, 3) OVER () as upuf_bottom FROM @src ORDER BY idx; +SELECT idx, x, BOTTOM(x, 3) OVER (ORDER BY idx ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as cruf_bottom FROM @src ORDER BY idx; +SELECT idx, x, BOTTOM(x, 3) OVER (ORDER BY idx ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as cr22_botto FROM @src ORDER BY idx; diff 
--git a/yql/essentials/udfs/common/top/test/ya.make b/yql/essentials/udfs/common/top/test/ya.make new file mode 100644 index 00000000000..9bbd032b576 --- /dev/null +++ b/yql/essentials/udfs/common/top/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/top) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/top/top_udf.cpp b/yql/essentials/udfs/common/top/top_udf.cpp new file mode 100644 index 00000000000..766067dda5d --- /dev/null +++ b/yql/essentials/udfs/common/top/top_udf.cpp @@ -0,0 +1,954 @@ +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_type_ops.h> + +#include <library/cpp/containers/top_keeper/top_keeper.h> + +#include <util/generic/set.h> + +#include <algorithm> +#include <iterator> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + +using TUnboxedValuePair = std::pair<TUnboxedValue, TUnboxedValue>; + +template <EDataSlot Slot, bool IsTop> +struct TDataCompare { + bool operator()(const TUnboxedValue& left, const TUnboxedValue& right) const { + if (IsTop) { + return CompareValues<Slot>(left, right) > 0; + } else { + return CompareValues<Slot>(left, right) < 0; + } + } +}; + +template <EDataSlot Slot, bool IsTop> +struct TDataPairCompare { + bool operator()(const TUnboxedValuePair& left, const TUnboxedValuePair& right) const { + if (IsTop) { + return CompareValues<Slot>(left.first, right.first) > 0; + } else { + return CompareValues<Slot>(left.first, right.first) < 0; + } + } +}; + +template <bool IsTop> +struct TGenericCompare { + ICompare::TPtr Compare; + + bool operator()(const TUnboxedValue& left, const TUnboxedValue& right) const { + if (IsTop) { + return Compare->Less(right, left); + } else { + return Compare->Less(left, right); + } + } +}; + +template <bool IsTop> +struct TGenericPairCompare { + ICompare::TPtr Compare; + + bool 
operator()(const TUnboxedValuePair& left, const TUnboxedValuePair& right) const { + if (IsTop) { + return Compare->Less(right.first, left.first); + } else { + return Compare->Less(left.first, right.first); + } + } +}; + +template <typename TValue, typename TCompare, typename TAllocator> +class TTopKeeperContainer { + TTopKeeper<TValue, TCompare, true, TAllocator> Keeper; + using TOrderedSet = TMultiSet<TValue, TCompare, TAllocator>; + TMaybe<TOrderedSet> OrderedSet; + size_t MaxSize = 0; + bool Finalized = false; + TCompare Compare; +public: + explicit TTopKeeperContainer(TCompare compare) + : Keeper(0, compare) + , Compare(compare) + {} + + TVector<TValue, TAllocator> GetInternal() { + if (OrderedSet) { + TVector<TValue, TAllocator> result; + std::copy(OrderedSet->begin(), OrderedSet->end(), std::back_inserter(result)); + return result; + } + Finalized = true; + return Keeper.GetInternal(); + } + + void Insert(const TValue& value) { + if (MaxSize == 0) { + return; + } + if (Finalized && !OrderedSet) { + const auto& items = Keeper.Extract(); + OrderedSet = TOrderedSet{items.begin(), items.end(), Compare}; + } + if (OrderedSet) { + if (OrderedSet->size() < MaxSize) { + OrderedSet->insert(value); + return; + } + Y_ENSURE(OrderedSet->size() == MaxSize); + Y_ENSURE(!OrderedSet->empty()); + auto last = --OrderedSet->end(); + if (Compare(value, *last)) { + OrderedSet->erase(last); + OrderedSet->insert(value); + } + return; + } + Keeper.Insert(value); + } + + bool IsEmpty() const { + return OrderedSet ? OrderedSet->empty() : Keeper.IsEmpty(); + } + + size_t GetSize() const { + return OrderedSet ? 
OrderedSet->size() : Keeper.GetSize(); + } + + size_t GetMaxSize() const { + return MaxSize; + } + + void SetMaxSize(size_t newMaxSize) { + MaxSize = newMaxSize; + if (Finalized && !OrderedSet) { + auto items = Keeper.Extract(); + auto begin = items.begin(); + auto end = begin + Min(MaxSize, items.size()); + OrderedSet = TOrderedSet{begin, end, Compare}; + } + if (OrderedSet) { + while (OrderedSet->size() > MaxSize) { + auto last = --OrderedSet->end(); + OrderedSet->erase(last); + } + return; + } + + Keeper.SetMaxSize(MaxSize); + } +}; + +template <typename TCompare> +class TTopKeeperWrapperBase { +protected: + TTopKeeperContainer<TUnboxedValue, TCompare, TUnboxedValue::TAllocator> Keeper; + +protected: + explicit TTopKeeperWrapperBase(TCompare compare) + : Keeper(compare) + {} + + void Init(const TUnboxedValuePod& value, ui32 maxSize) { + Keeper.SetMaxSize(maxSize); + AddValue(value); + } + + void Merge(TTopKeeperWrapperBase& left, TTopKeeperWrapperBase& right) { + Keeper.SetMaxSize(left.Keeper.GetMaxSize()); + for (const auto& item : left.Keeper.GetInternal()) { + AddValue(item); + } + for (const auto& item : right.Keeper.GetInternal()) { + AddValue(item); + } + } + + void Deserialize(const TUnboxedValuePod& serialized) { + auto maxSize = serialized.GetElement(0).Get<ui32>(); + auto list = serialized.GetElement(1); + + Keeper.SetMaxSize(maxSize); + const auto listIter = list.GetListIterator(); + for (TUnboxedValue current; listIter.Next(current);) { + AddValue(current); + } + } + +public: + void AddValue(const TUnboxedValuePod& value) { + Keeper.Insert(TUnboxedValuePod(value)); + } + + TUnboxedValue Serialize(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Keeper.GetSize(), values); + + for (const auto& item : Keeper.GetInternal()) { + *values++ = item; + } + + TUnboxedValue* items = nullptr; + auto result = builder->NewArray(2U, items); + items[0] = TUnboxedValuePod((ui32)Keeper.GetMaxSize()); + items[1] = list; 
+ + return result; + } + + TUnboxedValue GetResult(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Keeper.GetSize(), values); + + for (const auto& item : Keeper.GetInternal()) { + *values++ = item; + } + return list; + } +}; + +template <typename TCompare> +class TTopKeeperPairWrapperBase { +protected: + TTopKeeperContainer<TUnboxedValuePair, TCompare, TStdAllocatorForUdf<TUnboxedValuePair>> Keeper; + +protected: + explicit TTopKeeperPairWrapperBase(TCompare compare) + : Keeper(compare) + {} + + void Init(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize) { + Keeper.SetMaxSize(maxSize); + AddValue(key, payload); + } + + void Merge(TTopKeeperPairWrapperBase& left, TTopKeeperPairWrapperBase& right) { + Keeper.SetMaxSize(left.Keeper.GetMaxSize()); + for (const auto& item : left.Keeper.GetInternal()) { + AddValue(item.first, item.second); + } + for (const auto& item : right.Keeper.GetInternal()) { + AddValue(item.first, item.second); + } + } + + void Deserialize(const TUnboxedValuePod& serialized) { + auto maxSize = serialized.GetElement(0).Get<ui32>(); + auto list = serialized.GetElement(1); + + Keeper.SetMaxSize(maxSize); + const auto listIter = list.GetListIterator(); + for (TUnboxedValue current; listIter.Next(current);) { + AddValue(current.GetElement(0), current.GetElement(1)); + } + } + +public: + void AddValue(const TUnboxedValuePod& key, const TUnboxedValuePod& payload) { + Keeper.Insert(std::make_pair(TUnboxedValuePod(key), TUnboxedValuePod(payload))); + } + + TUnboxedValue Serialize(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Keeper.GetSize(), values); + + for (const auto& item : Keeper.GetInternal()) { + TUnboxedValue* items = nullptr; + auto pair = builder->NewArray(2U, items); + items[0] = item.first; + items[1] = item.second; + *values++ = pair; + } + + TUnboxedValue* items = nullptr; + auto result = builder->NewArray(2U, 
items); + items[0] = TUnboxedValuePod((ui32)Keeper.GetMaxSize()); + items[1] = list; + + return result; + } + + TUnboxedValue GetResult(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Keeper.GetSize(), values); + + for (const auto& item : Keeper.GetInternal()) { + *values++ = item.second; + } + return list; + } +}; + + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopKeeperDataWrapper; + +template <EDataSlot Slot, bool IsTop> +class TTopKeeperDataWrapper<Slot, false, IsTop> + : public TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>> +{ +public: + using TBase = TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>>; + + TTopKeeperDataWrapper(const TUnboxedValuePod& value, ui32 maxSize) + : TBase(TDataCompare<Slot, IsTop>()) + { + TBase::Init(value, maxSize); + } + + TTopKeeperDataWrapper(TTopKeeperDataWrapper& left, TTopKeeperDataWrapper& right) + : TBase(TDataCompare<Slot, IsTop>()) + { + TBase::Merge(left, right); + } + + explicit TTopKeeperDataWrapper(const TUnboxedValuePod& serialized) + : TBase(TDataCompare<Slot, IsTop>()) + { + TBase::Deserialize(serialized); + } +}; + +template <EDataSlot Slot, bool IsTop> +class TTopKeeperDataWrapper<Slot, true, IsTop> + : public TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>> +{ +public: + using TBase = TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>>; + + TTopKeeperDataWrapper(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize) + : TBase(TDataPairCompare<Slot, IsTop>()) + { + TBase::Init(key, payload, maxSize); + } + + TTopKeeperDataWrapper(TTopKeeperDataWrapper& left, TTopKeeperDataWrapper& right) + : TBase(TDataPairCompare<Slot, IsTop>()) + { + TBase::Merge(left, right); + } + + explicit TTopKeeperDataWrapper(const TUnboxedValuePod& serialized) + : TBase(TDataPairCompare<Slot, IsTop>()) + { + TBase::Deserialize(serialized); + } +}; + +template <bool HasKey, bool IsTop> +class TTopKeeperWrapper; + +template <bool IsTop> 
+class TTopKeeperWrapper<false, IsTop> + : public TTopKeeperWrapperBase<TGenericCompare<IsTop>> +{ +public: + using TBase = TTopKeeperWrapperBase<TGenericCompare<IsTop>>; + + TTopKeeperWrapper(const TUnboxedValuePod& value, ui32 maxSize, ICompare::TPtr compare) + : TBase(TGenericCompare<IsTop>{compare}) + { + TBase::Init(value, maxSize); + } + + TTopKeeperWrapper(TTopKeeperWrapper& left, TTopKeeperWrapper& right, ICompare::TPtr compare) + : TBase(TGenericCompare<IsTop>{compare}) + { + TBase::Merge(left, right); + } + + TTopKeeperWrapper(const TUnboxedValuePod& serialized, ICompare::TPtr compare) + : TBase(TGenericCompare<IsTop>{compare}) + { + TBase::Deserialize(serialized); + } +}; + +template <bool IsTop> +class TTopKeeperWrapper<true, IsTop> + : public TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>> +{ +public: + using TBase = TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>>; + + TTopKeeperWrapper(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize, ICompare::TPtr compare) + : TBase(TGenericPairCompare<IsTop>{compare}) + { + TBase::Init(key, payload, maxSize); + } + + TTopKeeperWrapper(TTopKeeperWrapper& left, TTopKeeperWrapper& right, ICompare::TPtr compare) + : TBase(TGenericPairCompare<IsTop>{compare}) + { + TBase::Merge(left, right); + } + + TTopKeeperWrapper(const TUnboxedValuePod& serialized, ICompare::TPtr compare) + : TBase(TGenericPairCompare<IsTop>{compare}) + { + TBase::Deserialize(serialized); + } +}; + + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopResourceData; + +template <EDataSlot Slot, bool HasKey, bool IsTop> +TTopResourceData<Slot, HasKey, IsTop>* GetTopResourceData(const TUnboxedValuePod& arg) { + TTopResourceData<Slot, HasKey, IsTop>::Validate(arg); + return static_cast<TTopResourceData<Slot, HasKey, IsTop>*>(arg.AsBoxed().Get()); +} + +template <bool HasKey, bool IsTop> +class TTopResource; + +template <bool HasKey, bool IsTop> +TTopResource<HasKey, IsTop>* GetTopResource(const 
TUnboxedValuePod& arg) { + TTopResource<HasKey, IsTop>::Validate(arg); + return static_cast<TTopResource<HasKey, IsTop>*>(arg.AsBoxed().Get()); +} + + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopCreateData : public TBoxedValue { +private: + template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + return TUnboxedValuePod( + new TTopResourceData<Slot, HasKey, IsTop>(args[0], args[1].Get<ui32>())); + } + + template <bool HasKey_ = HasKey, typename std::enable_if_t<HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + return TUnboxedValuePod( + new TTopResourceData<Slot, HasKey, IsTop>(args[0], args[1], args[2].Get<ui32>())); + } + + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return RunImpl(args); + } +}; + +template <bool HasKey, bool IsTop> +class TTopCreate : public TBoxedValue { +private: + template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + return TUnboxedValuePod( + new TTopResource<HasKey, IsTop>(args[0], args[1].Get<ui32>(), Compare_)); + } + + template <bool HasKey_ = HasKey, typename std::enable_if_t<HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + return TUnboxedValuePod( + new TTopResource<HasKey, IsTop>(args[0], args[1], args[2].Get<ui32>(), Compare_)); + } + + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return RunImpl(args); + } + +public: + explicit TTopCreate(ICompare::TPtr compare) + : Compare_(compare) + {} + +private: + ICompare::TPtr Compare_; +}; + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopAddValueData : public TBoxedValue { +private: + template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { 
+ auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); + resource->Get()->AddValue(args[1]); + return TUnboxedValuePod(resource); + } + + template <bool HasKey_ = HasKey, typename std::enable_if_t<HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); + resource->Get()->AddValue(args[1], args[2]); + return TUnboxedValuePod(resource); + } + + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return RunImpl(args); + } +}; + +template <bool HasKey, bool IsTop> +class TTopAddValue : public TBoxedValue { +private: + template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + auto resource = GetTopResource<HasKey, IsTop>(args[0]); + resource->Get()->AddValue(args[1]); + return TUnboxedValuePod(resource); + } + + template <bool HasKey_ = HasKey, typename std::enable_if_t<HasKey_>* = nullptr> + TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { + auto resource = GetTopResource<HasKey, IsTop>(args[0]); + resource->Get()->AddValue(args[1], args[2]); + return TUnboxedValuePod(resource); + } + + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return RunImpl(args); + } + +public: + explicit TTopAddValue(ICompare::TPtr) + {} +}; + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopSerializeData : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); + return resource->Get()->Serialize(valueBuilder); + } +}; + +template <bool HasKey, bool IsTop> +class TTopSerialize : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + auto resource = GetTopResource<HasKey, 
IsTop>(args[0]); + return resource->Get()->Serialize(valueBuilder); + } + +public: + explicit TTopSerialize(ICompare::TPtr) + {} +}; + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopDeserializeData : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return TUnboxedValuePod(new TTopResourceData<Slot, HasKey, IsTop>(args[0])); + } +}; + +template <bool HasKey, bool IsTop> +class TTopDeserialize : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + return TUnboxedValuePod(new TTopResource<HasKey, IsTop>(args[0], Compare_)); + } + +public: + explicit TTopDeserialize(ICompare::TPtr compare) + : Compare_(compare) + {} + +private: + ICompare::TPtr Compare_; +}; + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopMergeData : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto left = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); + auto right = GetTopResourceData<Slot, HasKey, IsTop>(args[1]); + return TUnboxedValuePod(new TTopResourceData<Slot, HasKey, IsTop>(*left->Get(), *right->Get())); + } +}; + +template <bool HasKey, bool IsTop> +class TTopMerge : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto left = GetTopResource<HasKey, IsTop>(args[0]); + auto right = GetTopResource<HasKey, IsTop>(args[1]); + return TUnboxedValuePod(new TTopResource<HasKey, IsTop>(*left->Get(), *right->Get(), Compare_)); + } + +public: + explicit TTopMerge(ICompare::TPtr compare) + : Compare_(compare) + {} + +private: + ICompare::TPtr Compare_; +}; + +template <EDataSlot Slot, bool HasKey, bool IsTop> +class TTopGetResultData : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + auto resource 
= GetTopResourceData<Slot, HasKey, IsTop>(args[0]); + return resource->Get()->GetResult(valueBuilder); + } +}; + +template <bool HasKey, bool IsTop> +class TTopGetResult : public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + auto resource = GetTopResource<HasKey, IsTop>(args[0]); + return resource->Get()->GetResult(valueBuilder); + } + +public: + explicit TTopGetResult(ICompare::TPtr) + {} +}; + + +#define RESOURCE(slot, hasKey, isTop) \ +extern const char TopResourceName_##slot##_##hasKey##_##isTop[] = \ + "Top.TopResource."#slot"."#hasKey"."#isTop; \ +template <> \ +class TTopResourceData<EDataSlot::slot, hasKey, isTop>: \ + public TBoxedResource< \ + TTopKeeperDataWrapper<EDataSlot::slot, hasKey, isTop>, \ + TopResourceName_##slot##_##hasKey##_##isTop> \ +{ \ +public: \ + template <typename... Args> \ + inline TTopResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + {} \ +}; + +#define RESOURCE_00(slot, ...) RESOURCE(slot, false, false) +#define RESOURCE_01(slot, ...) RESOURCE(slot, false, true) +#define RESOURCE_10(slot, ...) RESOURCE(slot, true, false) +#define RESOURCE_11(slot, ...) RESOURCE(slot, true, true) + +UDF_TYPE_ID_MAP(RESOURCE_00) +UDF_TYPE_ID_MAP(RESOURCE_01) +UDF_TYPE_ID_MAP(RESOURCE_10) +UDF_TYPE_ID_MAP(RESOURCE_11) + +#define MAKE_IMPL(operation, slot, hasKey, isTop) \ + case EDataSlot::slot: \ + builder.Implementation(new operation<EDataSlot::slot, hasKey, isTop>); \ + break; + +#define CREATE_00(slot, ...) MAKE_IMPL(TTopCreateData, slot, false, false) +#define CREATE_01(slot, ...) MAKE_IMPL(TTopCreateData, slot, false, true) +#define CREATE_10(slot, ...) MAKE_IMPL(TTopCreateData, slot, true, false) +#define CREATE_11(slot, ...) MAKE_IMPL(TTopCreateData, slot, true, true) + +#define ADD_VALUE_00(slot, ...) MAKE_IMPL(TTopAddValueData, slot, false, false) +#define ADD_VALUE_01(slot, ...) 
MAKE_IMPL(TTopAddValueData, slot, false, true) +#define ADD_VALUE_10(slot, ...) MAKE_IMPL(TTopAddValueData, slot, true, false) +#define ADD_VALUE_11(slot, ...) MAKE_IMPL(TTopAddValueData, slot, true, true) + +#define MERGE_00(slot, ...) MAKE_IMPL(TTopMergeData, slot, false, false) +#define MERGE_01(slot, ...) MAKE_IMPL(TTopMergeData, slot, false, true) +#define MERGE_10(slot, ...) MAKE_IMPL(TTopMergeData, slot, true, false) +#define MERGE_11(slot, ...) MAKE_IMPL(TTopMergeData, slot, true, true) + +#define SERIALIZE_00(slot, ...) MAKE_IMPL(TTopSerializeData, slot, false, false) +#define SERIALIZE_01(slot, ...) MAKE_IMPL(TTopSerializeData, slot, false, true) +#define SERIALIZE_10(slot, ...) MAKE_IMPL(TTopSerializeData, slot, true, false) +#define SERIALIZE_11(slot, ...) MAKE_IMPL(TTopSerializeData, slot, true, true) + +#define DESERIALIZE_00(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, false, false) +#define DESERIALIZE_01(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, false, true) +#define DESERIALIZE_10(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, true, false) +#define DESERIALIZE_11(slot, ...) MAKE_IMPL(TTopDeserializeData, slot, true, true) + +#define GET_RESULT_00(slot, ...) MAKE_IMPL(TTopGetResultData, slot, false, false) +#define GET_RESULT_01(slot, ...) MAKE_IMPL(TTopGetResultData, slot, false, true) +#define GET_RESULT_10(slot, ...) MAKE_IMPL(TTopGetResultData, slot, true, false) +#define GET_RESULT_11(slot, ...) MAKE_IMPL(TTopGetResultData, slot, true, true) + +#define MAKE_TYPE(slot, hasKey, isTop) \ + case EDataSlot::slot: \ + topType = builder.Resource(TopResourceName_##slot##_##hasKey##_##isTop); \ + break; + +#define TYPE_00(slot, ...) MAKE_TYPE(slot, false, false) +#define TYPE_01(slot, ...) MAKE_TYPE(slot, false, true) +#define TYPE_10(slot, ...) MAKE_TYPE(slot, true, false) +#define TYPE_11(slot, ...) 
MAKE_TYPE(slot, true, true) + +#define PARAMETRIZE(action) \ + if (hasKey) { \ + if (isTop) { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_11) \ + } \ + } else { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_10) \ + } \ + } \ + } else { \ + if (isTop) { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_01) \ + } \ + } else { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_00) \ + } \ + } \ + } + + +#define RESOURCE_GENERIC(hasKey, isTop) \ +extern const char TopResourceName_Generic_##hasKey##_##isTop[] = \ + "Top.TopResource.Generic."#hasKey"."#isTop; \ +template <> \ +class TTopResource<hasKey, isTop>: \ + public TBoxedResource< \ + TTopKeeperWrapper<hasKey, isTop>, \ + TopResourceName_Generic_##hasKey##_##isTop> \ +{ \ +public: \ + template <typename... Args> \ + inline TTopResource(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + {} \ +}; + +RESOURCE_GENERIC(false, false) +RESOURCE_GENERIC(false, true) +RESOURCE_GENERIC(true, false) +RESOURCE_GENERIC(true, true) + +#define MAKE_IMPL_GENERIC(operation, hasKey, isTop) \ + builder.Implementation(new operation<hasKey, isTop>(compare)); + +#define CREATE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopCreate, hasKey, isTop) +#define ADD_VALUE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopAddValue, hasKey, isTop) +#define MERGE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopMerge, hasKey, isTop) +#define SERIALIZE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopSerialize, hasKey, isTop) +#define DESERIALIZE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopDeserialize, hasKey, isTop) +#define GET_RESULT_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopGetResult, hasKey, isTop) + +#define TYPE_GENERIC(hasKey, isTop) \ + topType = builder.Resource(TopResourceName_Generic_##hasKey##_##isTop); + +#define PARAMETRIZE_GENERIC(action) \ + if (hasKey) { \ + if (isTop) { \ + action(true, true) \ + } else { \ + action(true, false) \ + } \ + } else { \ + if (isTop) { \ + action(false, true) \ + } else { \ + 
action(false, false) \ + } \ + } + + +static const auto CreateName = TStringRef::Of("Create"); +static const auto AddValueName = TStringRef::Of("AddValue"); +static const auto SerializeName = TStringRef::Of("Serialize"); +static const auto DeserializeName = TStringRef::Of("Deserialize"); +static const auto MergeName = TStringRef::Of("Merge"); +static const auto GetResultName = TStringRef::Of("GetResult"); + +class TTopModule : public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Top"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(AddValueName)->SetTypeAwareness(); + sink.Add(SerializeName)->SetTypeAwareness(); + sink.Add(DeserializeName)->SetTypeAwareness(); + sink.Add(MergeName)->SetTypeAwareness(); + sink.Add(GetResultName)->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + try { + bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); + + if (typeConfig.Size() != 2) { + builder.SetError(TStringBuilder() << "Invalid type config: " << typeConfig.Data()); + return; + } + + bool hasKey = (typeConfig.Data()[0] == '1'); + bool isTop = (typeConfig.Data()[1] == '1'); + + auto typeHelper = builder.TypeInfoHelper(); + + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } + + auto valueType = userTypeInspector.GetElementType(2); + auto keyType = valueType; + auto payloadType = valueType; + + if (hasKey) { + auto keyPayloadTypeInspector = TTupleTypeInspector(*typeHelper, valueType); + if (!keyPayloadTypeInspector || keyPayloadTypeInspector.GetElementsCount() != 2) { + 
builder.SetError("Key/payload type is not a 2-tuple"); + return; + } + + keyType = keyPayloadTypeInspector.GetElementType(0); + payloadType = keyPayloadTypeInspector.GetElementType(1); + } + + bool isGeneric = false; + ICompare::TPtr compare; + TMaybe<EDataSlot> slot; + + auto keyTypeInspector = TDataTypeInspector(*typeHelper, keyType); + if (!keyTypeInspector) { + isGeneric = true; + compare = builder.MakeCompare(keyType); + if (!compare) { + return; + } + } else { + slot = FindDataSlot(keyTypeInspector.GetTypeId()); + if (!slot) { + builder.SetError("Unknown data type"); + return; + } + if (!(GetDataTypeInfo(*slot).Features & NUdf::CanCompare)) { + builder.SetError("Data type is not comparable"); + return; + } + } + + auto serializedListType = builder.List()->Item(valueType).Build(); + auto serializedType = builder.Tuple()->Add<ui32>().Add(serializedListType).Build(); + + TType* topType = nullptr; + if (isGeneric) { + PARAMETRIZE_GENERIC(TYPE_GENERIC) + } else { + PARAMETRIZE(TYPE) + } + + if (name == CreateName) { + if (hasKey) { + builder.Args()->Add(keyType).Add(payloadType).Add<ui32>().Done().Returns(topType); + } else { + builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topType); + } + + if (!typesOnly) { + if (isGeneric) { + PARAMETRIZE_GENERIC(CREATE_GENERIC) + } else { + PARAMETRIZE(CREATE) + } + } + builder.IsStrict(); + } + + if (name == AddValueName) { + if (hasKey) { + builder.Args()->Add(topType).Add(keyType).Add(payloadType).Done().Returns(topType); + } else { + builder.Args()->Add(topType).Add(valueType).Done().Returns(topType); + } + + if (!typesOnly) { + if (isGeneric) { + PARAMETRIZE_GENERIC(ADD_VALUE_GENERIC) + } else { + PARAMETRIZE(ADD_VALUE) + } + } + builder.IsStrict(); + } + + if (name == SerializeName) { + builder.Args()->Add(topType).Done().Returns(serializedType); + + if (!typesOnly) { + if (isGeneric) { + PARAMETRIZE_GENERIC(SERIALIZE_GENERIC) + } else { + PARAMETRIZE(SERIALIZE) + } + } + builder.IsStrict(); + } + + if (name 
== DeserializeName) { + builder.Args()->Add(serializedType).Done().Returns(topType); + + if (!typesOnly) { + if (isGeneric) { + PARAMETRIZE_GENERIC(DESERIALIZE_GENERIC) + } else { + PARAMETRIZE(DESERIALIZE) + } + } + } + + if (name == MergeName) { + builder.Args()->Add(topType).Add(topType).Done().Returns(topType); + + if (!typesOnly) { + if (isGeneric) { + PARAMETRIZE_GENERIC(MERGE_GENERIC) + } else { + PARAMETRIZE(MERGE) + } + } + builder.IsStrict(); + } + + if (name == GetResultName) { + auto listType = builder.List()->Item(payloadType).Build(); + + builder.Args()->Add(topType).Done().Returns(listType); + + if (!typesOnly) { + if (isGeneric) { + PARAMETRIZE_GENERIC(GET_RESULT_GENERIC) + } else { + PARAMETRIZE(GET_RESULT) + } + } + builder.IsStrict(); + } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } +}; + +} // namespace + +REGISTER_MODULES(TTopModule) + diff --git a/yql/essentials/udfs/common/top/ya.make b/yql/essentials/udfs/common/top/ya.make new file mode 100644 index 00000000000..5032048e3ba --- /dev/null +++ b/yql/essentials/udfs/common/top/ya.make @@ -0,0 +1,30 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319906274 OUT_NOAUTO libtop_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(top_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + top_udf.cpp + ) + + PEERDIR( + library/cpp/containers/top_keeper + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) + diff --git a/yql/essentials/udfs/common/topfreq/static/static_udf.cpp b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp new file mode 100644 index 00000000000..4075bfa9c2b --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp @@ -0,0 +1,10 @@ +#include "topfreq_udf.h" + +namespace NYql { + namespace NUdf { + NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule() { + return new TTopFreqModule(); + } + + } +} diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.cpp 
b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp new file mode 100644 index 00000000000..c118b52d0a1 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp @@ -0,0 +1,213 @@ +#include "topfreq.h" +#include <cmath> +#include <algorithm> + +using namespace NKikimr; +using namespace NUdf; + +template <typename THash, typename TEquals> +TTopFreqBase<THash, TEquals>::TTopFreqBase(THash hash, TEquals equals) + : Indices_(0, hash, equals) +{} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize) { + MinSize_ = minSize; + MaxSize_ = maxSize; + + Freqs_.reserve(MaxSize_ + 1); + Indices_.reserve(MaxSize_ + 1); + + AddValue(value); +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::Merge(const TTopFreqBase& topFreq1, const TTopFreqBase& topFreq2) { + MinSize_ = std::max(topFreq1.MinSize_, topFreq2.MinSize_); + MaxSize_ = std::max(topFreq1.MaxSize_, topFreq2.MaxSize_); + + Freqs_.reserve(std::max(MaxSize_ + 1, ui32(topFreq1.Freqs_.size() + topFreq2.Freqs_.size()))); + Indices_.reserve(MaxSize_ + 1); + + Add(topFreq1); + Add(topFreq2); +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::Deserialize(const TUnboxedValuePod& serialized) { + MinSize_ = serialized.GetElement(0).Get<ui32>(); + MaxSize_ = serialized.GetElement(1).Get<ui32>(); + + Freqs_.reserve(MaxSize_ + 1); + Indices_.reserve(MaxSize_ + 1); + + const auto listIter = serialized.GetElement(2).GetListIterator(); + for (TUnboxedValue current; listIter.Next(current);) { + Update(current.GetElement(1), current.GetElement(0).Get<ui64>()); + } +} + +template <typename THash, typename TEquals> +TUnboxedValue TTopFreqBase<THash, TEquals>::Convert(const IValueBuilder* valueBuilder) const { + TUnboxedValue* values = nullptr; + const auto list = valueBuilder->NewArray(Freqs_.size(), values); + for (const auto& item : Freqs_) { + 
TUnboxedValue* items = nullptr; + *values++ = valueBuilder->NewArray(2U, items); + items[0] = TUnboxedValuePod(item.second); + items[1] = item.first; + } + return list; +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::Add(const TTopFreqBase& otherModeCalc) { + for (auto& it : otherModeCalc.Freqs_) { + Update(it.first, it.second); + } + + TryCompress(); +} + +template <typename THash, typename TEquals> +TUnboxedValue TTopFreqBase<THash, TEquals>::Get(const IValueBuilder* builder, ui32 resultSize) { + resultSize = std::min(resultSize, ui32(Freqs_.size())); + Compress(resultSize, true); + return Convert(builder); +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::AddValue(const TUnboxedValuePod& value) { + Update(value, 1); + TryCompress(); +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::Update(const TUnboxedValuePod& value, ui64 freq) { + Freqs_.emplace_back(TUnboxedValuePod(value), freq); + auto mapInsertResult = Indices_.emplace(TUnboxedValuePod(value), Freqs_.size() - 1); + + if (!mapInsertResult.second) { + Freqs_[mapInsertResult.first->second].second += freq; + Freqs_.pop_back(); + } +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::TryCompress() { + auto freqSize = Freqs_.size(); + if (freqSize > MaxSize_) { + Compress(MinSize_); + } +} + +template <typename THash, typename TEquals> +void TTopFreqBase<THash, TEquals>::Compress(ui32 newSize, bool sort) { + auto compare = [](const TVectorElement& v1, const TVectorElement& v2) { + return v1.second > v2.second; + }; + + if (sort) { + std::sort(Freqs_.begin(), Freqs_.end(), compare); + } else { + std::nth_element(Freqs_.begin(), Freqs_.begin() + newSize - 1, Freqs_.end(), compare); + } + + Indices_.clear(); + Freqs_.resize(newSize); + + for (ui32 i = 0; i < newSize; i++) { + Indices_[Freqs_[i].first] = i; + } +} + +template <typename THash, typename TEquals> +TUnboxedValue 
TTopFreqBase<THash, TEquals>::Serialize(const IValueBuilder* builder) { + if (ui32(Freqs_.size()) > MinSize_) { + Compress(MinSize_); + } + + TUnboxedValue* items = nullptr; + auto tuple = builder->NewArray(3U, items); + items[0] = TUnboxedValuePod(MinSize_); + items[1] = TUnboxedValuePod(MaxSize_); + items[2] = Convert(builder); + return tuple; +} + +template <EDataSlot Slot> +TTopFreqData<Slot>::TTopFreqData(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize) + : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>()) +{ + TBase::Init(value, minSize, maxSize); +} + +template <EDataSlot Slot> +TTopFreqData<Slot>::TTopFreqData(const TTopFreqData& topFreq1, const TTopFreqData& topFreq2) + : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>()) +{ + TBase::Merge(topFreq1, topFreq2); +} + +template <EDataSlot Slot> +TTopFreqData<Slot>::TTopFreqData(const TUnboxedValuePod& serialized) + : TBase(TUnboxedValueHash<Slot>(), TUnboxedValueEquals<Slot>()) +{ + TBase::Deserialize(serialized); +} + +template <EDataSlot Slot> +TUnboxedValue TTopFreqData<Slot>::Serialize(const IValueBuilder* builder) { + return TBase::Serialize(builder); +} + +template <EDataSlot Slot> +TUnboxedValue TTopFreqData<Slot>::Get(const IValueBuilder* builder, ui32 resultSize) { + return TBase::Get(builder, resultSize); +} + +template <EDataSlot Slot> +void TTopFreqData<Slot>::AddValue(const TUnboxedValuePod& value) { + TBase::AddValue(value); +} + +#define INSTANCE_FOR(slot, ...) 
\ + template class TTopFreqData<EDataSlot::slot>; + +UDF_TYPE_ID_MAP(INSTANCE_FOR) + +#undef INSTANCE_FOR + +TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize, + IHash::TPtr hash, IEquate::TPtr equate) + : TBase(TGenericHash{hash}, TGenericEquals{equate}) +{ + TBase::Init(value, minSize, maxSize); +} + +TTopFreqGeneric::TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2, + IHash::TPtr hash, IEquate::TPtr equate) + : TBase(TGenericHash{hash}, TGenericEquals{equate}) +{ + TBase::Merge(topFreq1, topFreq2); +} + +TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& serialized, + IHash::TPtr hash, IEquate::TPtr equate) + : TBase(TGenericHash{hash}, TGenericEquals{equate}) +{ + TBase::Deserialize(serialized); +} + +TUnboxedValue TTopFreqGeneric::Serialize(const IValueBuilder* builder) { + return TBase::Serialize(builder); +} + +TUnboxedValue TTopFreqGeneric::Get(const IValueBuilder* builder, ui32 resultSize) { + return TBase::Get(builder, resultSize); +} + +void TTopFreqGeneric::AddValue(const TUnboxedValuePod& value) { + TBase::AddValue(value); +} + diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.h b/yql/essentials/udfs/common/topfreq/static/topfreq.h new file mode 100644 index 00000000000..b10574f33f6 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/static/topfreq.h @@ -0,0 +1,97 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_allocator.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_type_ops.h> + +#include <unordered_map> + +template <typename THash, typename TEquals> +class TTopFreqBase { +protected: + using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; + using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; + using IValueBuilder = NKikimr::NUdf::IValueBuilder; + + using TVectorElement = std::pair<TUnboxedValue, ui64>; + using TVectorType = std::vector<TVectorElement, 
NKikimr::NUdf::TStdAllocatorForUdf<TVectorElement>>; + + TVectorType Freqs_; + std::unordered_map<TUnboxedValue, ui32, THash, TEquals, NKikimr::NUdf::TStdAllocatorForUdf<std::pair<const TUnboxedValue, ui32>>> Indices_; + ui32 MinSize_ = 0; + ui32 MaxSize_ = 0; + + void Add(const TTopFreqBase& otherCalc); + void Update(const TUnboxedValuePod& key, const ui64 value); + void TryCompress(); + void Compress(ui32 newSize, bool sort = false); + TUnboxedValue Convert(const IValueBuilder* valueBuilder) const; + +protected: + TTopFreqBase(THash hash, TEquals equals); + + void Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize); + void Merge(const TTopFreqBase& TopFreq1, const TTopFreqBase& TopFreq2); + void Deserialize(const TUnboxedValuePod& serialized); + + TUnboxedValue Serialize(const IValueBuilder* builder); + TUnboxedValue Get(const IValueBuilder* builder, ui32 resultSize); + void AddValue(const TUnboxedValuePod& value); +}; + +template <NKikimr::NUdf::EDataSlot Slot> +class TTopFreqData + : public TTopFreqBase< + NKikimr::NUdf::TUnboxedValueHash<Slot>, + NKikimr::NUdf::TUnboxedValueEquals<Slot>> +{ +public: + using TBase = TTopFreqBase< + NKikimr::NUdf::TUnboxedValueHash<Slot>, + NKikimr::NUdf::TUnboxedValueEquals<Slot>>; + + TTopFreqData(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize); + TTopFreqData(const TTopFreqData& topFreq1, const TTopFreqData& topFreq2); + TTopFreqData(const NKikimr::NUdf::TUnboxedValuePod& serialized); + + NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder); + NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize); + void AddValue(const NKikimr::NUdf::TUnboxedValuePod& value); +}; + +struct TGenericHash { + NKikimr::NUdf::IHash::TPtr Hash; + + std::size_t operator()(const NKikimr::NUdf::TUnboxedValuePod& value) const { + return Hash->Hash(value); + } +}; + +struct TGenericEquals { + NKikimr::NUdf::IEquate::TPtr 
Equate; + + bool operator()( + const NKikimr::NUdf::TUnboxedValuePod& left, + const NKikimr::NUdf::TUnboxedValuePod& right) const + { + return Equate->Equals(left, right); + } +}; + +class TTopFreqGeneric + : public TTopFreqBase<TGenericHash, TGenericEquals> +{ +public: + using TBase = TTopFreqBase<TGenericHash, TGenericEquals>; + + TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize, + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2, + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& serialized, + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + + NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder); + NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize); + void AddValue(const NKikimr::NUdf::TUnboxedValuePod& value); +}; diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h new file mode 100644 index 00000000000..d6df05a048f --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h @@ -0,0 +1,393 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/public/udf/udf_value_builder.h> +#include <yql/essentials/public/udf/udf_registrator.h> +#include <yql/essentials/public/udf/udf_type_builder.h> +#include <yql/essentials/public/udf/udf_type_inspection.h> +#include <yql/essentials/public/udf/udf_value.h> +#include <yql/essentials/public/udf/udf_types.h> +#include "topfreq.h" +#include <algorithm> +#include <array> + +using namespace NYql; +using namespace NUdf; + +namespace { + extern const char TopFreqResourceNameGeneric[] = "TopFreq.TopFreqResource.Generic"; + class TTopFreqResource: + public 
TBoxedResource<TTopFreqGeneric, TopFreqResourceNameGeneric> + { + public: + template <typename... Args> + inline TTopFreqResource(Args&&... args) + : TBoxedResource(std::forward<Args>(args)...) + {} + }; + + template <EDataSlot Slot> + class TTopFreqResourceData; + + template <EDataSlot Slot> + TTopFreqResourceData<Slot>* GetTopFreqResourceData(const TUnboxedValuePod& arg) { + TTopFreqResourceData<Slot>::Validate(arg); + return static_cast<TTopFreqResourceData<Slot>*>(arg.AsBoxed().Get()); + } + + TTopFreqResource* GetTopFreqResource(const TUnboxedValuePod& arg) { + TTopFreqResource::Validate(arg); + return static_cast<TTopFreqResource*>(arg.AsBoxed().Get()); + } + + + template <EDataSlot Slot> + class TTopFreq_CreateData: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + ui32 minSize = args[1].Get<ui32>(); + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0], minSize, minSize * 2)); + } + }; + + class TTopFreq_Create: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + ui32 minSize = args[1].Get<ui32>(); + return TUnboxedValuePod(new TTopFreqResource(args[0], minSize, minSize * 2, Hash_, Equate_)); + } + + public: + TTopFreq_Create(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + {} + + private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; + }; + + template <EDataSlot Slot> + class TTopFreq_AddValueData: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq = GetTopFreqResourceData<Slot>(args[0]); + topFreq->Get()->AddValue(args[1]); + return TUnboxedValuePod(topFreq); + } + }; + + class TTopFreq_AddValue: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq = GetTopFreqResource(args[0]); + topFreq->Get()->AddValue(args[1]); + return 
TUnboxedValuePod(topFreq); + } + }; + + template <EDataSlot Slot> + class TTopFreq_SerializeData: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); + } + }; + + class TTopFreq_Serialize: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResource(args[0])->Get()->Serialize(valueBuilder); + } + }; + + template <EDataSlot Slot> + class TTopFreq_DeserializeData: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0])); + } + }; + + class TTopFreq_Deserialize: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + return TUnboxedValuePod(new TTopFreqResource(args[0], Hash_, Equate_)); + } + + public: + TTopFreq_Deserialize(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + {} + + private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; + }; + + template <EDataSlot Slot> + class TTopFreq_MergeData: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq0 = GetTopFreqResourceData<Slot>(args[0]); + const auto topFreq1 = GetTopFreqResourceData<Slot>(args[1]); + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(*topFreq0->Get(), *topFreq1->Get())); + } + }; + + class TTopFreq_Merge: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq0 = GetTopFreqResource(args[0]); + const auto topFreq1 = GetTopFreqResource(args[1]); + return TUnboxedValuePod(new TTopFreqResource(*topFreq0->Get(), *topFreq1->Get(), Hash_, Equate_)); + } + + public: + TTopFreq_Merge(IHash::TPtr hash, 
IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + {} + + private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; + }; + + template <EDataSlot Slot> + class TTopFreq_GetData: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResourceData<Slot>(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); + } + }; + + class TTopFreq_Get: public TBoxedValue { + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResource(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); + } + }; + + +#define MAKE_RESOURCE(slot, ...) \ + extern const char TopFreqResourceName##slot[] = "TopFreq.TopFreqResource."#slot; \ + template <> \ + class TTopFreqResourceData<EDataSlot::slot>: \ + public TBoxedResource<TTopFreqData<EDataSlot::slot>, TopFreqResourceName##slot> \ + { \ + public: \ + template <typename... Args> \ + inline TTopFreqResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + {} \ + }; + + UDF_TYPE_ID_MAP(MAKE_RESOURCE) + +#define MAKE_IMPL(operation, slot) \ + case EDataSlot::slot: \ + builder.Implementation(new operation<EDataSlot::slot>); \ + break; + +#define MAKE_CREATE(slot, ...) MAKE_IMPL(TTopFreq_CreateData, slot) +#define MAKE_ADD_VALUE(slot, ...) MAKE_IMPL(TTopFreq_AddValueData, slot) +#define MAKE_SERIALIZE(slot, ...) MAKE_IMPL(TTopFreq_SerializeData, slot) +#define MAKE_DESERIALIZE(slot, ...) MAKE_IMPL(TTopFreq_DeserializeData, slot) +#define MAKE_MERGE(slot, ...) MAKE_IMPL(TTopFreq_MergeData, slot) +#define MAKE_GET(slot, ...) MAKE_IMPL(TTopFreq_GetData, slot) + +#define MAKE_TYPE(slot, ...) 
\ + case EDataSlot::slot: \ + topFreqType = builder.Resource(TopFreqResourceName##slot); \ + break; + + + static const auto CreateName = TStringRef::Of("TopFreq_Create"); + static const auto AddValueName = TStringRef::Of("TopFreq_AddValue"); + static const auto SerializeName = TStringRef::Of("TopFreq_Serialize"); + static const auto DeserializeName = TStringRef::Of("TopFreq_Deserialize"); + static const auto MergeName = TStringRef::Of("TopFreq_Merge"); + static const auto GetName = TStringRef::Of("TopFreq_Get"); + + class TTopFreqModule: public IUdfModule { + public: + TStringRef Name() const { + return TStringRef::Of("TopFreq"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(AddValueName)->SetTypeAwareness(); + sink.Add(SerializeName)->SetTypeAwareness(); + sink.Add(DeserializeName)->SetTypeAwareness(); + sink.Add(MergeName)->SetTypeAwareness(); + sink.Add(GetName)->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + try { + const bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); + + auto typeHelper = builder.TypeInfoHelper(); + + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } + + bool isGeneric = false; + IHash::TPtr hash; + IEquate::TPtr equate; + TMaybe<EDataSlot> slot; + + auto valueType = userTypeInspector.GetElementType(2); + auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType); + if (!valueTypeInspector) { + isGeneric = true; + hash = builder.MakeHash(valueType); + equate = builder.MakeEquate(valueType); + if (!hash || !equate) { + return; + } + } else { + slot = 
FindDataSlot(valueTypeInspector.GetTypeId()); + if (!slot) { + builder.SetError("Unknown data type"); + return; + } + const auto& features = NUdf::GetDataTypeInfo(*slot).Features; + if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { + builder.SetError("Data type is not hashable or equatable"); + return; + } + } + + auto serializedItemType = builder.Tuple()->Add<ui64>().Add(valueType).Build(); + auto serializedListType = builder.List()->Item(serializedItemType).Build(); + auto serializedType = builder.Tuple()->Add<ui32>().Add<ui32>().Add(serializedListType).Build(); + + TType* topFreqType = nullptr; + if (isGeneric) { + topFreqType = builder.Resource(TopFreqResourceNameGeneric); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_TYPE) + } + } + + if (name == CreateName) { + builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topFreqType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreq_Create(hash, equate)); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_CREATE) + } + } + } + builder.IsStrict(); + } + + if (name == AddValueName) { + builder.Args()->Add(topFreqType).Add(valueType).Done().Returns(topFreqType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreq_AddValue); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) + } + } + } + builder.IsStrict(); + } + + if (name == MergeName) { + builder.Args()->Add(topFreqType).Add(topFreqType).Done().Returns(topFreqType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreq_Merge(hash, equate)); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_MERGE) + } + } + } + builder.IsStrict(); + } + + if (name == SerializeName) { + builder.Args()->Add(topFreqType).Done().Returns(serializedType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreq_Serialize); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_SERIALIZE) + } + } + } + builder.IsStrict(); + } + + if (name == 
DeserializeName) { + builder.Args()->Add(serializedType).Done().Returns(topFreqType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreq_Deserialize(hash, equate)); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) + } + } + } + } + + if (name == GetName) { + ui32 indexF, indexV; + auto itemType = builder.Struct()->AddField<ui64>("Frequency", &indexF).AddField("Value", valueType, &indexV).Build(); + auto resultType = builder.List()->Item(itemType).Build(); + + builder.Args()->Add(topFreqType).Add<ui32>().Done().Returns(resultType); + + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreq_Get); + } else { + switch (*slot) { + UDF_TYPE_ID_MAP(MAKE_GET) + } + } + } + builder.IsStrict(); + } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } + }; + +} // namespace diff --git a/yql/essentials/udfs/common/topfreq/static/ya.make b/yql/essentials/udfs/common/topfreq/static/ya.make new file mode 100644 index 00000000000..95838f33c49 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/static/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 28 + 0 +) + +SRCS( + static_udf.cpp + topfreq.cpp +) + +PEERDIR( + yql/essentials/public/udf +) + +END() diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/result.json b/yql/essentials/udfs/common/topfreq/test/canondata/result.json new file mode 100644 index 00000000000..db452a16ce2 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/canondata/result.json @@ -0,0 +1,27 @@ +{ + "test.test[Floats]": [ + { + "uri": "file://test.test_Floats_/results.txt" + } + ], + "test.test[Mode]": [ + { + "uri": "file://test.test_Mode_/results.txt" + } + ], + "test.test[TopFreqStruct]": [ + { + "uri": "file://test.test_TopFreqStruct_/results.txt" + } + ], + "test.test[TopFreqTuple]": [ + { + "uri": "file://test.test_TopFreqTuple_/results.txt" + } + ], + "test.test[TopFreq]": [ + { + "uri": 
"file://test.test_TopFreq_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt new file mode 100644 index 00000000000..8eac384cff2 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Floats_/results.txt @@ -0,0 +1,55 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "Value"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + "2"; + "-0" + ]; + [ + "2"; + "nan" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt new file mode 100644 index 00000000000..9cd67bf0f96 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_Mode_/results.txt @@ -0,0 +1,68 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "Value"; + [ + "DataType"; + "Int32" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "abc"; + [ + [ + "3"; + "23" + ] + ] + ]; + [ + "bbb"; + [ + [ + "3"; + "37" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt new file mode 100644 index 00000000000..e1cf8558d68 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqStruct_/results.txt @@ -0,0 +1,103 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "Value"; + [ + "StructType"; + [ + [ + "k"; + [ + "DataType"; + "String" + ] + ]; + [ + "v"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + [ + "4"; + [ + "101"; + "1" + ] + ]; + [ + "2"; + [ + "103"; + "1" + ] + ]; + [ + "2"; + [ + "104"; + "1" + ] + ] + ] + ]; + [ + "2"; + [ + [ + "4"; + [ + "037"; + "2" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt new file mode 100644 index 00000000000..c131783fc9d --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreqTuple_/results.txt @@ -0,0 +1,97 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "Value"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "1"; + [ + [ + "4"; + [ + "101"; + "1" + ] + ]; + [ + "2"; + [ + "103"; + "1" + ] + ]; + [ + "2"; + [ + "104"; + "1" + ] + ] + ] + ]; + [ + "2"; + [ + [ + "4"; + [ + "037"; + "2" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt new file mode 100644 index 00000000000..c666b42f01d --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/canondata/test.test_TopFreq_/results.txt @@ -0,0 +1,83 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "key"; + [ + "OptionalType"; + [ + "DataType"; + "Uint32" + ] + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "StructType"; + [ + [ + "Frequency"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "Value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "1" + ]; + [ + [ + "4"; + "101" + ]; + [ + "2"; + "103" + ]; + [ + "2"; + "104" + ] + ] + ]; + [ + [ + "2" + ]; + [ + [ + "4"; + "037" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Floats.in b/yql/essentials/udfs/common/topfreq/test/cases/Floats.in new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/Floats.in diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Floats.sql b/yql/essentials/udfs/common/topfreq/test/cases/Floats.sql new file mode 100644 index 00000000000..3c2515dc7ee --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/Floats.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +select topfreq(x,10,10) from ( +select frombytes(tobytes(-0.0),Double) as x +union all +select frombytes(tobytes(+0.0),Double) as x +union all +select Double("nan") as x +union all +select Double("nan") as x +) diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Mode.in b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in new file mode 100644 index 00000000000..6cd4d921916 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in @@ -0,0 +1,8 @@ +{"key"="075";"subkey"="1";"value"="abc"}; +{"key"="023";"subkey"="2";"value"="abc"}; +{"key"="023";"subkey"="3";"value"="abc"}; +{"key"="023";"subkey"="4";"value"="abc"}; +{"key"="037";"subkey"="5";"value"="bbb"}; +{"key"="037";"subkey"="6";"value"="bbb"}; +{"key"="200";"subkey"="7";"value"="bbb"}; +{"key"="037";"subkey"="8";"value"="bbb"}; diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/Mode.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/topfreq/test/cases/Mode.sql 
b/yql/essentials/udfs/common/topfreq/test/cases/Mode.sql new file mode 100644 index 00000000000..da1ade32e8d --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/Mode.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, Mode(value) +FROM +(SELECT + cast (key as Int32) as value, + "" as subkey, + value as key +FROM Input) +AS tmp +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in new file mode 100644 index 00000000000..b4c81f94c8b --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in @@ -0,0 +1,16 @@ +{"key"="101";"subkey"="1";"value"="1"}; +{"key"="101";"subkey"="2";"value"="1"}; +{"key"="101";"subkey"="3";"value"="1"}; +{"key"="101";"subkey"="4";"value"="1"}; +{"key"="103";"subkey"="1";"value"="1"}; +{"key"="103";"subkey"="2";"value"="1"}; +{"key"="104";"subkey"="3";"value"="1"}; +{"key"="104";"subkey"="4";"value"="1"}; +{"key"="102";"subkey"="1";"value"="1"}; +{"key"="105";"subkey"="2";"value"="1"}; +{"key"="106";"subkey"="3";"value"="1"}; +{"key"="107";"subkey"="4";"value"="1"}; +{"key"="037";"subkey"="5";"value"="2"}; +{"key"="037";"subkey"="6";"value"="2"}; +{"key"="037";"subkey"="7";"value"="2"}; +{"key"="037";"subkey"="8";"value"="2"}; diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql new file mode 100644 
index 00000000000..79c412cc875 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreq.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +USE plato; + +SELECT + key, TopFreq(value, 3, 5u) +FROM +(SELECT + key as value, + "" as subkey, + cast (value as Uint32) as key +FROM Input) +AS tmp +GROUP BY key +ORDER BY key diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in new file mode 100644 index 00000000000..b4c81f94c8b --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in @@ -0,0 +1,16 @@ +{"key"="101";"subkey"="1";"value"="1"}; +{"key"="101";"subkey"="2";"value"="1"}; +{"key"="101";"subkey"="3";"value"="1"}; +{"key"="101";"subkey"="4";"value"="1"}; +{"key"="103";"subkey"="1";"value"="1"}; +{"key"="103";"subkey"="2";"value"="1"}; +{"key"="104";"subkey"="3";"value"="1"}; +{"key"="104";"subkey"="4";"value"="1"}; +{"key"="102";"subkey"="1";"value"="1"}; +{"key"="105";"subkey"="2";"value"="1"}; +{"key"="106";"subkey"="3";"value"="1"}; +{"key"="107";"subkey"="4";"value"="1"}; +{"key"="037";"subkey"="5";"value"="2"}; +{"key"="037";"subkey"="6";"value"="2"}; +{"key"="037";"subkey"="7";"value"="2"}; +{"key"="037";"subkey"="8";"value"="2"}; diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql new file mode 100644 index 
00000000000..b188ce2e5d6 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqStruct.sql @@ -0,0 +1,8 @@ +/* syntax version 1 */ +USE plato; + +SELECT + value, TopFreq(AsStruct(key as k, value as v), 3, 5u) +FROM Input +GROUP BY value +ORDER BY value
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in new file mode 100644 index 00000000000..b4c81f94c8b --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in @@ -0,0 +1,16 @@ +{"key"="101";"subkey"="1";"value"="1"}; +{"key"="101";"subkey"="2";"value"="1"}; +{"key"="101";"subkey"="3";"value"="1"}; +{"key"="101";"subkey"="4";"value"="1"}; +{"key"="103";"subkey"="1";"value"="1"}; +{"key"="103";"subkey"="2";"value"="1"}; +{"key"="104";"subkey"="3";"value"="1"}; +{"key"="104";"subkey"="4";"value"="1"}; +{"key"="102";"subkey"="1";"value"="1"}; +{"key"="105";"subkey"="2";"value"="1"}; +{"key"="106";"subkey"="3";"value"="1"}; +{"key"="107";"subkey"="4";"value"="1"}; +{"key"="037";"subkey"="5";"value"="2"}; +{"key"="037";"subkey"="6";"value"="2"}; +{"key"="037";"subkey"="7";"value"="2"}; +{"key"="037";"subkey"="8";"value"="2"}; diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr new file mode 100644 index 00000000000..2a151e9c475 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.in.attr @@ -0,0 +1,30 @@ +{ + "_yql_row_spec" = { + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ] + } +} diff --git a/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql new file mode 100644 index 00000000000..72dd648f0b1 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/cases/TopFreqTuple.sql @@ -0,0 +1,8 @@ +/* syntax version 1 */ +USE plato; + +SELECT + value, TopFreq(AsTuple(key, value), 3, 5u) +FROM Input +GROUP BY value +ORDER BY value
\ No newline at end of file diff --git a/yql/essentials/udfs/common/topfreq/test/ya.make b/yql/essentials/udfs/common/topfreq/test/ya.make new file mode 100644 index 00000000000..1ba3eac0d98 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/topfreq) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/topfreq/topfreq_udf.cpp b/yql/essentials/udfs/common/topfreq/topfreq_udf.cpp new file mode 100644 index 00000000000..7107f2bd006 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/topfreq_udf.cpp @@ -0,0 +1,3 @@ +#include "static/topfreq_udf.h" + +REGISTER_MODULES(TTopFreqModule) diff --git a/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp new file mode 100644 index 00000000000..9ce7b8561fb --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp @@ -0,0 +1,451 @@ +#include <util/random/shuffle.h> +#include <library/cpp/testing/unittest/registar.h> +#include <yql/essentials/minikql/mkql_function_registry.h> +#include <yql/essentials/minikql/mkql_program_builder.h> +#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h> +#include <yql/essentials/minikql/computation/mkql_computation_node.h> +#include <yql/essentials/minikql/comp_nodes/mkql_factories.h> +#include <util/random/random.h> +#include <array> +#include <yql/essentials/udfs/common/topfreq/static/topfreq_udf.h> + +namespace NYql { + using namespace NKikimr::NMiniKQL; + namespace NUdf { + extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule(); + } + + class TSetup { + public: + TSetup() + : MutableFunctionRegistry_(CreateFunctionRegistry(CreateBuiltinRegistry())->Clone()) + , RandomProvider_(CreateDeterministicRandomProvider(1)) + , TimeProvider_(CreateDeterministicTimeProvider(10000000)) + , 
Alloc_(__LOCATION__) + , Env_(Alloc_) + { + MutableFunctionRegistry_->AddModule("", "TopFreq", NUdf::CreateTopFreqModule()); + PgmBuidler_.Reset(new TProgramBuilder(Env_, *MutableFunctionRegistry_)); + } + + TProgramBuilder& GetProgramBuilder() { + return *PgmBuidler_.Get(); + } + + NUdf::TUnboxedValue GetValue(TRuntimeNode& node) { + Explorer_.Walk(node.GetNode(), Env_); + + TComputationPatternOpts opts(Alloc_.Ref(), Env_, GetBuiltinFactory(), + MutableFunctionRegistry_.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + Pattern_ = MakeComputationPattern(Explorer_, node, {}, opts); + Graph_ = Pattern_->Clone(opts.ToComputationOptions(*RandomProvider_, *TimeProvider_)); + + return Graph_->GetValue(); + } + + private: + using IMutableFunctionRegistryPtr = TIntrusivePtr<IMutableFunctionRegistry>; + using IRandomProviderPtr = TIntrusivePtr<IRandomProvider>; + using ITimeProviderPtr = TIntrusivePtr<ITimeProvider>; + + IMutableFunctionRegistryPtr MutableFunctionRegistry_; + IRandomProviderPtr RandomProvider_; + ITimeProviderPtr TimeProvider_; + TScopedAlloc Alloc_; + TTypeEnvironment Env_; + THolder<TProgramBuilder> PgmBuidler_; + IComputationPattern::TPtr Pattern_; + THolder<IComputationGraph> Graph_; + TExploringNodeVisitor Explorer_; + }; + + Y_UNIT_TEST_SUITE(TUDFTopFreqTest) { + Y_UNIT_TEST(SimpleTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<i32>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Int32"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = 
pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + TRuntimeNode pgmTopFreq; + { + auto val = pgmBuilder.NewDataLiteral<i32>(3); + auto param = pgmBuilder.NewDataLiteral<ui32>(10); + + TVector<TRuntimeNode> params = {val, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 9; n++) { + auto value = pgmBuilder.NewDataLiteral<i32>(1); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 7; n++) { + auto value = pgmBuilder.NewDataLiteral<i32>(4); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(4); + TVector<TRuntimeNode> params = {pgmTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 1); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 4); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 7); + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 3); + 
UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 1); + + UNIT_ASSERT(!listIterator.Next(item)); + } + + Y_UNIT_TEST(MergingTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); + auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + TRuntimeNode pgmTopFreq; + { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 1; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + 
pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 4; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(5); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 1; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(3); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmTopFreq2; + { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 5; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + TVector<TRuntimeNode> params = {pgmTopFreq2, value}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 5; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(5); + TVector<TRuntimeNode> params = {pgmTopFreq2, value}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmTopFreq3; + { + TVector<TRuntimeNode> params = {pgmTopFreq, pgmTopFreq2}; + pgmTopFreq3 = pgmBuilder.Apply(udfTopFreq_Merge, params); + } + + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {pgmTopFreq3, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<ui64>(), 5); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); + + UNIT_ASSERT(!listIterator.Next(item)); + } + + Y_UNIT_TEST(SerializedTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = 
pgmBuilder.NewDataType(NUdf::TDataType<bool>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Bool"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); + auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); + + auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, + pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); + + auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); + auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); + + TRuntimeNode pgmTopFreq; + { + auto value = pgmBuilder.NewDataLiteral<bool>(true); + auto param 
= pgmBuilder.NewDataLiteral<ui32>(10); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 7; n++) { + auto value = pgmBuilder.NewDataLiteral<bool>(true); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 10; n++) { + auto value = pgmBuilder.NewDataLiteral<bool>(false); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmSerializedTopFreq; + { + TVector<TRuntimeNode> params = {pgmTopFreq}; + pgmSerializedTopFreq = pgmBuilder.Apply(udfTopFreq_Serialize, params); + } + + TRuntimeNode pgmDeserializedTopFreq; + { + TVector<TRuntimeNode> params = {pgmSerializedTopFreq}; + pgmDeserializedTopFreq = pgmBuilder.Apply(udfTopFreq_Deserialize, params); + } + + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(3); + TVector<TRuntimeNode> params = {pgmDeserializedTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), false); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 10); + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), true); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 8); + + UNIT_ASSERT(!listIterator.Next(item)); + } + + Y_UNIT_TEST(ApproxTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); + const auto ui32Type = 
pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); + auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); + auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); + + auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, + pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); + + auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); + auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); + + static const ui64 BigNum = 20; + static const ui64 
BigEach = 5000; + static const ui64 SmallNum = 500; + static const ui64 SmallEach = 20; + static const ui64 Total = BigNum * BigEach + SmallNum * SmallEach; + static const i32 AskFor = 25; + static const ui64 BlockSize = 200; + static const ui64 BlockCount = 10; + static const i32 WorksIfAtLeast = 15; + + std::array<ui64, Total> values; + std::array<TRuntimeNode, BlockCount> pgmTopFreqs; + + i32 curIndex = 0; + for (ui64 i = 1; i <= BigNum; i++) { + for (ui64 j = 0; j < BigEach; j++) { + values[curIndex++] = i; + } + } + + for (ui64 i = BigNum + 1; i <= BigNum + SmallNum; i++) { + for (ui64 j = 0; j < SmallEach; j++) { + values[curIndex++] = i; + } + } + + Shuffle(values.begin(), values.end()); + + TVector<TRuntimeNode> params; + TRuntimeNode param; + TRuntimeNode pgmvalue; + + for (ui64 i = 0; i < BlockCount; i++) { + { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[i * BlockSize]); + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmvalue, param}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (ui64 j = i * BlockSize + 1; j < (i + 1) * BlockSize; j++) { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[j]); + params = {pgmTopFreqs[i], pgmvalue}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + { + params = {pgmTopFreqs[i]}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Serialize, params); + } + } + + TRuntimeNode pgmMainTopFreq; + { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(Total + 2); + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmvalue, param}; + pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (ui64 i = 0; i < BlockCount; i++) { + params = {pgmTopFreqs[i]}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Deserialize, params); + + params = {pgmMainTopFreq, pgmTopFreqs[i]}; + pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Merge, params); + } + + TRuntimeNode pgmReturn; + { + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = 
{pgmMainTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + ui32 found = 0; + + for (ui64 i = 0; i < AskFor; i++) { + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + ui64 current = item.GetElement(1).Get<ui64>(); + if (current <= BigNum) + found++; + } + + UNIT_ASSERT(!listIterator.Skip()); + UNIT_ASSERT(found >= WorksIfAtLeast); + } + } +} diff --git a/yql/essentials/udfs/common/topfreq/ut/ya.make b/yql/essentials/udfs/common/topfreq/ut/ya.make new file mode 100644 index 00000000000..702e9a7214a --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/ut/ya.make @@ -0,0 +1,15 @@ +UNITTEST_FOR(yql/essentials/udfs/common/topfreq/static) + +SRCS( + ../topfreq_udf_ut.cpp +) + +PEERDIR( + yql/essentials/minikql/comp_nodes/llvm14 + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql/pg_dummy +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/yql/essentials/udfs/common/topfreq/ya.make b/yql/essentials/udfs/common/topfreq/ya.make new file mode 100644 index 00000000000..44ec1309ed3 --- /dev/null +++ b/yql/essentials/udfs/common/topfreq/ya.make @@ -0,0 +1,32 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319906760 OUT_NOAUTO libtopfreq_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(topfreq_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + topfreq_udf.cpp + ) + + PEERDIR( + yql/essentials/udfs/common/topfreq/static + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test + ut +) + + diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp new file mode 100644 index 00000000000..3e90765e405 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp @@ -0,0 +1 @@ +#include "unicode_base_udf.h"
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h new file mode 100644 index 00000000000..6cbf4b493ad --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h @@ -0,0 +1,534 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_allocator.h> +#include <yql/essentials/public/udf/udf_helpers.h> +#include <yql/essentials/utils/utf8.h> + +#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h> +#include <library/cpp/unicode/normalization/normalization.h> +#include <library/cpp/unicode/set/unicode_set.h> + +#include <library/cpp/deprecated/split/split_iterator.h> +#include <util/string/join.h> +#include <util/string/reverse.h> +#include <util/string/split.h> +#include <util/string/subst.h> +#include <util/charset/wide.h> +#include <util/charset/utf8.h> +#include <util/string/strip.h> +#include <util/string/ascii.h> +#include <util/charset/unidata.h> + +using namespace NYql; +using namespace NUdf; +using namespace NUnicode; + +namespace { + + template <class It> + struct TIsUnicodeSpaceAdapter { + bool operator()(const It& it) const noexcept { + return IsSpace(*it); + } + }; + + template <class It> + TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) { + return {}; + } + +#define NORMALIZE_UDF_MAP(XX) \ + XX(Normalize, NFC) \ + XX(NormalizeNFD, NFD) \ + XX(NormalizeNFC, NFC) \ + XX(NormalizeNFKD, NFKD) \ + XX(NormalizeNFKC, NFKC) + +#define IS_CATEGORY_UDF_MAP(XX) \ + XX(IsAscii, IsAscii) \ + XX(IsSpace, IsSpace) \ + XX(IsUpper, IsUpper) \ + XX(IsLower, IsLower) \ + XX(IsDigit, IsDigit) \ + XX(IsAlpha, IsAlpha) \ + XX(IsAlnum, IsAlnum) \ + XX(IsHex, IsHexdigit) + +#define NORMALIZE_UDF(name, mode) \ + SIMPLE_UDF(T##name, TUtf8(TAutoMap<TUtf8>)) { \ + const auto& inputRef = args[0].AsStringRef(); \ + const TUtf16String& input = UTF8ToWide(inputRef.Data(), inputRef.Size()); \ + const TString& output = 
WideToUTF8(Normalize<mode>(input)); \ + return valueBuilder->NewString(output); \ + } + +#define IS_CATEGORY_UDF(udfName, function) \ + SIMPLE_UDF(T##udfName, bool(TAutoMap<TUtf8>)) { \ + Y_UNUSED(valueBuilder); \ + const TStringBuf input(args[0].AsStringRef()); \ + bool result = true; \ + wchar32 rune; \ + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); \ + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); \ + while (cur != last) { \ + ReadUTF8CharAndAdvance(rune, cur, last); \ + if (!function(rune)) { \ + result = false; \ + break; \ + } \ + } \ + return TUnboxedValuePod(result); \ + } + + NORMALIZE_UDF_MAP(NORMALIZE_UDF) + IS_CATEGORY_UDF_MAP(IS_CATEGORY_UDF) + + SIMPLE_UDF(TIsUtf, bool(TOptional<char*>)) { + Y_UNUSED(valueBuilder); + if (args[0]) { + return TUnboxedValuePod(IsUtf8(args[0].AsStringRef())); + } else { + return TUnboxedValuePod(false); + } + } + + SIMPLE_UDF(TGetLength, ui64(TAutoMap<TUtf8>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + size_t result; + GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); + return TUnboxedValuePod(static_cast<ui64>(result)); + } + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TToUint64, ui64(TAutoMap<TUtf8>, TOptional<ui16>), 1) { + Y_UNUSED(valueBuilder); + const TString inputStr(args[0].AsStringRef()); + const char* input = inputStr.data(); + const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0)); + char* pos = nullptr; + unsigned long long res = std::strtoull(input, &pos, base); + ui64 ret = static_cast<ui64>(res); + if (!res && pos == input) { + UdfTerminate("Input string is not a number"); + } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { + UdfTerminate("Converted value falls out of Uint64 range"); + } else if (*pos) { + UdfTerminate("Input string contains junk after the number"); + } + return TUnboxedValuePod(ret); + } + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTryToUint64, 
TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), 1) { + Y_UNUSED(valueBuilder); + const TString inputStr(args[0].AsStringRef()); + const char* input = inputStr.data(); + const int base = static_cast<int>(args[1].GetOrDefault<ui16>(0)); + char* pos = nullptr; + unsigned long long res = std::strtoull(input, &pos, base); + ui64 ret = static_cast<ui64>(res); + if (!res && pos == input) { + return TUnboxedValuePod(); + } + if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { + return TUnboxedValuePod(); + } + if (*pos) { + return TUnboxedValuePod(); + } + return TUnboxedValuePod(ret); + } + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSubstring, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), 1) { + const TStringBuf input(args[0].AsStringRef()); + size_t from = args[1].GetOrDefault<ui64>(0); + size_t len = !args[2] ? TStringBuf::npos : size_t(args[2].Get<ui64>()); + return valueBuilder->NewString(SubstrUTF8(input, from, len)); + } + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TFind, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const std::string_view string(args[0].AsStringRef()); + const std::string_view needle(args[1].AsStringRef()); + std::string_view::size_type pos = 0U; + + if (auto p = args[2].GetOrDefault<ui64>(0ULL)) { + for (auto ptr = string.data(); p && pos < string.size(); --p) { + const auto width = WideCharSize(*ptr); + pos += width; + ptr += width; + } + } + + if (const auto find = string.find(needle, pos); std::string_view::npos != find) { + size_t result; + GetNumberOfUTF8Chars(string.data(), find, result); + return TUnboxedValuePod(static_cast<ui64>(result)); + } + return TUnboxedValuePod(); + } + + SIMPLE_UDF_WITH_OPTIONAL_ARGS(TRFind, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const std::string_view string(args[0].AsStringRef()); + const std::string_view needle(args[1].AsStringRef()); + std::string_view::size_type pos = std::string_view::npos; + + if (auto p = 
args[2].GetOrDefault<ui64>(std::string_view::npos); std::string_view::npos != p) { + pos = 0ULL; + for (auto ptr = string.data(); p && pos < string.size(); --p) { + const auto width = WideCharSize(*ptr); + pos += width; + ptr += width; + } + } + + if (const auto find = string.rfind(needle, pos); std::string_view::npos != find) { + size_t result; + GetNumberOfUTF8Chars(string.data(), find, result); + return TUnboxedValuePod(static_cast<ui64>(result)); + } + return TUnboxedValuePod(); + } + + using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; + + template <typename TIt> + static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + const TIt& it, + TTmpVector& result) { + for (const auto& elem : it) { + result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); + } + } + + template <typename TIt> + static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const TUtf16String::const_iterator start, + const TIt& it, + TTmpVector& result) { + const std::string_view& original = input.AsStringRef(); + size_t charPos = 0U, bytePos = 0U; + for (const auto& elem : it) { + for (const size_t next = std::distance(start, elem.TokenStart()); charPos < next; ++charPos) + bytePos += WideCharSize(original[bytePos]); + const auto from = bytePos; + + for (const size_t next = charPos + std::distance(elem.TokenStart(), elem.TokenDelim()); charPos < next; ++charPos) + bytePos += WideCharSize(original[bytePos]); + const auto size = bytePos - from; + result.emplace_back(valueBuilder->SubString(input, from, size)); + } + } + + template <typename TIt, typename TStrIt> + static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const TStrIt from, + TIt& it, + bool skipEmpty, + TTmpVector& result) { + if (skipEmpty) { + 
SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); + } else { + SplitToListImpl(valueBuilder, input, from, it, result); + } + } + + /* Argument names and TNamedArg aliases for the three named arguments that TSplitToList reads with GetOrDefault. */ constexpr char delimeterStringName[] = "DelimeterString"; + constexpr char skipEmptyName[] = "SkipEmpty"; + constexpr char limitName[] = "Limit"; + using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; + using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; + using TLimitArg = TNamedArg<ui64, limitName>; + + /* Splits args[0] by the delimiter args[1] and returns the pieces as a list of Utf8. DelimeterString (default true) splits the raw UTF-8 bytes by the whole delimiter string; when it is false both strings are widened with UTF8ToWide and the input is split by any character of the delimiter set. SkipEmpty (default false) is forwarded to SplitToListImpl. A non-zero Limit is applied to the splitter as Limit(limit + 1). When args[0] is unset the result vector stays empty and an empty list is returned. */ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<TUtf8>( + TOptional<TUtf8>, + TUtf8, + TDelimeterStringArg, + TSkipEmptyArg, + TLimitArg + ), + 3) { + TTmpVector result; + if (args[0]) { + const bool delimiterString = args[2].GetOrDefault<bool>(true); + const bool skipEmpty = args[3].GetOrDefault<bool>(false); + const auto limit = args[4].GetOrDefault<ui64>(0); + if (delimiterString) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view delimeter(args[1].AsStringRef()); + if (limit) { + /* NOTE(review): limit + 1 wraps to 0 when limit is the maximum ui64 value; confirm that case cannot occur. */ auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = StringSplitter(input).SplitByString(delimeter); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } else { + /* Set mode works on the wide copies of both strings. */ const auto& input = UTF8ToWide(args[0].AsStringRef()); + const auto& delimeter = UTF8ToWide(args[1].AsStringRef()); + if (limit) { + auto it = StringSplitter(input).SplitBySet(delimeter.c_str()).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = StringSplitter(input).SplitBySet(delimeter.c_str()); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } + } + return valueBuilder->NewList(result.data(), result.size()); + } + + /* Joins the items of the list args[0] with the delimiter args[1] via JoinSeq; unset list items are skipped. */ SIMPLE_UDF(TJoinFromList, TUtf8(TAutoMap<TListType<TOptional<TUtf8>>>, TUtf8)) { + const auto input = args[0].GetListIterator(); + const 
std::string_view delimeter(args[1].AsStringRef()); + std::vector<TString> items; + + for (TUnboxedValue current; input.Next(current);) { + if (current) { + items.emplace_back(current.AsStringRef()); + } + } + + return valueBuilder->NewString(JoinSeq(delimeter, items)); + } + + /* Returns NLevenshtein::Distance of the two inputs after both are widened with UTF8ToWide, so the distance is computed over UTF-16 strings rather than UTF-8 bytes. */ SIMPLE_UDF(TLevensteinDistance, ui64(TAutoMap<TUtf8>, TAutoMap<TUtf8>)) { + Y_UNUSED(valueBuilder); + const TStringBuf left(args[0].AsStringRef()); + const TStringBuf right(args[1].AsStringRef()); + const TUtf16String& leftWide = UTF8ToWide(left); + const TUtf16String& rightWide = UTF8ToWide(right); + const ui64 result = NLevenshtein::Distance(leftWide, rightWide); + return TUnboxedValuePod(result); + } + + /* Replaces occurrences of args[1] in args[0] with args[2] using SubstGlobal; when SubstGlobal reports zero replacements the original value is returned without building a new string. */ SIMPLE_UDF(TReplaceAll, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8)) { + if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) + return valueBuilder->NewString(result); + else + return args[0]; + } + + /* Replaces the first match of args[1] (located with std::string::find) by args[2]; returns args[0] unchanged when there is no match. */ SIMPLE_UDF(TReplaceFirst, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; + } + + /* Replaces the last match of args[1] (located with std::string::rfind) by args[2]; returns args[0] unchanged when there is no match. */ SIMPLE_UDF(TReplaceLast, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; + } + + /* Removes from args[0] every code point that occurs in args[1], which is used as a set of code points. The UTF-32 copy is compacted in place; args[0] is returned unchanged when nothing was removed. */ SIMPLE_UDF(TRemoveAll, TUtf8(TAutoMap<TUtf8>, TUtf8)) { + TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef()); + const TUtf32String remove = UTF8ToUTF32<true>(args[1].AsStringRef()); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + size_t 
tpos = 0; + for (const wchar32 c : input) { + if (!chars.contains(c)) { + input[tpos++] = c; + } + } + if (tpos != input.size()) { + input.resize(tpos); + return valueBuilder->NewString(WideToUTF8(input)); + } + return args[0]; + } + + /* Removes only the first code point of args[0] that belongs to the code point set args[1]; returns args[0] unchanged when no such code point exists. */ SIMPLE_UDF(TRemoveFirst, TUtf8(TAutoMap<TUtf8>, TUtf8)) { + TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef()); + const TUtf32String remove = UTF8ToUTF32<true>(args[1].AsStringRef()); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars.contains(*it)) { + input.erase(it); + return valueBuilder->NewString(WideToUTF8(input)); + } + } + return args[0]; + } + + /* Removes only the last code point of args[0] that belongs to the code point set args[1]. The scan runs over reverse iterators, and crend() - it - 1 turns the reverse position into the forward index passed to erase. Returns args[0] unchanged when no such code point exists. */ SIMPLE_UDF(TRemoveLast, TUtf8(TAutoMap<TUtf8>, TUtf8)) { + TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef()); + const TUtf32String remove = UTF8ToUTF32<true>(args[1].AsStringRef()); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars.contains(*it)) { + input.erase(input.crend() - it - 1, 1); + return valueBuilder->NewString(WideToUTF8(input)); + } + } + return args[0]; + } + + /* Decodes args[0] into a list of ui32 code points. The code points are counted first so the result array can be allocated once, then the string is read rune by rune. Throws yexception when counting fails, when more runes are read than were counted, or when the UTF-8 is malformed. */ SIMPLE_UDF(TToCodePointList, TListType<ui32>(TAutoMap<TUtf8>)) { + size_t codePointCount = 0; + const auto& inputRef = args[0].AsStringRef(); + if (!GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), codePointCount)) { + // should not happen but still we have to check return code + ythrow yexception() << "Unable to count code points"; + } + + TUnboxedValue* itemsPtr = nullptr; + auto result = valueBuilder->NewArray(codePointCount, itemsPtr); + const unsigned char* current = reinterpret_cast<const unsigned char*>(inputRef.Data()); + const unsigned char* end = current + inputRef.Size(); + wchar32 rune = BROKEN_RUNE; + ui32 codePointIndex = 0; + RECODE_RESULT retcode = RECODE_OK; + while (current < end && RECODE_OK == (retcode = ReadUTF8CharAndAdvance(rune, current, end))) { + if (codePointIndex 
>= codePointCount) { + // sanity check + ythrow yexception() << "Too big code point index " << codePointIndex << ", expecting only " << codePointCount << " code points"; + } + itemsPtr[codePointIndex++] = TUnboxedValuePod(static_cast<ui32>(rune)); + } + + if (retcode != RECODE_OK) { + ythrow yexception() << "Malformed UTF-8 string"; + } + + return result; + } + + /* Encodes a list of ui32 code points as one UTF-8 string. When GetElements() yields a direct element array, the output is written into a single TTempBuf sized by WideToUTF8BufferSize. Otherwise the list is walked with an iterator and every rune is appended through a 4-byte scratch buffer. */ SIMPLE_UDF(TFromCodePointList, TUtf8(TAutoMap<TListType<ui32>>)) { + auto input = args[0]; + if (auto elems = input.GetElements()) { + const auto elemCount = input.GetListLength(); + auto bufferSize = WideToUTF8BufferSize(elemCount); + TTempBuf buffer(bufferSize); + auto bufferPtr = buffer.Data(); + auto bufferEnd = buffer.Data() + bufferSize; + for (ui64 i = 0; i != elemCount; ++i) { + const auto& item = elems[i]; + const wchar32 rune = item.Get<ui32>(); + size_t written = 0; + WideToUTF8(&rune, 1, bufferPtr, written); + Y_ENSURE(written <= 4); + bufferPtr += written; + Y_ENSURE(bufferPtr <= bufferEnd); + } + return valueBuilder->NewString(TStringRef(buffer.Data(), bufferPtr - buffer.Data())); + } + + /* Iterator fallback for lists without a direct element array. */ std::vector<char, NUdf::TStdAllocatorForUdf<char>> buffer; + buffer.reserve(TUnboxedValuePod::InternalBufferSize); + + const auto& iter = input.GetListIterator(); + char runeBuffer[4] = {}; + for (NUdf::TUnboxedValue item; iter.Next(item); ) { + const wchar32 rune = item.Get<ui32>(); + size_t written = 0; + WideToUTF8(&rune, 1, runeBuffer, written); + Y_ENSURE(written <= 4); + buffer.insert(buffer.end(), runeBuffer, runeBuffer + written); + } + + return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); + } + + /* Widens the input with UTF8ToWide, reverses it with ReverseInPlace and returns the result converted back to UTF-8. */ SIMPLE_UDF(TReverse, TUtf8(TAutoMap<TUtf8>)) { + auto wide = UTF8ToWide(args[0].AsStringRef()); + ReverseInPlace(wide); + return valueBuilder->NewString(WideToUTF8(wide)); + } + + /* Applies ToLower to a wide copy of the input; when ToLower returns false the original value is returned without re-encoding. */ SIMPLE_UDF(TToLower, TUtf8(TAutoMap<TUtf8>)) { + if (auto wide = UTF8ToWide(args->AsStringRef()); ToLower(wide)) + return valueBuilder->NewString(WideToUTF8(wide)); + else + return *args; + } + + 
/* Applies ToUpper to a wide copy of the input; when ToUpper returns false the original value is returned without re-encoding. */ SIMPLE_UDF(TToUpper, TUtf8(TAutoMap<TUtf8>)) { + if (auto wide = UTF8ToWide(args->AsStringRef()); ToUpper(wide)) + return valueBuilder->NewString(WideToUTF8(wide)); + else + return *args; + } + + /* Applies ToTitle to a wide copy of the input; when ToTitle returns false the original value is returned without re-encoding. */ SIMPLE_UDF(TToTitle, TUtf8(TAutoMap<TUtf8>)) { + if (auto wide = UTF8ToWide(args->AsStringRef()); ToTitle(wide)) + return valueBuilder->NewString(WideToUTF8(wide)); + else + return *args; + } + + /* Converts the input to UTF-32, applies StripString with the IsUnicodeSpaceAdapter predicate and returns the stripped text as UTF-8. */ SIMPLE_UDF(TStrip, TUtf8(TAutoMap<TUtf8>)) { + const TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef()); + const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); + return valueBuilder->NewString(WideToUTF8(result)); + } + + /* Returns true when every code point of args[0] is contained in the TUnicodeSet parsed from the pattern args[1]; an empty input yields true. A pattern that fails to parse ends the call through UdfTerminate with the parser message. */ SIMPLE_UDF(TIsUnicodeSet, bool(TAutoMap<TUtf8>, TUtf8)) { + Y_UNUSED(valueBuilder); + const TStringBuf input(args[0].AsStringRef()); + const TUtf16String& customCategory = UTF8ToWide(args[1].AsStringRef()); + TUnicodeSet unicodeSet; + try { + unicodeSet.Parse(customCategory); + } catch (...) { + UdfTerminate((TStringBuilder() << "Failed to parse unicode set: " << CurrentExceptionMessage()).c_str()); + } + bool result = true; + wchar32 rune; + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); + while (cur != last) { + /* NOTE(review): the return code of ReadUTF8CharAndAdvance is ignored here, unlike in TToCodePointList; this presumably relies on the Utf8 argument being valid already, confirm. */ ReadUTF8CharAndAdvance(rune, cur, last); + if (!unicodeSet.Has(rune)) { + result = false; + break; + } + } + return TUnboxedValuePod(result); + } + /* The two REGISTER macros are the callbacks handed to NORMALIZE_UDF_MAP and IS_CATEGORY_UDF_MAP; each expands an entry to its T-prefixed class name followed by a comma. EXPORTED_UNICODE_BASE_UDF is the resulting comma-separated list of all UDF classes of this header. */ +#define REGISTER_NORMALIZE_UDF(name, mode) T##name, +#define REGISTER_IS_CATEGORY_UDF(name, function) T##name, +#define EXPORTED_UNICODE_BASE_UDF \ + NORMALIZE_UDF_MAP(REGISTER_NORMALIZE_UDF) \ + IS_CATEGORY_UDF_MAP(REGISTER_IS_CATEGORY_UDF) \ + TIsUtf, \ + TGetLength, \ + TSubstring, \ + TFind, \ + TRFind, \ + TSplitToList, \ + TJoinFromList, \ + TLevensteinDistance, \ + TReplaceAll, \ + TReplaceFirst, \ + TReplaceLast, \ + TRemoveAll, \ + TRemoveFirst, \ + TRemoveLast, \ + TToCodePointList, \ + TFromCodePointList, \ + TReverse, \ + TToLower, \ + 
TToUpper, \ + TToTitle, \ + TToUint64, \ + TTryToUint64, \ + TStrip, \ + TIsUnicodeSet +} diff --git a/yql/essentials/udfs/common/unicode_base/lib/ya.make b/yql/essentials/udfs/common/unicode_base/lib/ya.make new file mode 100644 index 00000000000..f50858d02ae --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/lib/ya.make @@ -0,0 +1,22 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 27 + 0 +) + +SRCS( + unicode_base_udf.cpp +) + +PEERDIR( + library/cpp/deprecated/split + library/cpp/string_utils/levenshtein_diff + library/cpp/unicode/normalization + library/cpp/unicode/set + yql/essentials/public/udf + yql/essentials/utils +) + +END() diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/result.json b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json new file mode 100644 index 00000000000..0b47a674443 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/result.json @@ -0,0 +1,67 @@ +{ + "test.test[Find]": [ + { + "uri": "file://test.test_Find_/results.txt" + } + ], + "test.test[IsCategory]": [ + { + "uri": "file://test.test_IsCategory_/results.txt" + } + ], + "test.test[List]": [ + { + "uri": "file://test.test_List_/results.txt" + } + ], + "test.test[Remove]": [ + { + "uri": "file://test.test_Remove_/results.txt" + } + ], + "test.test[Replace]": [ + { + "uri": "file://test.test_Replace_/results.txt" + } + ], + "test.test[Strip]": [ + { + "uri": "file://test.test_Strip_/results.txt" + } + ], + "test.test[ToUint64F0]": [ + { + "uri": "file://test.test_ToUint64F0_/extracted" + } + ], + "test.test[ToUint64F1]": [ + { + "uri": "file://test.test_ToUint64F1_/extracted" + } + ], + "test.test[ToUint64F2]": [ + { + "uri": "file://test.test_ToUint64F2_/extracted" + } + ], + "test.test[ToUint64]": [ + { + "uri": "file://test.test_ToUint64_/results.txt" + } + ], + "test.test[To]": [ + { + "uri": "file://test.test_To_/results.txt" + } + ], + "test.test[TryToUint64]": [ + { + "uri": "file://test.test_TryToUint64_/results.txt" 
+ } + ], + "test.test[Unicode]": [ + { + "uri": "file://test.test_Unicode_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt new file mode 100644 index 00000000000..bcccb2b5119 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Find_/results.txt @@ -0,0 +1,86 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "\xC3\xA4stig, m\xC3\266chten, ausf\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m\xC3\xB6gen, \xC3\274berall, regelm"; + "\xC3\266chten, ausf\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m"; + "\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m\xC3\xB6gen, " + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k"; + "\xC3\266chten, ausf\xC3\xBChrlich, sp\xC3\xA4ter, k\xC3\xB6nnen, nat\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r\xC3\274ckw\xC3\xA4rts, k\xC3\xA4mpfen, m"; + "\xC3\xBCrlich, universit\xC3\xA4t, \xC3\266ffentlich, r" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt new file mode 100644 index 00000000000..a6fd861c645 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_IsCategory_/results.txt @@ -0,0 +1,164 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column7"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column8"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column9"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column10"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column11"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column12"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column13"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column14"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column15"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column16"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column17"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %false; + %true; + %false; + %true; + %false; + %true; + %false; + %true; + %false; + %true; + %false; + %true; + %false; + %true; + %false; + %true; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt new file mode 100644 index 00000000000..5cf8e238cfa --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_List_/results.txt @@ -0,0 +1,265 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "equals_to_original"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "replace_delimeter"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "just_split"; + [ + "ListType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "first"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "skip_empty"; + [ + "ListType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "multichar_delim_set"; + [ + "ListType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "multichar_delim_string"; + [ + "ListType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "limited"; + [ + "ListType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "a@b@c"; + "a@b@c"; + "a#b#c"; + [ + "a"; + "b"; + "c" + ]; + [ + "a" + ]; + [ + "a"; + "b"; + "c" + ]; + [ + "a"; + ""; + ""; + "c" + ]; + [ + "a@"; + "c" + ]; + [ + "a"; + "b@c" + ] + ]; + [ + "@a@b@c"; + "@a@b@c"; + "#a#b#c"; + [ + ""; + "a"; + "b"; + "c" + ]; + [ + "" + ]; + [ + "a"; + "b"; + "c" + ]; + [ + ""; + "a"; + ""; + ""; + "c" + ]; + [ + "@a@"; + "c" + ]; + [ + ""; + "a@b@c" + ] + ]; + [ + "@@@a@a"; + "@@@a@a"; + "###a#a"; + [ + ""; + ""; + ""; + "a"; + "a" + ]; + [ + "" + ]; + [ + "a"; + "a" + ]; + [ + ""; + ""; + ""; + "a"; + "a" + ]; + [ + "@@@a@a" + ]; + [ + ""; + "@@a@a" + ] + ]; + [ + "d#e#f"; + "d#e#f"; + "d#e#f"; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ]; + [ + "d#e#f" + ] + ]; + [ + "d"; + "d"; + "d"; + [ + "d" + ]; + [ + "d" + ]; + [ + "d" + ]; + [ + "d" + ]; + 
[ + "d" + ]; + [ + "d" + ] + ]; + [ + ""; + ""; + ""; + [ + "" + ]; + [ + "" + ]; + []; + [ + "" + ]; + [ + "" + ]; + [ + "" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt new file mode 100644 index 00000000000..11bcb15a2f2 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Remove_/results.txt @@ -0,0 +1,178 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "all"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "first"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "last"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "first2"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "last2"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2" + ]; + [ + "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2" + ]; + [ + "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2" + ] + ]; + [ + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B" + ] + ]; + [ + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + 
"\xD0\xB2\xD1\x8B\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2" + ]; + [ + "\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2" + ]; + [ + "\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2" + ] + ]; + [ + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD0\xB0\xD1\x8B" + ] + ]; + [ + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt new file mode 100644 index 00000000000..7390dbdbc32 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Replace_/results.txt @@ -0,0 +1,228 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "all"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "first"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "last"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "first2"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "last2"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "first3"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "last3"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2z\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2z" + ]; + [ + "\xD1\x8B\xD0\xB2\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2" + ]; + [ + "\xD1\x8B\xD0\xB2zzz\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2zzz" + ] + ]; + [ + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8Fz\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0z\xD1\x87\xD1\x8B" + ]; + [ + 
"\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8Fzzz\xD0\xB0\xD1\x87\xD1\x8B" + ]; + [ + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0zzz\xD1\x87\xD1\x8B" + ] + ]; + [ + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "z\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2z" + ]; + [ + "\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2" + ]; + [ + "zzz\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0" + ]; + [ + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2zzz" + ] + ]; + [ + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0zzz\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2z\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0z\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2zzz\xD0\xB0\xD1\x84\xD1\x8B" + ]; + [ + "gd2\xD1\x86\xD0\2713\xD1\x8B\xD0\xB2\xD0\xB0zzz\xD1\x84\xD1\x8B" + ] + ]; + [ + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt new file mode 100644 index 00000000000..613b639ed05 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Strip_/results.txt @@ -0,0 +1,76 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"; + "\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD1\x87\xD1\x81\xD1\x8F\xD0\xB0\xD0\xB0\xD1\x87\xD1\x8B"; + "\xD0\xB0\xD0\xB0\xD0\xB2\xD1\x8B\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0\xD1\x8B\xD0\xB2\xD0\xB0"; + "\xD0\xB0\xD0\xB0\xD0\xB2 \xD1\x8B\xD0\xB0 \xD1\x8B\xD0\xB2\xD0\xB0 \xD1\x8B\xD0\xB2\xD0\xB0"; + "\xD1\x8B\xD0\xB2\xD0\xB0"; + "\xD0\xB2\xD0\xB0\xD0\xBE\xD0\xB0\xD0\xBE"; + "" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted new file mode 100644 index 00000000000..d03cedfb327 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F0_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:2:1: Fatal: Input string is not a number + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted new file mode 100644 index 00000000000..3ed803548b5 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F1_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:2:1: Fatal: Input string contains junk after the number + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted new file mode 100644 index 00000000000..c441fbf4e1b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64F2_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:2:1: Fatal: Converted value falls out of Uint64 range + SELECT + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt new file mode 100644 index 00000000000..9334d2f22d7 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_ToUint64_/results.txt @@ -0,0 +1,76 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "305441741"; + "4"; + "420"; + "1052688"; + "42"; + "33288"; + "101" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt new file mode 100644 index 00000000000..7f7b2525d78 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_To_/results.txt @@ -0,0 +1,102 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "lower"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "upper"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "title"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "reverse"; + [ + "DataType"; + "Utf8" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "test"; + "test"; + "TEST"; + "Test"; + "tset" + ]; + [ + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"; + "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"; + "\xD1\x82\xD1\x81\xD0\xB5\xD1\x82" + ]; + [ + "TeSt"; + "test"; + "TEST"; + "Test"; + "tSeT" + ]; + [ + "\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"; + "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"; + "\xD0\xA2\xD0\x95\xD0\xA1\xD0\xA2"; + "\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82"; + "\xD0\xA2\xD1\x81\xD0\x95\xD1\x82" + ]; + [ + "Eyl\xC3\xBCl"; + "eyl\xC3\xBCl"; + "EYL\xC3\x9CL"; + "Eyl\xC3\xBCl"; + "l\xC3\xBClyE" + ]; + [ + "6"; + "6"; + "6"; + "6"; + "6" + ]; + [ + ""; + ""; + ""; + ""; + "" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt new file mode 100644 index 00000000000..594ac1486c0 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_TryToUint64_/results.txt @@ -0,0 +1,198 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "305441741" + ]; + [ + "4" + ]; + [ + "420" + ]; + [ + "1052688" + ]; + [ + "42" + ]; + [ + "101010" + ]; + [ + "101" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt new file mode 100644 index 00000000000..465ad350553 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/canondata/test.test_Unicode_/results.txt @@ -0,0 +1,509 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "normalize"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "is"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "length"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "one_end_substring"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "two_end_substring"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "remove_all"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "levenstein"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "code_point_list"; + [ + "OptionalType"; + [ + "ListType"; + [ + "DataType"; + "Uint32" + ] + ] + ] + ]; + [ + "from_code_point_list"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "from_lazy_code_point_list"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "reverse"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "find"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "rfind"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "find_from"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "rfind_from"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "Eyl\xC3\xBCl" + ]; + [ + "Eyl\xC3\xBCl" + ]; + %true; + [ + "5" + ]; + [ + "yl\xC3\xBCl" + ]; + [ + "Ey" + ]; + [ + "Eyl\xC3\xBCl" + ]; + [ + "5" + ]; + [ + [ + 
"69"; + "121"; + "108"; + "252"; + "108" + ] + ]; + [ + "Eyl\xC3\xBCl" + ]; + [ + "Eyl\xC3\xBCl" + ]; + [ + "l\xC3\xBClyE" + ]; + #; + #; + #; + # + ]; + [ + [ + "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" + ]; + [ + "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" + ]; + %true; + [ + "6" + ]; + [ + "\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" + ]; + [ + "\xD0\xB6\xD0\xBD" + ]; + [ + "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" + ]; + [ + "5" + ]; + [ + [ + "1078"; + "1085"; + "1110"; + "1118"; + "1085"; + "1103" + ] + ]; + [ + "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" + ]; + [ + "\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F" + ]; + [ + "\xD1\x8F\xD0\xBD\xD1\x9E\xD1\x96\xD0\xBD\xD0\xB6" + ]; + #; + #; + #; + # + ]; + [ + [ + "\xC3\xBAnora" + ]; + [ + "\xC3\xBAnora" + ]; + %true; + [ + "5" + ]; + [ + "nora" + ]; + [ + "\xC3\xBAn" + ]; + [ + "\xC3\xBAnoa" + ]; + [ + "5" + ]; + [ + [ + "250"; + "110"; + "111"; + "114"; + "97" + ] + ]; + [ + "\xC3\xBAnora" + ]; + [ + "\xC3\xBAnora" + ]; + [ + "aron\xC3\xBA" + ]; + #; + #; + #; + # + ]; + [ + [ + "Ci\xD1\x87 Ci\xD1\x87" + ]; + [ + "Ci\xD1\x87 Ci\xD1\x87" + ]; + %true; + [ + "7" + ]; + [ + "i\xD1\x87 Ci\xD1\x87" + ]; + [ + "Ci" + ]; + [ + "Ci Ci" + ]; + [ + "5" + ]; + [ + [ + "67"; + "105"; + "1095"; + "32"; + "67"; + "105"; + "1095" + ] + ]; + [ + "Ci\xD1\x87 Ci\xD1\x87" + ]; + [ + "Ci\xD1\x87 Ci\xD1\x87" + ]; + [ + "\xD1\x87iC \xD1\x87iC" + ]; + #; + #; + #; + # + ]; + [ + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ]; + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ]; + %true; + [ + "13" + ]; + [ + "\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ]; + [ + "\xD0\xBF\xD1\x80" + ]; + [ + "\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ]; + [ + "5" + ]; + [ + [ + "1087"; + "1088"; + "1080"; + 
"1074"; + "1077"; + "1090"; + "32"; + "1087"; + "1088"; + "1080"; + "1074"; + "1077"; + "1090" + ] + ]; + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ]; + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ]; + [ + "\xD1\x82\xD0\xB5\xD0\xB2\xD0\xB8\xD1\x80\xD0\xBF \xD1\x82\xD0\xB5\xD0\xB2\xD0\xB8\xD1\x80\xD0\xBF" + ]; + [ + "4" + ]; + [ + "11" + ]; + [ + "11" + ]; + [ + "4" + ] + ]; + [ + [ + "6" + ]; + [ + "6" + ]; + %true; + [ + "1" + ]; + [ + "" + ]; + [ + "6" + ]; + [ + "6" + ]; + [ + "1" + ]; + [ + [ + "54" + ] + ]; + [ + "6" + ]; + [ + "6" + ]; + [ + "6" + ]; + #; + #; + #; + # + ]; + [ + [ + "" + ]; + [ + "" + ]; + %true; + [ + "0" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + [ + "0" + ]; + [ + [] + ]; + [ + "" + ]; + [ + "" + ]; + [ + "" + ]; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Find.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Find.sql new file mode 100644 index 00000000000..9a9a58752e3 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Find.sql @@ -0,0 +1,13 @@ +$text ="lästig, möchten, ausführlich, später, können, natürlich, universität, öffentlich, rückwärts, kämpfen, mögen, überall, regelmäßig"u; + +SELECT + Unicode::Substring($text, Unicode::Find($text, "ä"u), Unicode::RFind($text, "ä"u) - Unicode::Find($text, "ä"u)), + Unicode::Substring($text, Unicode::Find($text, "ö"u), Unicode::RFind($text, "ö"u) - Unicode::Find($text, "ö"u)), + Unicode::Substring($text, Unicode::Find($text, "ü"u), Unicode::RFind($text, "ü"u) - Unicode::Find($text, "ü"u)); + + +SELECT + Unicode::Substring($text, Unicode::Find($text, "ä"u, 30ul), Unicode::RFind($text, "ä"u, 123ul) - Unicode::Find($text, "ä"u, 30ul)), + Unicode::Substring($text, Unicode::Find($text, "ö"u, 9ul), Unicode::RFind($text, "ö"u, 103ul) - Unicode::Find($text, "ö"u, 9ul)), + Unicode::Substring($text, Unicode::Find($text, "ü"u, 45ul), Unicode::RFind($text, "ü"u, 83ul) - Unicode::Find($text, "ü"u, 45ul)); + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql new file mode 100644 index 00000000000..2effa23221e --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/IsCategory.sql @@ -0,0 +1,21 @@ +/* syntax version 1 */ +SELECT + Unicode::IsAscii("sdf"u), + Unicode::IsAscii("выавыа"u), + Unicode::IsSpace(" \u2002\u200a"u), + Unicode::IsSpace("выавыа"u), + Unicode::IsUpper("ФЫВ"u), + Unicode::IsUpper("вВаВыа"u), + Unicode::IsLower("фыв"u), + Unicode::IsLower("вВаВыа"u), + Unicode::IsDigit("1234"u), + Unicode::IsDigit("выавыа"u), + Unicode::IsAlpha("фвфы"u), + Unicode::IsAlpha("вы2в-а"u), + Unicode::IsAlnum("фыв13в"u), + Unicode::IsAlnum("выа1-}ыв"u), + 
Unicode::IsHex("0F3A4E"u), + Unicode::IsHex("ваоао"u), + Unicode::IsUnicodeSet("ваоао"u, "[вао]"u), + Unicode::IsUnicodeSet("ваоао"u, "[ваб]"u) + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/List.in b/yql/essentials/udfs/common/unicode_base/test/cases/List.in new file mode 100644 index 00000000000..949cf26c776 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/List.in @@ -0,0 +1,6 @@ +{"key"="1";"subkey"="1";"value"="a@b@c"}; +{"key"="1";"subkey"="1";"value"="@a@b@c"}; +{"key"="1";"subkey"="1";"value"="@@@a@a"}; +{"key"="2";"subkey"="2";"value"="d#e#f"}; +{"key"="3";"subkey"="3";"value"="d"}; +{"key"="4";"subkey"="4";"value"=""}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr new file mode 100644 index 00000000000..990efb1ff2c --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/List.in.attr @@ -0,0 +1,12 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["subkey";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + "SortDirections"=[1;1;]; + "SortedBy"=["key";"subkey";]; + "SortedByTypes"=[["DataType";"Utf8";];["DataType";"Utf8";];]; + "SortMembers"=["key";"subkey";]; +}} + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/List.sql b/yql/essentials/udfs/common/unicode_base/test/cases/List.sql new file mode 100644 index 00000000000..814c5cb27b0 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/List.sql @@ -0,0 +1,12 @@ +/* syntax version 1 */ +SELECT + value, + Ensure(value, Unicode::JoinFromList(Unicode::SplitToList(value, "@"u), "@"u) == value) AS equals_to_original, + Unicode::JoinFromList(Unicode::SplitToList(value, "@"u), "#"u) AS replace_delimeter, + Unicode::SplitToList(value, "@"u) AS just_split, + Unicode::SplitToList(value, "@"u)[0] as first, + Unicode::SplitToList(value, "@"u, true AS SkipEmpty) AS skip_empty, + 
Unicode::SplitToList(value, "b@"u, false AS DelimeterString) AS multichar_delim_set, + Unicode::SplitToList(value, "b@"u, true AS DelimeterString) AS multichar_delim_string, + Unicode::SplitToList(value, "@"u, 1 AS Limit) AS limited +FROM Input; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql new file mode 100644 index 00000000000..ee96037f79b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Remove.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +SELECT + CAST(value AS Utf8), + Unicode::RemoveAll(CAST(value AS Utf8), Utf8("фа")) AS all, + Unicode::RemoveFirst(CAST(value AS Utf8), Utf8("а")) AS first, + Unicode::RemoveLast(CAST(value AS Utf8), Utf8("а")) AS last, + Unicode::RemoveFirst(CAST(value AS Utf8), Utf8("фа")) AS first2, + Unicode::RemoveLast(CAST(value AS Utf8), Utf8("фа")) AS last2 +FROM Input; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql new file mode 100644 index 00000000000..d6239844133 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Replace.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ +SELECT + CAST(value AS Utf8), + Unicode::ReplaceAll(CAST(value AS Utf8), Utf8("аф"), Utf8("zzz")) AS all, + Unicode::ReplaceFirst(CAST(value AS Utf8), Utf8("а"), Utf8("z")) AS first, + Unicode::ReplaceLast(CAST(value AS Utf8), Utf8("а"), Utf8("z")) AS last, + Unicode::ReplaceFirst(CAST(value AS Utf8), Utf8("а"), Utf8("")) AS first2, + Unicode::ReplaceLast(CAST(value AS Utf8), Utf8("а"), Utf8("")) AS last2, + Unicode::ReplaceFirst(CAST(value AS Utf8), Utf8("а"), Utf8("zzz")) AS first3, + Unicode::ReplaceLast(CAST(value AS Utf8), Utf8("а"), Utf8("zzz")) AS last3 +FROM Input; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql new file mode 100644 index 
00000000000..45bde163e06 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Strip.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +SELECT + Unicode::Strip("ываыва"u), + Unicode::Strip(" ячсячсяаачы"u), + Unicode::Strip("аавыаываыва "u), + Unicode::Strip("аав ыа ыва ыва "u), + Unicode::Strip("\u2009ыва\n"u), + Unicode::Strip("\u200aваоао\u2002"u), + Unicode::Strip(""u) diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.in b/yql/essentials/udfs/common/unicode_base/test/cases/To.in new file mode 100644 index 00000000000..5effdb9971b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.in @@ -0,0 +1,8 @@ +{"key"="1";"subkey"="1";"value"="test"}; +{"key"="2";"subkey"="2";"value"="\xD1\x82\xD0\xB5\xD1\x81\xD1\x82"}; +{"key"="3";"subkey"="3";"value"="TeSt"}; +{"key"="4";"subkey"="4";"value"="\xD1\x82\xD0\x95\xD1\x81\xD0\xA2"}; +{"key"="5";"subkey"="5";"value"="Eyl\xC3\xBCl"}; +{"key"="6";"subkey"="6";"value"="6"}; +{"key"="4";"subkey"="4";"value"=""}; + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr b/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr new file mode 100644 index 00000000000..990efb1ff2c --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.in.attr @@ -0,0 +1,12 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"Utf8"]]; + ["subkey";["DataType";"Utf8"]]; + ["value";["DataType";"Utf8"]] + ]]; + "SortDirections"=[1;1;]; + "SortedBy"=["key";"subkey";]; + "SortedByTypes"=[["DataType";"Utf8";];["DataType";"Utf8";];]; + "SortMembers"=["key";"subkey";]; +}} + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/To.sql b/yql/essentials/udfs/common/unicode_base/test/cases/To.sql new file mode 100644 index 00000000000..c7207d2dcd6 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/To.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +SELECT + value, + Unicode::ToLower(value) AS lower, + Unicode::ToUpper(value) AS 
upper, + Unicode::ToTitle(value) AS title, + Unicode::Reverse(value) AS reverse, +FROM Input; + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql new file mode 100644 index 00000000000..c4059a85820 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64.sql @@ -0,0 +1,9 @@ +SELECT + Unicode::ToUint64("0x1234abcd"), + Unicode::ToUint64("0X4"), + Unicode::ToUint64("0644"), + Unicode::ToUint64("0101010", 16), + Unicode::ToUint64("0101010", 2), + Unicode::ToUint64("0101010"), + Unicode::ToUint64("101"); + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg new file mode 100644 index 00000000000..83cfd96179a --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.cfg @@ -0,0 +1,2 @@ +xfail + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql new file mode 100644 index 00000000000..dd1182a562d --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F0.sql @@ -0,0 +1,3 @@ +SELECT + Unicode::ToUint64("hell"); + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg new file mode 100644 index 00000000000..83cfd96179a --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.cfg @@ -0,0 +1,2 @@ +xfail + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql new file mode 100644 index 00000000000..f42380ee803 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F1.sql @@ -0,0 +1,3 @@ +SELECT + Unicode::ToUint64("01238"); + diff --git 
a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg new file mode 100644 index 00000000000..83cfd96179a --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.cfg @@ -0,0 +1,2 @@ +xfail + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql new file mode 100644 index 00000000000..1a9b7e2449f --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/ToUint64F2.sql @@ -0,0 +1,3 @@ +SELECT + Unicode::ToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql new file mode 100644 index 00000000000..b2f4fa850ab --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/TryToUint64.sql @@ -0,0 +1,17 @@ +SELECT + Unicode::TryToUint64("hell", 10); + +SELECT + Unicode::TryToUint64("01238", 8); + +SELECT + Unicode::TryToUint64("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); + +SELECT + Unicode::TryToUint64("0x1234abcd", 16), + Unicode::TryToUint64("0X4", 16), + Unicode::TryToUint64("0644", 8), + Unicode::TryToUint64("0101010", 16), + Unicode::TryToUint64("0101010", 2), + Unicode::TryToUint64("0101010", 10), + Unicode::TryToUint64("101", 10); diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in new file mode 100644 index 00000000000..55f0307e35c --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.in @@ -0,0 +1,7 @@ +{"key"="";"subkey"="";"value"="Eyl\xC3\xBCl"}; +{"key"="";"subkey"="";"value"="\xD0\xB6\xD0\xBD\xD1\x96\xD1\x9E\xD0\xBD\xD1\x8F"}; +{"key"="";"subkey"="";"value"="\xC3\xBAnora"}; +{"key"="";"subkey"="";"value"="Ci\xD1\x87 Ci\xD1\x87"}; 
+{"key"="";"subkey"="";"value"="\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82 \xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82"}; +{"key"="";"subkey"="";"value"="6"}; +{"key"="";"subkey"="";"value"=""}; diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql new file mode 100644 index 00000000000..b330682b6ed --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/Unicode.sql @@ -0,0 +1,19 @@ +/* syntax version 1 */ +SELECT + value AS value, + Unicode::Normalize(value) AS normalize, + Unicode::IsUtf(value) AS is, + Unicode::GetLength(value) AS length, + Unicode::Substring(value, 1) AS one_end_substring, + Unicode::Substring(value, 0, 2) AS two_end_substring, + Unicode::RemoveAll(value, "\xD1\x87пr") AS remove_all, + Unicode::LevensteinDistance(value, value || Unicode::Substring(value, 0, 5)) AS levenstein, + Unicode::ToCodePointList(value) AS code_point_list, + Unicode::FromCodePointList(Unicode::ToCodePointList(value)) AS from_code_point_list, + Unicode::FromCodePointList(YQL::LazyList(Unicode::ToCodePointList(value))) AS from_lazy_code_point_list, + Unicode::Reverse(value) AS reverse, + Unicode::Find(value, "ет"u) AS find, + Unicode::RFind(value, "ет"u) AS rfind, + Unicode::Find(value, "ет"u, 7ul) AS find_from, + Unicode::RFind(value, "ет"u, 7ul) AS rfind_from +FROM (SELECT CAST(value AS Utf8) AS value FROM Input); diff --git a/yql/essentials/udfs/common/unicode_base/test/cases/default.in b/yql/essentials/udfs/common/unicode_base/test/cases/default.in new file mode 100644 index 00000000000..6a9412ca375 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/cases/default.in @@ -0,0 +1,5 @@ +{"key"="1";"subkey"="1";"value"="ываыва"}; +{"key"="2";"subkey"="2";"value"="ячсячсяаачы"}; +{"key"="3";"subkey"="3";"value"="аавыаываыва"}; +{"key"="4";"subkey"="4";"value"="gd2цй3ываафы"}; +{"key"="5";"subkey"="5";"value"=""}; diff --git 
a/yql/essentials/udfs/common/unicode_base/test/ya.make b/yql/essentials/udfs/common/unicode_base/test/ya.make new file mode 100644 index 00000000000..39cf5f3563b --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/unicode_base) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/unicode_base/unicode_base.cpp b/yql/essentials/udfs/common/unicode_base/unicode_base.cpp new file mode 100644 index 00000000000..366777ab0eb --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/unicode_base.cpp @@ -0,0 +1,4 @@ +#include "lib/unicode_base_udf.h" + +SIMPLE_MODULE(TUnicodeModule, EXPORTED_UNICODE_BASE_UDF) +REGISTER_MODULES(TUnicodeModule) diff --git a/yql/essentials/udfs/common/unicode_base/ya.make b/yql/essentials/udfs/common/unicode_base/ya.make new file mode 100644 index 00000000000..b51e12ffb34 --- /dev/null +++ b/yql/essentials/udfs/common/unicode_base/ya.make @@ -0,0 +1,30 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319907306 OUT_NOAUTO libunicode_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(unicode_udf) + + YQL_ABI_VERSION( + 2 + 27 + 0 + ) + + SRCS( + unicode_base.cpp + ) + + PEERDIR( + yql/essentials/udfs/common/unicode_base/lib + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) + diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp new file mode 100644 index 00000000000..50a3ee8d1f1 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp @@ -0,0 +1 @@ +#include "url_base_udf.h"
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.h b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h new file mode 100644 index 00000000000..04ad1b4e469 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h @@ -0,0 +1,586 @@ +#pragma once + +#include "url_parse.h" +#include "url_query.h" + +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <yql/essentials/public/udf/arrow/udf_arrow_helpers.h> + +#include <library/cpp/tld/tld.h> +#include <library/cpp/charset/wide.h> +#include <library/cpp/unicode/punycode/punycode.h> +#include <library/cpp/string_utils/quote/quote.h> +#include <library/cpp/string_utils/url/url.h> + +#include <util/string/split.h> +#include <util/string/subst.h> + +using namespace NKikimr; +using namespace NUdf; +using namespace NTld; +using namespace NUrlUdf; + +inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) { + const NUri::TParseFlags& parseFlags(TUri::FeaturesRecommended); + return parser.ParseAbs(keyStr, parseFlags) == TUri::ParsedOK; +} + +#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \ + BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const std::string_view url(args[0].AsStringRef()); \ + const std::string_view res(functionName(url)); \ + return res.empty() ? 
TUnboxedValue() : \ + valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \ + } \ + struct udfName##KernelExec : public TUnaryKernelExec<udfName##KernelExec> { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ + if (!arg) { \ + return sink(TBlockItem()); \ + } \ + const std::string_view url(arg.AsStringRef()); \ + const std::string_view res(functionName(url)); \ + if (res.empty()) { \ + return sink(TBlockItem()); \ + } \ + sink(TBlockItem(TStringRef(res))); \ + } \ + }; \ + END_SIMPLE_ARROW_UDF(udfName, udfName##KernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + TUri url; + const bool success = PrepareUrl(args[0].AsStringRef(), url); + return success + ? valueBuilder->NewString(url.PrintS(TUri::FlagNoFrag)) + : TUnboxedValue(); +} +struct TNormalizeKernelExec : public TUnaryKernelExec<TNormalizeKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + TUri url; + const bool success = PrepareUrl(arg.AsStringRef(), url); + if (success) { + return sink(TBlockItem(TStringRef(url.PrintS(TUri::FlagNoFrag)))); + } + sink(TBlockItem()); + } +}; +END_SIMPLE_ARROW_UDF(TNormalize, TNormalizeKernelExec::Do); + +BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap<char*>)) { + const std::string_view url(args[0].AsStringRef()); + const std::string_view prefix(GetSchemePrefix(url)); + return valueBuilder->SubString(args[0], std::distance(url.begin(), prefix.begin()), prefix.size()); +} +struct TGetSchemeKernelExec : public TUnaryKernelExec<TGetSchemeKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + const std::string_view url(arg.AsStringRef()); + const std::string_view prefix(GetSchemePrefix(url)); + const 
std::string_view scheme = url.substr(std::distance(url.begin(), prefix.begin()), prefix.size()); + sink(TBlockItem(scheme)); + } +}; +END_SIMPLE_ARROW_UDF(TGetScheme, TGetSchemeKernelExec::Do); + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetHost, GetOnlyHost) + +std::string_view GetHostAndPortAfterCut(const std::string_view url) { + return GetHostAndPort(CutSchemePrefix(url)); +} + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetHostPort, GetHostAndPortAfterCut) + +std::string_view GetSchemeHostParameterized(const std::string_view url) { + return GetSchemeHost(url, /* trimHttp */ false); +} + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetSchemeHost, GetSchemeHostParameterized); + +std::string_view GetSchemeHostPortParameterized(const std::string_view url) { + return GetSchemeHostAndPort(url, /* trimHttp */ false, /* trimDefaultPort */ false); +} + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetSchemeHostPort, GetSchemeHostPortParameterized); + +BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional<ui64>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + Y_UNUSED(valueBuilder); + ui16 port = 0; + TStringBuf scheme, host; + TString lowerUri(args[0].AsStringRef()); + std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri), + lowerUri.begin(), [](unsigned char c){ return std::tolower(c); }); + return TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port + ? 
TUnboxedValuePod(port) + : TUnboxedValuePod(); +} +struct TGetPortKernelExec : public TUnaryKernelExec<TGetPortKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + ui16 port = 0; + TStringBuf scheme, host; + TString lowerUri(arg.AsStringRef()); + std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri), + lowerUri.begin(), [](unsigned char c){ return std::tolower(c); }); + if (TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port) { + return sink(TBlockItem(port)); + } + sink(TBlockItem()); + } +}; +END_SIMPLE_ARROW_UDF(TGetPort, TGetPortKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const TStringBuf url(args[0].AsStringRef()); + TStringBuf host, tail; + SplitUrlToHostAndPath(url, host, tail); + return tail.StartsWith('/') + ? valueBuilder->NewString(tail) + : valueBuilder->NewString(TString('/').append(tail)); +} +struct TGetTailKernelExec : public TUnaryKernelExec<TGetTailKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + const TStringBuf url(arg.AsStringRef()); + TStringBuf host, tail; + SplitUrlToHostAndPath(url, host, tail); + if (tail.StartsWith('/')) { + return sink(TBlockItem(TStringRef(tail))); + } + sink(TBlockItem(TStringRef(TString('/').append(tail)))); + } +}; +END_SIMPLE_ARROW_UDF(TGetTail, TGetTailKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const std::string_view url(args[0].AsStringRef()); + std::string_view cut(CutSchemePrefix(url)); + const auto s = cut.find('/'); + if (s == std::string_view::npos) { + return valueBuilder->NewString("/"); + } + + cut.remove_prefix(s); + const auto end = cut.find_first_of("?#"); + if 
(std::string_view::npos != end) { + cut.remove_suffix(cut.size() - end); + } + + return valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length()); +} +struct TGetPathKernelExec : public TUnaryKernelExec<TGetPathKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + const std::string_view url(arg.AsStringRef()); + std::string_view cut(CutSchemePrefix(url)); + const auto s = cut.find('/'); + if (s == std::string_view::npos) { + return sink(TBlockItem(TStringRef("/"))); + } + + cut.remove_prefix(s); + const auto end = cut.find_first_of("?#"); + if (std::string_view::npos != end) { + cut.remove_suffix(cut.size() - end); + } + sink(TBlockItem(TStringRef(cut))); + } +}; +END_SIMPLE_ARROW_UDF(TGetPath, TGetPathKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional<char*>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const std::string_view url(args[0].AsStringRef()); + const auto pos = url.find('#'); + return pos == std::string_view::npos ? 
TUnboxedValue() : + valueBuilder->SubString(args[0], pos + 1U, url.length() - pos - 1U); +} +struct TGetFragmentKernelExec : public TUnaryKernelExec<TGetFragmentKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + const std::string_view url(arg.AsStringRef()); + const auto pos = url.find('#'); + if (pos == std::string_view::npos) { + return sink(TBlockItem()); + } + return sink(TBlockItem(arg.AsStringRef().Substring(pos + 1U, url.length() - pos - 1U))); + } +}; +END_SIMPLE_ARROW_UDF(TGetFragment, TGetFragmentKernelExec::Do); + +std::optional<std::pair<ui32, ui32>> GetDomain(const std::string_view url, const ui8 level) { + const std::string_view host(GetOnlyHost(url)); + std::vector<std::string_view> parts; + StringSplitter(host).Split('.').AddTo(&parts); + if (level && parts.size() >= level) { + const auto& result = host.substr(std::distance(host.begin(), parts[parts.size() - level].begin())); + if (result.empty()) { + return std::nullopt; + } + return std::make_pair(std::distance(url.begin(), result.begin()), result.size()); + } + return std::nullopt; +} + +BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional<char*>(TOptional<char*>, ui8)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const std::string_view url = args[0].AsStringRef(); + const std::optional<std::pair<ui32, ui32>> resultOpt = GetDomain(url, args[1].Get<ui8>()); + if (!resultOpt) { + return TUnboxedValue(); + } + const std::pair<ui32, ui32> result = *resultOpt; + return valueBuilder->SubString(args[0], result.first, result.second); +} +struct TGetDomainKernelExec : public TBinaryKernelExec<TGetDomainKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) { + return sink(TBlockItem()); + } + const auto resultOpt = GetDomain(arg1.AsStringRef(), arg2.As<ui8>()); + if (!resultOpt) { + return 
sink(TBlockItem()); + } + const auto result = *resultOpt; + sink(TBlockItem(arg1.AsStringRef().Substring(result.first, result.second))); + } +}; +END_SIMPLE_ARROW_UDF(TGetDomain, TGetDomainKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap<char*>)) { + const TStringBuf url(args[0].AsStringRef()); + return valueBuilder->NewString(GetZone(GetOnlyHost(url))); +} +struct TGetTLDKernelExec : public TUnaryKernelExec<TGetTLDKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + const TStringBuf url(arg.AsStringRef()); + return sink(TBlockItem(GetZone(GetOnlyHost(url)))); + } +}; +END_SIMPLE_ARROW_UDF(TGetTLD, TGetTLDKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + std::vector<std::string_view> parts; + StringSplitter(GetOnlyHost(args[0].AsStringRef())).Split('.').AddTo(&parts); + return TUnboxedValuePod(ui64(parts.size())); +} +struct TGetDomainLevelKernelExec : public TUnaryKernelExec<TGetDomainLevelKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + std::vector<std::string_view> parts; + StringSplitter(GetOnlyHost(arg.AsStringRef())).Split('.').AddTo(&parts); + return sink(TBlockItem(ui64(parts.size()))); + } +}; +END_SIMPLE_ARROW_UDF(TGetDomainLevel, TGetDomainLevelKernelExec::Do); + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSignificantDomain, char*(TAutoMap<char*>, TOptional<TListType<char*>>), 1) { + const std::string_view url(args[0].AsStringRef()); + const std::string_view host(GetOnlyHost(url)); + std::vector<std::string_view> parts; + StringSplitter(host).Split('.').AddTo(&parts); + if (parts.size() > 2) { + const auto& secondLevel = parts.at(parts.size() - 2); + bool secondLevelIsZone = false; + + if (args[1]) { + const auto& zonesIterator = args[1].GetListIterator(); + for (TUnboxedValue item; zonesIterator.Next(item);) { + if (secondLevel == 
item.AsStringRef()) { + secondLevelIsZone = true; + break; + } + } + } else { + static const std::set<std::string_view> zones{"com", "net", "org", "co", "gov", "edu"}; + secondLevelIsZone = zones.count(secondLevel); + } + + const auto from = parts[parts.size() - (secondLevelIsZone ? 3U : 2U)].begin(); + return valueBuilder->SubString(args[0], std::distance(url.begin(), from), std::distance(from, parts.back().end())); + } + return valueBuilder->SubString(args[0], std::distance(url.begin(), host.begin()), host.length()); +} + +std::optional<std::pair<ui32, ui32>> GetCGIParam(const std::string_view url, const std::string_view key) { + const auto queryStart = url.find('?'); + if (queryStart != std::string_view::npos) { + const auto from = queryStart + 1U; + const auto anc = url.find('#', from); + const auto end = anc == std::string_view::npos ? url.length() : anc; + for (auto pos = from; pos && pos < end; ++pos) { + const auto equal = url.find('=', pos); + const auto amper = url.find('&', pos); + if (equal < amper) { + const auto& param = url.substr(pos, equal - pos); + if (param == key) { + return std::make_pair(equal + 1U, std::min(amper, end) - equal - 1U); + } + } + pos = amper; + } + } + return std::nullopt; +} + +BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional<char*>(TOptional<char*>, char*)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const std::string_view url = args[0].AsStringRef(); + const std::optional<std::pair<ui32, ui32>> resultOpt = GetCGIParam(url, args[1].AsStringRef()); + if (!resultOpt) { + return TUnboxedValue(); + } + const std::pair<ui32, ui32> result = *resultOpt; + return valueBuilder->SubString(args[0], result.first, result.second); +} +struct TGetCGIParamKernelExec : public TBinaryKernelExec<TGetCGIParamKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) { + return sink(TBlockItem()); + } + const auto resultOpt = GetCGIParam(arg1.AsStringRef(), 
arg2.AsStringRef()); + if (!resultOpt) { + return sink(TBlockItem()); + } + const auto result = *resultOpt; + sink(TBlockItem(arg1.AsStringRef().Substring(result.first, result.second))); + } +}; +END_SIMPLE_ARROW_UDF(TGetCGIParam, TGetCGIParamKernelExec::Do); + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutScheme, CutSchemePrefix) + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutWWW, CutWWWPrefix) + +ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutWWW2, CutWWWNumberedPrefix) + +BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) { + const std::string_view input(args[0].AsStringRef()); + const auto cut = input.find_first_of("?#"); + return std::string_view::npos == cut ? NUdf::TUnboxedValue(args[0]) : valueBuilder->SubString(args[0], 0U, cut); +} +struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec<TCutQueryStringAndFragmentKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + const std::string_view input(arg.AsStringRef()); + const auto cut = input.find_first_of("?#"); + sink(TBlockItem(arg.AsStringRef().Substring(0U, cut))); + } +}; +END_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, TCutQueryStringAndFragmentKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional<char*>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const std::string_view input(args[0].AsStringRef()); + if (input.empty()) { + return NUdf::TUnboxedValuePod(); + } + TString url(input); + UrlEscape(url); + return input == url ? 
NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url); +} +struct TEncodeKernelExec : public TUnaryKernelExec<TEncodeKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + const std::string_view input(arg.AsStringRef()); + if (input.empty()) { + return sink(TBlockItem()); + } + TString url(input); + UrlEscape(url); + sink(TBlockItem(TStringRef(url))); + } +}; +END_SIMPLE_ARROW_UDF(TEncode, TEncodeKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional<char*>(TOptional<char*>)) { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const std::string_view input(args[0].AsStringRef()); + if (input.empty()) { + return NUdf::TUnboxedValuePod(); + } + TString url(input); + SubstGlobal(url, '+', ' '); + UrlUnescape(url); + return input == url ? NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url); +} +struct TDecodeKernelExec : public TUnaryKernelExec<TDecodeKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + const std::string_view input(arg.AsStringRef()); + if (input.empty()) { + return sink(TBlockItem()); + } + TString url(input); + SubstGlobal(url, '+', ' '); + UrlUnescape(url); + sink(TBlockItem(TStringRef(url))); + } +}; +END_SIMPLE_ARROW_UDF(TDecode, TDecodeKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(IsTld(args[0].AsStringRef())); +} +struct TIsKnownTLDKernelExec : public TUnaryKernelExec<TIsKnownTLDKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + sink(TBlockItem(static_cast<ui8>(IsTld(arg.AsStringRef())))); + } +}; +END_SIMPLE_ARROW_UDF(TIsKnownTLD, TIsKnownTLDKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, bool(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); 
+ return TUnboxedValuePod(IsVeryGoodTld(args[0].AsStringRef())); +} +struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec<TIsWellKnownTLDKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + sink(TBlockItem(static_cast<ui8>(IsVeryGoodTld(arg.AsStringRef())))); + } +}; +END_SIMPLE_ARROW_UDF(TIsWellKnownTLD, TIsWellKnownTLDKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional<char*>(TAutoMap<char*>)) try { + const TUtf16String& input = UTF8ToWide(args[0].AsStringRef()); + return valueBuilder->NewString(HostNameToPunycode(input)); +} catch (TPunycodeError&) { + return TUnboxedValue(); +} +struct THostNameToPunycodeKernelExec : public TUnaryKernelExec<THostNameToPunycodeKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { + const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); + return sink(TBlockItem(TStringRef(HostNameToPunycode(input)))); + } catch (TPunycodeError&) { + return sink(TBlockItem()); + } +}; +END_SIMPLE_ARROW_UDF(THostNameToPunycode, THostNameToPunycodeKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap<char*>)) { + const TUtf16String& input = UTF8ToWide(args[0].AsStringRef()); + return valueBuilder->NewString(ForceHostNameToPunycode(input)); +} +struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec<TForceHostNameToPunycodeKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); + sink(TBlockItem(TStringRef(ForceHostNameToPunycode(input)))); + } +}; +END_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, TForceHostNameToPunycodeKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional<char*>(TAutoMap<char*>)) try { + const TStringRef& input = args[0].AsStringRef(); + const auto& result = 
WideToUTF8(PunycodeToHostName(input)); + return valueBuilder->NewString(result); +} catch (TPunycodeError&) { + return TUnboxedValue(); +} +struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec<TPunycodeToHostNameKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { + const TStringRef& input = arg.AsStringRef(); + const auto& result = WideToUTF8(PunycodeToHostName(input)); + return sink(TBlockItem(TStringRef(result))); + } catch (TPunycodeError&) { + return sink(TBlockItem()); + } +}; +END_SIMPLE_ARROW_UDF(TPunycodeToHostName, TPunycodeToHostNameKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap<char*>)) { + const TStringRef& input = args[0].AsStringRef(); + const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); + return valueBuilder->NewString(result); +} +struct TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec<TForcePunycodeToHostNameKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + const TStringRef& input = arg.AsStringRef(); + const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); + sink(TBlockItem(TStringRef(result))); + } +}; +END_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, TForcePunycodeToHostNameKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(CanBePunycodeHostName(args[0].AsStringRef())); +} +struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec<TCanBePunycodeHostNameKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { + sink(TBlockItem(static_cast<ui8>(CanBePunycodeHostName(arg.AsStringRef())))); + } +}; +END_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, TCanBePunycodeHostNameKernelExec::Do); + +#define EXPORTED_URL_BASE_UDF \ + TNormalize, \ + TParse, \ + TGetScheme, \ 
+ TGetHost, \ + TGetHostPort, \ + TGetSchemeHost, \ + TGetSchemeHostPort, \ + TGetPort, \ + TGetTail, \ + TGetPath, \ + TGetFragment, \ + TGetDomain, \ + TGetTLD, \ + TGetDomainLevel, \ + TGetSignificantDomain, \ + TGetCGIParam, \ + TCutScheme, \ + TCutWWW, \ + TCutWWW2, \ + TCutQueryStringAndFragment, \ + TEncode, \ + TDecode, \ + TIsKnownTLD, \ + TIsWellKnownTLD, \ + THostNameToPunycode, \ + TForceHostNameToPunycode, \ + TPunycodeToHostName, \ + TForcePunycodeToHostName, \ + TCanBePunycodeHostName, \ + TQueryStringToList, \ + TQueryStringToDict, \ + TBuildQueryString diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.cpp b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp new file mode 100644 index 00000000000..63015dadc67 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp @@ -0,0 +1,53 @@ +#include "url_parse.h" + +#define FIELD_ADD(name) structBuilder->AddField(#name, optionalStringType, &urlParseIndexes.name); +#define FIELD_FILL(name) \ + if (value.FldIsSet(TUri::Field##name)) { \ + fields[UrlParseIndexes.name] = valueBuilder->NewString(value.GetField(TUri::Field##name)); \ + } + +namespace NUrlUdf { + using namespace NUri; + using namespace NKikimr; + using namespace NUdf; + + TUnboxedValue TParse::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + TUri value; + const auto ParseError = value.ParseAbs(args[0].AsStringRef(), ParseFlags); + TUnboxedValue* fields = nullptr; + const auto result = valueBuilder->NewArray(FieldsCount, fields); + if (ParseError == TUri::ParsedOK) { + FIELD_MAP(FIELD_FILL) + } else { + fields[UrlParseIndexes.ParseError] = valueBuilder->NewString(TStringBuilder() << ParseError); + } + return result; + } + + bool TParse::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + TUrlParseIndexes urlParseIndexes; + + builder.Args(1)->Add<TAutoMap<char*>>(); + 
const auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const auto structBuilder = builder.Struct(FieldsCount); + structBuilder->AddField("ParseError", optionalStringType, &urlParseIndexes.ParseError); + FIELD_MAP(FIELD_ADD) + builder.Returns(structBuilder->Build()); + + if (!typesOnly) { + builder.Implementation(new TParse(urlParseIndexes)); + } + return true; + } else { + return false; + } + } +} diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.h b/yql/essentials/udfs/common/url_base/lib/url_parse.h new file mode 100644 index 00000000000..b0a1679d1e9 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/url_parse.h @@ -0,0 +1,59 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_helpers.h> +#include <library/cpp/uri/uri.h> + +#define FIELD_MAP(XX) \ + XX(Scheme) \ + XX(User) \ + XX(Pass) \ + XX(Host) \ + XX(Port) \ + XX(Path) \ + XX(Query) \ + XX(Frag) + +#define FIELD_INDEXES(name) ui32 name; + +namespace NUrlUdf { + using namespace NUri; + using namespace NKikimr; + using namespace NUdf; + + struct TUrlParseIndexes { + ui32 ParseError; + FIELD_MAP(FIELD_INDEXES) + }; + + class TParse: public TBoxedValue { + public: + TParse(const TUrlParseIndexes& UrlParseIndexes) + : UrlParseIndexes(UrlParseIndexes) + , ParseFlags(TUri::FeaturesRecommended) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef("Parse"); + return nameRef; + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + + public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + + private: + const TUrlParseIndexes UrlParseIndexes; + const NUri::TParseFlags ParseFlags; + + static constexpr ui32 FieldsCount = sizeof(TUrlParseIndexes) / sizeof(ui32); + }; +} diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.cpp b/yql/essentials/udfs/common/url_base/lib/url_query.cpp 
new file mode 100644 index 00000000000..f449be22681 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/url_query.cpp @@ -0,0 +1,243 @@ +#include "url_query.h" + +#include <yql/essentials/public/udf/udf_type_printer.h> + +#include <util/string/split.h> + +#include <library/cpp/string_utils/quote/quote.h> + +namespace NUrlUdf { + void TQueryStringParse::MakeSignature(IFunctionTypeInfoBuilder& builder, + const TType* retType) + { + builder.Returns(retType).OptionalArgs(4); + auto args = builder.Args(); + args->Add<TAutoMap<TQueryStr>>(); + args->Add<TKeepBlankValuesNArg>(); + args->Add<TStrictNArg>(); + args->Add<TMaxFieldsNArg>(); + args->Add<TSeparatorNArg>().Done(); + } + + std::vector<std::pair<TString, TString>> + TQueryStringParse::RunImpl(const TUnboxedValuePod* args) const { + const std::string_view query(args[0].AsStringRef()); + if (query.empty()) + return {}; + const bool keepBlankValues = args[1].GetOrDefault(false); + const bool strict = args[2].GetOrDefault(true); + const ui32 maxFieldCnt = args[3].GetOrDefault(Max<ui32>()); + const std::string_view sep(args[4] ? 
args[4].AsStringRef() : "&"); + + std::vector<TStringBuf> parts; + StringSplitter(query).SplitByString(sep).Collect(&parts); + if (parts.size() > maxFieldCnt) { + UdfTerminate((TStringBuilder() << Pos_ << "Max number of fields (" << maxFieldCnt + << ") exceeded: got " << parts.size()).data()); + } + + std::vector<std::pair<TString, TString>> pairs; + for (const TStringBuf& part: parts) { + if (part.empty() && !strict) { + continue; + } + TVector<TString> nvPair = StringSplitter(part).Split('=').Limit(2); + if (nvPair.size() != 2) { + if (strict) { + UdfTerminate((TStringBuilder() << Pos_ << "Bad query field: \"" + << nvPair[0] << "\"").data()); + } + if (keepBlankValues) { + nvPair.emplace_back(""); + } else { + continue; + } + } + if (!nvPair[1].empty() || keepBlankValues) { + CGIUnescape(nvPair[0]); + CGIUnescape(nvPair[1]); + pairs.emplace_back(nvPair[0], nvPair[1]); + } + } + return pairs; + } + + bool TQueryStringToList::DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + MakeSignature(builder, GetListType(builder)); + if (!typesOnly) { + builder.Implementation(new TQueryStringToList(builder.GetSourcePosition())); + } + return true; + } + return false; + } + + TUnboxedValue TQueryStringToList::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const auto pairs = RunImpl(args); + std::vector<TUnboxedValue> ret; + for (const auto& nvPair : pairs) { + TUnboxedValue* pair = nullptr; + auto item = valueBuilder->NewArray(2U, pair); + pair[0] = valueBuilder->NewString(nvPair.first); + pair[1] = valueBuilder->NewString(nvPair.second); + ret.push_back(item); + } + return valueBuilder->NewList(ret.data(), ret.size()); + } + + bool TQueryStringToDict::DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + auto dictType = GetDictType(builder); + MakeSignature(builder, dictType); + if 
(!typesOnly) { + builder.Implementation(new TQueryStringToDict(dictType, + builder.GetSourcePosition())); + } + return true; + } + return false; + } + + TUnboxedValue TQueryStringToDict::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const auto pairs = RunImpl(args); + auto ret = valueBuilder->NewDict(DictType_, TDictFlags::Hashed | TDictFlags::Multi); + for (const auto& nvPair : pairs) { + ret->Add(valueBuilder->NewString(nvPair.first), + valueBuilder->NewString(nvPair.second)); + } + return ret->Build(); + } + + TUnboxedValue TBuildQueryString::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const std::string_view sep(args[1] ? args[1].AsStringRef() : "&"); + TStringBuilder ret; + + switch(FirstArgTypeId_) { + case EFirstArgTypeId::Dict: { + TUnboxedValue key, value; + const auto dictIt = args[0].GetDictIterator(); + ui64 wasItem = 0; + while (dictIt.NextPair(key, value)) { + TString keyEscaped = CGIEscapeRet(key.AsStringRef()); + const auto listIt = value.GetListIterator(); + TUnboxedValue item; + while (listIt.Next(item)) { + if (wasItem++) + ret << sep; + if (item) { + ret << keyEscaped << '=' << CGIEscapeRet(item.AsStringRef()); + } else { + ret << keyEscaped << '='; + } + } + } + break; + } + case EFirstArgTypeId::FlattenDict: { + TUnboxedValue key, value; + const auto dictIt = args[0].GetDictIterator(); + ui64 wasKey = 0; + while (dictIt.NextPair(key, value)) { + if (wasKey++) + ret << sep; + if (value) { + ret << CGIEscapeRet(key.AsStringRef()) << '=' + << CGIEscapeRet(value.AsStringRef()); + } else { + ret << CGIEscapeRet(key.AsStringRef()) << '='; + } + } + break; + } + case EFirstArgTypeId::List: { + ui64 wasItem = 0; + TUnboxedValue item; + const auto listIt = args[0].GetListIterator(); + while (listIt.Next(item)) { + if (wasItem++) + ret << sep; + TUnboxedValue key = item.GetElement(0), val = item.GetElement(1); + if (val) { + ret << CGIEscapeRet(key.AsStringRef()) << '=' + << 
CGIEscapeRet(val.AsStringRef()); + } else { + ret << CGIEscapeRet(key.AsStringRef()) << '='; + } + } + break; + } + default: + Y_ABORT("Current first parameter type is not yet implemented"); + } + return valueBuilder->NewString(ret); + } + + bool TBuildQueryString::DeclareSignature(const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + if (!userType) { + builder.SetError("Missing user type"); + return true; + } + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || !userTypeInspector.GetElementsCount()) { + builder.SetError("User type is not tuple"); + return true; + } + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, + userTypeInspector.GetElementType(0)); + if (!argsTypeInspector || !argsTypeInspector.GetElementsCount()) { + builder.SetError("Please provide at least one argument"); + return true; + } + const auto firstArgType = argsTypeInspector.GetElementType(0); + EFirstArgTypeId firstArgTypeId = EFirstArgTypeId::None; + + if (typeHelper->IsSameType(GetDictType(builder), firstArgType) || + typeHelper->IsSameType(GetDictType(builder, true), firstArgType)) { + firstArgTypeId = EFirstArgTypeId::Dict; + } else if (typeHelper->IsSameType(GetListType(builder), firstArgType) || + typeHelper->IsSameType(GetListType(builder, true), firstArgType) || + typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyList) + { + firstArgTypeId = EFirstArgTypeId::List; + } else if (typeHelper->IsSameType(GetFlattenDictType(builder), firstArgType) || + typeHelper->IsSameType(GetFlattenDictType(builder, true), firstArgType) || + typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyDict) + { + firstArgTypeId = EFirstArgTypeId::FlattenDict; + } + if (firstArgTypeId != EFirstArgTypeId::None) { + builder.Returns<TQueryStr>().OptionalArgs(1); + auto args = 
builder.Args(); + args->Add(firstArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); + args->Add<TSeparatorNArg>().Done(); + if (!typesOnly) { + builder.Implementation(new TBuildQueryString(builder.GetSourcePosition(), + firstArgTypeId)); + } + } else { + TStringBuilder sb; + sb << "Unsupported first argument type: "; + TTypePrinter(*typeHelper, firstArgType).Out(sb.Out); + builder.SetError(sb); + } + return true; + } + return false; + } +} diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.h b/yql/essentials/udfs/common/url_base/lib/url_query.h new file mode 100644 index 00000000000..552b8527823 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/url_query.h @@ -0,0 +1,134 @@ +#pragma once + +#include <yql/essentials/public/udf/udf_helpers.h> + +namespace NUrlUdf { + using namespace NYql::NUdf; + + struct TQueryStringConv : public TBoxedValue { + protected: + static constexpr char Separator[] = "Separator"; + + using TQueryStr = char*; + using TSeparatorNArg = TNamedArg<TQueryStr, Separator>; + + static inline TType* GetListType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + auto tupleType = optional ? + builder.Tuple()->Add<TQueryStr>().Add(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.Tuple()->Add<TQueryStr>().Add<TQueryStr>().Build(); + return builder.List()->Item(tupleType).Build(); + } + + static inline TType* GetDictType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + auto listType = optional ? + builder.List()->Item(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.List()->Item<TQueryStr>().Build(); + return builder.Dict()->Key<TQueryStr>().Value(listType).Build(); + } + + static inline TType* GetFlattenDictType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + return optional ? 
+ builder.Dict()->Key<TQueryStr>().Value(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.Dict()->Key<TQueryStr>().Value<TQueryStr>().Build(); + } + }; + + struct TQueryStringParse: public TQueryStringConv { + explicit TQueryStringParse(TSourcePosition&& pos) : Pos_(std::move(pos)) {} + + protected: + static constexpr char KeepBlankValues[] = "KeepBlankValues"; + static constexpr char Strict[] = "Strict"; + static constexpr char MaxFields[] = "MaxFields"; + + using TKeepBlankValuesNArg = TNamedArg<bool, KeepBlankValues>; + using TStrictNArg = TNamedArg<bool, Strict>; + using TMaxFieldsNArg = TNamedArg<ui32, MaxFields>; + + static void MakeSignature(IFunctionTypeInfoBuilder& builder, const TType* retType); + + std::vector<std::pair<TString, TString>> + RunImpl(const TUnboxedValuePod* args) const; + + private: + TSourcePosition Pos_; + }; + + struct TQueryStringToList : public TQueryStringParse { + explicit TQueryStringToList(TSourcePosition&& pos) + : TQueryStringParse(std::forward<TSourcePosition>(pos)) {} + + static const TStringRef& Name() { + static const auto name = TStringRef::Of("QueryStringToList"); + return name; + } + + static bool DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + }; + + struct TQueryStringToDict : public TQueryStringParse { + explicit TQueryStringToDict(TType* dictType, TSourcePosition&& pos) + : TQueryStringParse(std::move(pos)) + , DictType_(dictType) + {} + + static const TStringRef& Name() { + static const auto name = TStringRef::Of("QueryStringToDict"); + return name; + } + + static bool DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + + private: + TType* DictType_; + }; + + class 
TBuildQueryString : public TQueryStringConv { + TSourcePosition Pos_; + enum class EFirstArgTypeId { + None, + Dict, + FlattenDict, + List, + } FirstArgTypeId_; + + public: + typedef bool TTypeAwareMarker; + + explicit TBuildQueryString(TSourcePosition&& pos, EFirstArgTypeId firstArgTypeId) + : Pos_(std::move(pos)) + , FirstArgTypeId_(firstArgTypeId) + {} + + static const TStringRef& Name() { + static const auto name = TStringRef::Of("BuildQueryString"); + return name; + } + + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + + static bool DeclareSignature(const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + }; +} diff --git a/yql/essentials/udfs/common/url_base/lib/ya.make b/yql/essentials/udfs/common/url_base/lib/ya.make new file mode 100644 index 00000000000..1d9cfa12d01 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/lib/ya.make @@ -0,0 +1,27 @@ +LIBRARY() + +YQL_ABI_VERSION( + 2 + 37 + 0 +) + +SRCS( + url_base_udf.cpp + url_parse.cpp + url_query.cpp +) + +PEERDIR( + library/cpp/charset + library/cpp/string_utils/quote + library/cpp/string_utils/url + library/cpp/tld + library/cpp/unicode/punycode + library/cpp/uri + yql/essentials/public/udf + yql/essentials/public/udf/arrow + contrib/libs/apache/arrow +) + +END() diff --git a/yql/essentials/udfs/common/url_base/test/canondata/result.json b/yql/essentials/udfs/common/url_base/test/canondata/result.json new file mode 100644 index 00000000000..98e905ecde1 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/result.json @@ -0,0 +1,47 @@ +{ + "test.test[BlockPunycode]": [ + { + "uri": "file://test.test_BlockPunycode_/results.txt" + } + ], + "test.test[BlockTld]": [ + { + "uri": "file://test.test_BlockTld_/results.txt" + } + ], + "test.test[BlockUrl]": [ + { + "uri": "file://test.test_BlockUrl_/results.txt" + } + ], + "test.test[Punycode]": [ + { + "uri": 
"file://test.test_Punycode_/results.txt" + } + ], + "test.test[Tld]": [ + { + "uri": "file://test.test_Tld_/results.txt" + } + ], + "test.test[UrlQueryMaxFieldsErr]": [ + { + "uri": "file://test.test_UrlQueryMaxFieldsErr_/extracted" + } + ], + "test.test[UrlQueryStrictErr]": [ + { + "uri": "file://test.test_UrlQueryStrictErr_/extracted" + } + ], + "test.test[UrlQuery]": [ + { + "uri": "file://test.test_UrlQuery_/results.txt" + } + ], + "test.test[Url]": [ + { + "uri": "file://test.test_Url_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt new file mode 100644 index 00000000000..69224c4ac6d --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockPunycode_/results.txt @@ -0,0 +1,106 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "hostname_utf"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "punycode_hostname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "hostname_utf_forced"; + [ + "DataType"; + "String" + ] + ]; + [ + "punycode_hostname_forced"; + [ + "DataType"; + "String" + ] + ]; + [ + "can_be_punycode"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "ab\xC3\246cd\xC3\266ef"; + #; + #; + "ab\xC3\246cd\xC3\266ef"; + "xn--abcdef-qua4k"; + %false + ]; + [ + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"; + #; + #; + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"; + "xn--d1acpjx3f.xn--p1ag"; + %false + ]; + [ + "yandex.ru"; + [ + "yandex.ru" + ]; + [ + "yandex.ru" + ]; + "yandex.ru"; + "yandex.ru"; + %false + ]; + [ + "xn--d1acpjx3f.xn--p1ag"; + [ + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83" + ]; + [ + "xn--d1acpjx3f.xn--p1ag" + ]; + 
"\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"; + "xn--d1acpjx3f.xn--p1ag"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt new file mode 100644 index 00000000000..f45bb011da2 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockTld_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "tld"; + [ + "DataType"; + "String" + ] + ]; + [ + "known"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "well_known"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "ru"; + %true; + %true + ]; + [ + "123"; + %false; + %false + ]; + [ + "yandex"; + %true; + %false + ]; + [ + "sdfsdfsdf"; + %false; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt new file mode 100644 index 00000000000..7b5257063b6 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_BlockUrl_/results.txt @@ -0,0 +1,1212 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "encode"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "decode"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "param"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_qs_and_fragment"; + [ + "DataType"; + "String" + ] + ]; + [ + "host"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_www"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_www2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "tld"; + [ + "DataType"; + "String" + ] + ]; + [ + "punycode"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_scheme"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "host_port"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "scheme_host"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "scheme_host_port"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "tail"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "path"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "fragment"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "port"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "domain0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "domain1"; + [ + "OptionalType"; + [ + "DataType"; + "String" 
+ ] + ] + ]; + [ + "domain3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "domain_level"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "norm"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "http://"; + [ + "http://" + ]; + [ + "http://" + ]; + #; + "http://"; + #; + #; + #; + ""; + [ + "http://" + ]; + #; + #; + [ + "http://" + ]; + [ + "http://" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + #; + #; + "1"; + # + ]; + [ + "http://lenta.ru"; + [ + "http://lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + #; + "http://lenta.ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + "ru"; + [ + "http://lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "http://lenta.ru/" + ] + ]; + [ + "http://someone.livejournal.com/blog"; + [ + "http://someone.livejournal.com/blog" + ]; + [ + "http://someone.livejournal.com/blog" + ]; + #; + "http://someone.livejournal.com/blog"; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + "com"; + [ + "http://someone.livejournal.com/blog" + ]; + [ + "someone.livejournal.com/blog" + ]; + [ + "someone.livejournal.com" + ]; + [ + "http://someone.livejournal.com" + ]; + [ + "http://someone.livejournal.com" + ]; + [ + "/blog" + ]; + [ + "/blog" + ]; + #; + [ + "80" + ]; + #; + [ + "com" + ]; + [ + "someone.livejournal.com" + ]; + "3"; + [ + "http://someone.livejournal.com/blog" + ] + ]; + [ + "http://bbc.co.uk/"; + [ + "http://bbc.co.uk/" + ]; + [ + "http://bbc.co.uk/" + ]; + #; + "http://bbc.co.uk/"; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + "uk"; + [ + "http://bbc.co.uk/" + ]; + [ + "bbc.co.uk/" + ]; + [ + "bbc.co.uk" + ]; + [ + "http://bbc.co.uk" + ]; + [ + "http://bbc.co.uk" + ]; + [ + "/" + ]; + [ + 
"/" + ]; + #; + [ + "80" + ]; + #; + [ + "uk" + ]; + [ + "bbc.co.uk" + ]; + "3"; + [ + "http://bbc.co.uk/" + ] + ]; + [ + "https://www.yandex.com.tr/search"; + [ + "https://www.yandex.com.tr/search" + ]; + [ + "https://www.yandex.com.tr/search" + ]; + #; + "https://www.yandex.com.tr/search"; + [ + "www.yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + "tr"; + [ + "https://www.yandex.com.tr/search" + ]; + [ + "www.yandex.com.tr/search" + ]; + [ + "www.yandex.com.tr" + ]; + [ + "https://www.yandex.com.tr" + ]; + [ + "https://www.yandex.com.tr" + ]; + [ + "/search" + ]; + [ + "/search" + ]; + #; + [ + "443" + ]; + #; + [ + "tr" + ]; + [ + "yandex.com.tr" + ]; + "4"; + [ + "https://www.yandex.com.tr/search" + ] + ]; + [ + "https://www2.yandex.com.tr/search"; + [ + "https://www2.yandex.com.tr/search" + ]; + [ + "https://www2.yandex.com.tr/search" + ]; + #; + "https://www2.yandex.com.tr/search"; + [ + "www2.yandex.com.tr" + ]; + [ + "www2.yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + "tr"; + [ + "https://www2.yandex.com.tr/search" + ]; + [ + "www2.yandex.com.tr/search" + ]; + [ + "www2.yandex.com.tr" + ]; + [ + "https://www2.yandex.com.tr" + ]; + [ + "https://www2.yandex.com.tr" + ]; + [ + "/search" + ]; + [ + "/search" + ]; + #; + [ + "443" + ]; + #; + [ + "tr" + ]; + [ + "yandex.com.tr" + ]; + "4"; + [ + "https://www2.yandex.com.tr/search" + ] + ]; + [ + "lenta.ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + #; + "lenta.ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + "ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + #; + #; + [ + "ru" + ]; + #; + "2"; + # + ]; + [ + "bbc.co.uk/news"; + [ + "bbc.co.uk/news" + ]; + [ + "bbc.co.uk/news" + ]; + #; + "bbc.co.uk/news"; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + "uk"; + [ + "bbc.co.uk/news" + ]; + [ + "bbc.co.uk/news" + ]; + [ + "bbc.co.uk" 
+ ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "/news" + ]; + [ + "/news" + ]; + #; + #; + #; + [ + "uk" + ]; + [ + "bbc.co.uk" + ]; + "3"; + # + ]; + [ + "yandex.com.tr/maps?foo="; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "" + ]; + "yandex.com.tr/maps"; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + "tr"; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "/maps?foo=" + ]; + [ + "/maps" + ]; + #; + #; + #; + [ + "tr" + ]; + [ + "yandex.com.tr" + ]; + "3"; + # + ]; + [ + "someone.livejournal.com?foo=bar#top"; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "bar" + ]; + "someone.livejournal.com"; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + "com"; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "/?foo=bar#top" + ]; + [ + "/" + ]; + [ + "top" + ]; + #; + #; + [ + "com" + ]; + [ + "someone.livejournal.com" + ]; + "3"; + # + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n"; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + #; + "a.b.c.d.e.f.g.h.i.j.k"; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + "k"; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "/#l.m.n" + ]; + [ + "/" + ]; + [ + "l.m.n" + ]; + #; + #; + [ + "k" + ]; + [ + "i.j.k" + ]; + "11"; + # + ]; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"; + [ + 
"foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "foo.tl.md/ \xD1\x8E\xD0\xBD\xD0\xB8\xD0\xBA\xD0\xBE\xD0\xB4" + ]; + #; + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + "md"; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + #; + #; + #; + [ + "md" + ]; + [ + "foo.tl.md" + ]; + "3"; + # + ]; + [ + "xn--d1acpjx3f.xn--p1ai"; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + #; + "xn--d1acpjx3f.xn--p1ai"; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + "xn--p1ai"; + [ + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x84" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + #; + #; + [ + "xn--p1ai" + ]; + #; + "2"; + # + ]; + [ + "https://ya.ru:80/search/?text=test&lr=213#top"; + [ + "https://ya.ru:80/search/?text=test&lr=213#top" + ]; + [ + "https://ya.ru:80/search/?text=test&lr=213#top" + ]; + #; + "https://ya.ru:80/search/"; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + "ru"; + [ + "https://ya.ru:80/search/?text=test&lr=213#top" + ]; + [ + "ya.ru:80/search/?text=test&lr=213#top" + ]; + [ + "ya.ru:80" + ]; + [ + "https://ya.ru" + ]; + [ + "https://ya.ru:80" + ]; + [ + "/search/?text=test&lr=213#top" + ]; + [ + "/search/" + ]; + [ + "top" + ]; + [ + "80" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "https://ya.ru:80/search/?text=test&lr=213" + ] + ]; + [ + "https://ya.ru/search/?text=%2B"; + [ + "https://ya.ru/search/?text=%2B" + ]; + [ + "https://ya.ru/search/?text=+" + ]; + #; + 
"https://ya.ru/search/"; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + "ru"; + [ + "https://ya.ru/search/?text=%2B" + ]; + [ + "ya.ru/search/?text=%2B" + ]; + [ + "ya.ru" + ]; + [ + "https://ya.ru" + ]; + [ + "https://ya.ru" + ]; + [ + "/search/?text=%2B" + ]; + [ + "/search/" + ]; + #; + [ + "443" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "https://ya.ru/search/?text=%2B" + ] + ]; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible"; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible" + ]; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible" + ]; + #; + "goal://market.yandex.ru/product-page_scroll-box_product_visible"; + [ + "market.yandex.ru" + ]; + [ + "market.yandex.ru" + ]; + [ + "market.yandex.ru" + ]; + "ru"; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible" + ]; + [ + "market.yandex.ru/product-page_scroll-box_product_visible" + ]; + [ + "market.yandex.ru" + ]; + [ + "goal://market.yandex.ru" + ]; + [ + "goal://market.yandex.ru" + ]; + [ + "/product-page_scroll-box_product_visible" + ]; + [ + "/product-page_scroll-box_product_visible" + ]; + #; + #; + #; + [ + "ru" + ]; + [ + "market.yandex.ru" + ]; + "3"; + # + ]; + [ + "Http://ya.ru"; + [ + "Http://ya.ru" + ]; + [ + "Http://ya.ru" + ]; + #; + "Http://ya.ru"; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + "ru"; + [ + "Http://ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "Http://ya.ru" + ]; + [ + "Http://ya.ru" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "http://ya.ru/" + ] + ]; + [ + "ftp://someone.livejournal.com:80/blog"; + [ + "ftp://someone.livejournal.com:80/blog" + ]; + [ + "ftp://someone.livejournal.com:80/blog" + ]; + #; + "ftp://someone.livejournal.com:80/blog"; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + "com"; + [ + "ftp://someone.livejournal.com:80/blog" + ]; + [ 
+ "someone.livejournal.com:80/blog" + ]; + [ + "someone.livejournal.com:80" + ]; + [ + "ftp://someone.livejournal.com" + ]; + [ + "ftp://someone.livejournal.com:80" + ]; + [ + "/blog" + ]; + [ + "/blog" + ]; + #; + [ + "80" + ]; + #; + [ + "com" + ]; + [ + "someone.livejournal.com" + ]; + "3"; + [ + "ftp://someone.livejournal.com:80/blog" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt new file mode 100644 index 00000000000..69224c4ac6d --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Punycode_/results.txt @@ -0,0 +1,106 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "hostname_utf"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "punycode_hostname"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "hostname_utf_forced"; + [ + "DataType"; + "String" + ] + ]; + [ + "punycode_hostname_forced"; + [ + "DataType"; + "String" + ] + ]; + [ + "can_be_punycode"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "ab\xC3\246cd\xC3\266ef"; + #; + #; + "ab\xC3\246cd\xC3\266ef"; + "xn--abcdef-qua4k"; + %false + ]; + [ + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"; + #; + #; + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"; + "xn--d1acpjx3f.xn--p1ag"; + %false + ]; + [ + "yandex.ru"; + [ + "yandex.ru" + ]; + [ + "yandex.ru" + ]; + "yandex.ru"; + "yandex.ru"; + %false + ]; + [ + "xn--d1acpjx3f.xn--p1ag"; + [ + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83" + ]; + [ + "xn--d1acpjx3f.xn--p1ag" + ]; + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x83"; + "xn--d1acpjx3f.xn--p1ag"; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt new file mode 100644 index 00000000000..f45bb011da2 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Tld_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "tld"; + [ + "DataType"; + "String" + ] + ]; + [ + "known"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "well_known"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "ru"; + %true; + %true + ]; + [ + "123"; + %false; + %false + ]; + [ + "yandex"; + %true; + %false + ]; + [ + "sdfsdfsdf"; + %false; + %false + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted new file mode 100644 index 00000000000..e80a93b2997 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryMaxFieldsErr_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:4:10: Fatal: Max number of fields (2) exceeded: got 3 + Url::QueryStringToList("glfilter=78318%3A79492&glfilter=561%3A121037&hid=904", 2 AS MaxFields) + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted new file mode 100644 index 00000000000..6326332de1f --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQueryStrictErr_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Fatal: Execution + + <tmp_path>/program.sql:<main>:2:1: Fatal: Execution of node: Result + SELECT + ^ + <tmp_path>/program.sql:<main>:4:10: Fatal: Bad query field: "mistaken" + Url::QueryStringToList("glfilter=78318%3A79492&mistaken&glfilter=1&hid=904") + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt new file mode 100644 index 00000000000..303f480161c --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_UrlQuery_/results.txt @@ -0,0 +1,1112 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "base_list"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + [ + "base_list_build"; + [ + "DataType"; + "String" + ] + ]; + [ + "keep_blank_list"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + [ + "keep_blank_list_build"; + [ + "DataType"; + "String" + ] + ]; + [ + "sep_semicol_list"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + [ + "sep_semicol_list_build"; + [ + "DataType"; + "String" + ] + ]; + [ + "base_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ]; + [ + "base_dict_build"; + [ + "DataType"; + "String" + ] + ]; + [ + "keep_blank_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ]; + [ + "keep_blank_dict_build"; + [ + "DataType"; + "String" + ] + ]; + [ + "sep_semicol_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ]; + [ + "sep_semicol_dict_build"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0"; + [ + [ + "glfilter"; + "7893318:7949252" + ]; + [ + "glfilter"; + 
"5099461:12103637" + ]; + [ + "glfilter"; + "8292645:1" + ]; + [ + "hid"; + "90404" + ]; + [ + "onstock"; + "0" + ]; + [ + "local-offers-first"; + "0" + ] + ]; + "glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0"; + [ + [ + "glfilter"; + "7893318:7949252" + ]; + [ + "glfilter"; + "5099461:12103637" + ]; + [ + "glfilter"; + "8292645:1" + ]; + [ + "hid"; + "90404" + ]; + [ + "onstock"; + "0" + ]; + [ + "local-offers-first"; + "0" + ] + ]; + "glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0"; + [ + [ + "glfilter"; + "7893318:7949252&glfilter=5099461:12103637&glfilter=8292645:1&hid=90404&onstock=0&local-offers-first=0" + ] + ]; + "glfilter=7893318%3A7949252%26glfilter%3D5099461%3A12103637%26glfilter%3D8292645%3A1%26hid%3D90404%26onstock%3D0%26local-offers-first%3D0"; + [ + [ + "glfilter"; + [ + "7893318:7949252"; + "5099461:12103637"; + "8292645:1" + ] + ]; + [ + "hid"; + [ + "90404" + ] + ]; + [ + "onstock"; + [ + "0" + ] + ]; + [ + "local-offers-first"; + [ + "0" + ] + ] + ]; + "local-offers-first=0&onstock=0&hid=90404&glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1"; + [ + [ + "glfilter"; + [ + "7893318:7949252"; + "5099461:12103637"; + "8292645:1" + ] + ]; + [ + "hid"; + [ + "90404" + ] + ]; + [ + "onstock"; + [ + "0" + ] + ]; + [ + "local-offers-first"; + [ + "0" + ] + ] + ]; + "local-offers-first=0&onstock=0&hid=90404&glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1"; + [ + [ + "glfilter"; + [ + "7893318:7949252&glfilter=5099461:12103637&glfilter=8292645:1&hid=90404&onstock=0&local-offers-first=0" + ] + ] + ]; + "glfilter=7893318%3A7949252%26glfilter%3D5099461%3A12103637%26glfilter%3D8292645%3A1%26hid%3D90404%26onstock%3D0%26local-offers-first%3D0" + ]; + [ + ""; + []; + ""; + []; + ""; + []; + ""; + []; + ""; + []; + ""; + []; + "" + ]; + [ + "&"; + []; + ""; + []; + ""; + 
[]; + ""; + []; + ""; + []; + ""; + []; + "" + ]; + [ + "&&"; + []; + ""; + []; + ""; + []; + ""; + []; + ""; + []; + ""; + []; + "" + ]; + [ + "="; + []; + ""; + [ + [ + ""; + "" + ] + ]; + "="; + []; + ""; + []; + ""; + [ + [ + ""; + [ + "" + ] + ] + ]; + "="; + []; + "" + ]; + [ + "&="; + []; + ""; + [ + [ + ""; + "" + ] + ]; + "="; + []; + ""; + []; + ""; + [ + [ + ""; + [ + "" + ] + ] + ]; + "="; + []; + "" + ]; + [ + "=&"; + []; + ""; + [ + [ + ""; + "" + ] + ]; + "="; + [ + [ + ""; + "&" + ] + ]; + "=%26"; + []; + ""; + [ + [ + ""; + [ + "" + ] + ] + ]; + "="; + [ + [ + ""; + [ + "&" + ] + ] + ]; + "=%26" + ]; + [ + "&=="; + [ + [ + ""; + "=" + ] + ]; + "=%3D"; + [ + [ + ""; + "=" + ] + ]; + "=%3D"; + [ + [ + "&"; + "=" + ] + ]; + "%26=%3D"; + [ + [ + ""; + [ + "=" + ] + ] + ]; + "=%3D"; + [ + [ + ""; + [ + "=" + ] + ] + ]; + "=%3D"; + [ + [ + "&"; + [ + "=" + ] + ] + ]; + "%26=%3D" + ]; + [ + "&==&"; + [ + [ + ""; + "=" + ] + ]; + "=%3D"; + [ + [ + ""; + "=" + ] + ]; + "=%3D"; + [ + [ + "&"; + "=&" + ] + ]; + "%26=%3D%26"; + [ + [ + ""; + [ + "=" + ] + ] + ]; + "=%3D"; + [ + [ + ""; + [ + "=" + ] + ] + ]; + "=%3D"; + [ + [ + "&"; + [ + "=&" + ] + ] + ]; + "%26=%3D%26" + ]; + [ + "==&"; + [ + [ + ""; + "=" + ] + ]; + "=%3D"; + [ + [ + ""; + "=" + ] + ]; + "=%3D"; + [ + [ + ""; + "=&" + ] + ]; + "=%3D%26"; + [ + [ + ""; + [ + "=" + ] + ] + ]; + "=%3D"; + [ + [ + ""; + [ + "=" + ] + ] + ]; + "=%3D"; + [ + [ + ""; + [ + "=&" + ] + ] + ]; + "=%3D%26" + ]; + [ + "=&="; + []; + ""; + [ + [ + ""; + "" + ]; + [ + ""; + "" + ] + ]; + "=&="; + [ + [ + ""; + "&=" + ] + ]; + "=%26%3D"; + []; + ""; + [ + [ + ""; + [ + ""; + "" + ] + ] + ]; + "=&="; + [ + [ + ""; + [ + "&=" + ] + ] + ]; + "=%26%3D" + ]; + [ + "=a"; + [ + [ + ""; + "a" + ] + ]; + "=a"; + [ + [ + ""; + "a" + ] + ]; + "=a"; + [ + [ + ""; + "a" + ] + ]; + "=a"; + [ + [ + ""; + [ + "a" + ] + ] + ]; + "=a"; + [ + [ + ""; + [ + "a" + ] + ] + ]; + "=a"; + [ + [ + ""; + [ + "a" + ] + ] + ]; + "=a" + ]; + [ + "a"; 
+ []; + ""; + [ + [ + "a"; + "" + ] + ]; + "a="; + []; + ""; + []; + ""; + [ + [ + "a"; + [ + "" + ] + ] + ]; + "a="; + []; + "" + ]; + [ + "a="; + []; + ""; + [ + [ + "a"; + "" + ] + ]; + "a="; + []; + ""; + []; + ""; + [ + [ + "a"; + [ + "" + ] + ] + ]; + "a="; + []; + "" + ]; + [ + "&a=b"; + [ + [ + "a"; + "b" + ] + ]; + "a=b"; + [ + [ + "a"; + "b" + ] + ]; + "a=b"; + [ + [ + "&a"; + "b" + ] + ]; + "%26a=b"; + [ + [ + "a"; + [ + "b" + ] + ] + ]; + "a=b"; + [ + [ + "a"; + [ + "b" + ] + ] + ]; + "a=b"; + [ + [ + "&a"; + [ + "b" + ] + ] + ]; + "%26a=b" + ]; + [ + "a=a+b&b=b+c"; + [ + [ + "a"; + "a b" + ]; + [ + "b"; + "b c" + ] + ]; + "a=a+b&b=b+c"; + [ + [ + "a"; + "a b" + ]; + [ + "b"; + "b c" + ] + ]; + "a=a+b&b=b+c"; + [ + [ + "a"; + "a b&b=b c" + ] + ]; + "a=a+b%26b%3Db+c"; + [ + [ + "a"; + [ + "a b" + ] + ]; + [ + "b"; + [ + "b c" + ] + ] + ]; + "b=b+c&a=a+b"; + [ + [ + "a"; + [ + "a b" + ] + ]; + [ + "b"; + [ + "b c" + ] + ] + ]; + "b=b+c&a=a+b"; + [ + [ + "a"; + [ + "a b&b=b c" + ] + ] + ]; + "a=a+b%26b%3Db+c" + ]; + [ + "a=a+b=&b=b+=c"; + [ + [ + "a"; + "a b=" + ]; + [ + "b"; + "b =c" + ] + ]; + "a=a+b%3D&b=b+%3Dc"; + [ + [ + "a"; + "a b=" + ]; + [ + "b"; + "b =c" + ] + ]; + "a=a+b%3D&b=b+%3Dc"; + [ + [ + "a"; + "a b=&b=b =c" + ] + ]; + "a=a+b%3D%26b%3Db+%3Dc"; + [ + [ + "a"; + [ + "a b=" + ] + ]; + [ + "b"; + [ + "b =c" + ] + ] + ]; + "b=b+%3Dc&a=a+b%3D"; + [ + [ + "a"; + [ + "a b=" + ] + ]; + [ + "b"; + [ + "b =c" + ] + ] + ]; + "b=b+%3Dc&a=a+b%3D"; + [ + [ + "a"; + [ + "a b=&b=b =c" + ] + ] + ]; + "a=a+b%3D%26b%3Db+%3Dc" + ]; + [ + "a=1&a=2"; + [ + [ + "a"; + "1" + ]; + [ + "a"; + "2" + ] + ]; + "a=1&a=2"; + [ + [ + "a"; + "1" + ]; + [ + "a"; + "2" + ] + ]; + "a=1&a=2"; + [ + [ + "a"; + "1&a=2" + ] + ]; + "a=1%26a%3D2"; + [ + [ + "a"; + [ + "1"; + "2" + ] + ] + ]; + "a=1&a=2"; + [ + [ + "a"; + [ + "1"; + "2" + ] + ] + ]; + "a=1&a=2"; + [ + [ + "a"; + [ + "1&a=2" + ] + ] + ]; + "a=1%26a%3D2" + ]; + [ + ";a=b"; + [ + [ + ";a"; + "b" + ] + ]; + ";a=b"; + [ 
+ [ + ";a"; + "b" + ] + ]; + ";a=b"; + [ + [ + "a"; + "b" + ] + ]; + "a=b"; + [ + [ + ";a"; + [ + "b" + ] + ] + ]; + ";a=b"; + [ + [ + ";a"; + [ + "b" + ] + ] + ]; + ";a=b"; + [ + [ + "a"; + [ + "b" + ] + ] + ]; + "a=b" + ]; + [ + "a=a+b;b=b+c"; + [ + [ + "a"; + "a b;b=b c" + ] + ]; + "a=a+b;b%3Db+c"; + [ + [ + "a"; + "a b;b=b c" + ] + ]; + "a=a+b;b%3Db+c"; + [ + [ + "a"; + "a b" + ]; + [ + "b"; + "b c" + ] + ]; + "a=a+b;b=b+c"; + [ + [ + "a"; + [ + "a b;b=b c" + ] + ] + ]; + "a=a+b;b%3Db+c"; + [ + [ + "a"; + [ + "a b;b=b c" + ] + ] + ]; + "a=a+b;b%3Db+c"; + [ + [ + "a"; + [ + "a b" + ] + ]; + [ + "b"; + [ + "b c" + ] + ] + ]; + "b=b+c;a=a+b" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt new file mode 100644 index 00000000000..09115e94005 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/canondata/test.test_Url_/results.txt @@ -0,0 +1,1640 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "value"; + [ + "DataType"; + "String" + ] + ]; + [ + "parse"; + [ + "StructType"; + [ + [ + "Frag"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "Host"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "ParseError"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "Pass"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "Path"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "Port"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "Query"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "Scheme"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "User"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + [ + "significant_domain"; + [ + "DataType"; + "String" + ] + ]; + [ + "tl_is_significant"; + [ + "DataType"; + "String" + ] + ]; + [ + "tld"; + [ + "DataType"; + "String" + ] + ]; + [ + "punycode"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "encode"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "decode"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "param"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_qs_and_fragment"; + [ + "DataType"; + "String" + ] + ]; + [ + "cut_www"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_www2"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + 
"host"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "host_port"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "scheme_host"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "scheme_host_port"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "cut_scheme"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "tail"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "path"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "fragment"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "port"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "domain0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "domain1"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "domain3"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "domain_level"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "norm"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "http://"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + ""; + ""; + ""; + [ + "http://" + ]; + [ + "http://" + ]; + [ + "http://" + ]; + #; + "http://"; + #; + #; + #; + #; + [ + "http://" + ]; + [ + "http://" + ]; + #; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + #; + #; + "1"; + # + ]; + [ + "http://lenta.ru"; + [ + #; + [ + "lenta.ru" + ]; + #; + #; + [ + "/" + ]; + #; + #; + [ + "http" + ]; + # + ]; + "lenta.ru"; + "lenta.ru"; + "ru"; + [ + "http://lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + #; + "http://lenta.ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + [ + "http://lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + [ + "ru" + ]; + #; + "2"; + 
[ + "http://lenta.ru/" + ] + ]; + [ + "http://someone.livejournal.com/blog"; + [ + #; + [ + "someone.livejournal.com" + ]; + #; + #; + [ + "/blog" + ]; + #; + #; + [ + "http" + ]; + # + ]; + "livejournal.com"; + "livejournal.com"; + "com"; + [ + "http://someone.livejournal.com/blog" + ]; + [ + "http://someone.livejournal.com/blog" + ]; + [ + "http://someone.livejournal.com/blog" + ]; + #; + "http://someone.livejournal.com/blog"; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "http://someone.livejournal.com" + ]; + [ + "http://someone.livejournal.com" + ]; + [ + "someone.livejournal.com/blog" + ]; + [ + "/blog" + ]; + [ + "/blog" + ]; + #; + [ + "80" + ]; + #; + [ + "com" + ]; + [ + "someone.livejournal.com" + ]; + "3"; + [ + "http://someone.livejournal.com/blog" + ] + ]; + [ + "http://bbc.co.uk/"; + [ + #; + [ + "bbc.co.uk" + ]; + #; + #; + [ + "/" + ]; + #; + #; + [ + "http" + ]; + # + ]; + "bbc.co.uk"; + "co.uk"; + "uk"; + [ + "http://bbc.co.uk/" + ]; + [ + "http://bbc.co.uk/" + ]; + [ + "http://bbc.co.uk/" + ]; + #; + "http://bbc.co.uk/"; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "http://bbc.co.uk" + ]; + [ + "http://bbc.co.uk" + ]; + [ + "bbc.co.uk/" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + [ + "uk" + ]; + [ + "bbc.co.uk" + ]; + "3"; + [ + "http://bbc.co.uk/" + ] + ]; + [ + "https://www.yandex.com.tr/search"; + [ + #; + [ + "www.yandex.com.tr" + ]; + #; + #; + [ + "/search" + ]; + #; + #; + [ + "https" + ]; + # + ]; + "yandex.com.tr"; + "com.tr"; + "tr"; + [ + "https://www.yandex.com.tr/search" + ]; + [ + "https://www.yandex.com.tr/search" + ]; + [ + "https://www.yandex.com.tr/search" + ]; + #; + "https://www.yandex.com.tr/search"; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "www.yandex.com.tr" + ]; + [ + "www.yandex.com.tr" + ]; + [ + "https://www.yandex.com.tr" 
+ ]; + [ + "https://www.yandex.com.tr" + ]; + [ + "www.yandex.com.tr/search" + ]; + [ + "/search" + ]; + [ + "/search" + ]; + #; + [ + "443" + ]; + #; + [ + "tr" + ]; + [ + "yandex.com.tr" + ]; + "4"; + [ + "https://www.yandex.com.tr/search" + ] + ]; + [ + "https://www2.yandex.com.tr/search"; + [ + #; + [ + "www2.yandex.com.tr" + ]; + #; + #; + [ + "/search" + ]; + #; + #; + [ + "https" + ]; + # + ]; + "yandex.com.tr"; + "com.tr"; + "tr"; + [ + "https://www2.yandex.com.tr/search" + ]; + [ + "https://www2.yandex.com.tr/search" + ]; + [ + "https://www2.yandex.com.tr/search" + ]; + #; + "https://www2.yandex.com.tr/search"; + [ + "www2.yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "www2.yandex.com.tr" + ]; + [ + "www2.yandex.com.tr" + ]; + [ + "https://www2.yandex.com.tr" + ]; + [ + "https://www2.yandex.com.tr" + ]; + [ + "www2.yandex.com.tr/search" + ]; + [ + "/search" + ]; + [ + "/search" + ]; + #; + [ + "443" + ]; + #; + [ + "tr" + ]; + [ + "yandex.com.tr" + ]; + "4"; + [ + "https://www2.yandex.com.tr/search" + ] + ]; + [ + "lenta.ru"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "lenta.ru"; + "lenta.ru"; + "ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + #; + "lenta.ru"; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "lenta.ru" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + #; + #; + [ + "ru" + ]; + #; + "2"; + # + ]; + [ + "bbc.co.uk/news"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "bbc.co.uk"; + "co.uk"; + "uk"; + [ + "bbc.co.uk/news" + ]; + [ + "bbc.co.uk/news" + ]; + [ + "bbc.co.uk/news" + ]; + #; + "bbc.co.uk/news"; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk" + ]; + [ + "bbc.co.uk/news" + ]; + [ + "/news" + ]; + [ + "/news" + ]; + #; + #; + #; + [ + "uk" + ]; + [ + "bbc.co.uk" + ]; + 
"3"; + # + ]; + [ + "yandex.com.tr/maps?foo="; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "yandex.com.tr"; + "com.tr"; + "tr"; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "" + ]; + "yandex.com.tr/maps"; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr" + ]; + [ + "yandex.com.tr/maps?foo=" + ]; + [ + "/maps?foo=" + ]; + [ + "/maps" + ]; + #; + #; + #; + [ + "tr" + ]; + [ + "yandex.com.tr" + ]; + "3"; + # + ]; + [ + "someone.livejournal.com?foo=bar#top"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "livejournal.com"; + "livejournal.com"; + "com"; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "bar" + ]; + "someone.livejournal.com"; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com?foo=bar#top" + ]; + [ + "/?foo=bar#top" + ]; + [ + "/" + ]; + [ + "top" + ]; + #; + #; + [ + "com" + ]; + [ + "someone.livejournal.com" + ]; + "3"; + # + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "j.k"; + "j.k"; + "k"; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + #; + "a.b.c.d.e.f.g.h.i.j.k"; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k" + ]; + [ + "a.b.c.d.e.f.g.h.i.j.k#l.m.n" + ]; + [ + "/#l.m.n" + ]; + [ + "/" + ]; + [ + "l.m.n" 
+ ]; + #; + #; + [ + "k" + ]; + [ + "i.j.k" + ]; + "11"; + # + ]; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "tl.md"; + "foo.tl.md"; + "md"; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "foo.tl.md/ \xD1\x8E\xD0\xBD\xD0\xB8\xD0\xBA\xD0\xBE\xD0\xB4" + ]; + #; + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md" + ]; + [ + "foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + [ + "/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4" + ]; + #; + #; + #; + [ + "md" + ]; + [ + "foo.tl.md" + ]; + "3"; + # + ]; + [ + "xn--d1acpjx3f.xn--p1ai"; + [ + #; + #; + [ + "ParsedBadFormat" + ]; + #; + #; + #; + #; + #; + # + ]; + "xn--d1acpjx3f.xn--p1ai"; + "xn--d1acpjx3f.xn--p1ai"; + "xn--p1ai"; + [ + "\xD1\x8F\xD0\xBD\xD0\xB4\xD0\xB5\xD0\xBA\xD1\x81.\xD1\x80\xD1\x84" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + #; + "xn--d1acpjx3f.xn--p1ai"; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "xn--d1acpjx3f.xn--p1ai" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + #; + #; + [ + "xn--p1ai" + ]; + #; + "2"; + # + ]; + [ + "https://ya.ru:80/search/?text=test&lr=213#top"; + [ + [ + "top" + ]; + [ + "ya.ru" + ]; + #; + #; + [ + "/search/" + ]; + [ + "80" + ]; + [ + "text=test&lr=213" + ]; + [ + "https" + ]; + # + ]; + "ya.ru"; + "ya.ru"; + "ru"; + [ + "https://ya.ru:80/search/?text=test&lr=213#top" + ]; + [ + "https://ya.ru:80/search/?text=test&lr=213#top" + ]; + [ + "https://ya.ru:80/search/?text=test&lr=213#top" + ]; + #; + 
"https://ya.ru:80/search/"; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru:80" + ]; + [ + "https://ya.ru" + ]; + [ + "https://ya.ru:80" + ]; + [ + "ya.ru:80/search/?text=test&lr=213#top" + ]; + [ + "/search/?text=test&lr=213#top" + ]; + [ + "/search/" + ]; + [ + "top" + ]; + [ + "80" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "https://ya.ru:80/search/?text=test&lr=213" + ] + ]; + [ + "https://ya.ru/search/?text=%2B"; + [ + #; + [ + "ya.ru" + ]; + #; + #; + [ + "/search/" + ]; + #; + [ + "text=%2B" + ]; + [ + "https" + ]; + # + ]; + "ya.ru"; + "ya.ru"; + "ru"; + [ + "https://ya.ru/search/?text=%2B" + ]; + [ + "https://ya.ru/search/?text=%2B" + ]; + [ + "https://ya.ru/search/?text=+" + ]; + #; + "https://ya.ru/search/"; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "https://ya.ru" + ]; + [ + "https://ya.ru" + ]; + [ + "ya.ru/search/?text=%2B" + ]; + [ + "/search/?text=%2B" + ]; + [ + "/search/" + ]; + #; + [ + "443" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "https://ya.ru/search/?text=%2B" + ] + ]; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible"; + [ + #; + #; + [ + "ParsedBadScheme" + ]; + #; + #; + #; + #; + #; + # + ]; + "yandex.ru"; + "yandex.ru"; + "ru"; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible" + ]; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible" + ]; + [ + "goal://market.yandex.ru/product-page_scroll-box_product_visible" + ]; + #; + "goal://market.yandex.ru/product-page_scroll-box_product_visible"; + [ + "market.yandex.ru" + ]; + [ + "market.yandex.ru" + ]; + [ + "market.yandex.ru" + ]; + [ + "market.yandex.ru" + ]; + [ + "goal://market.yandex.ru" + ]; + [ + "goal://market.yandex.ru" + ]; + [ + "market.yandex.ru/product-page_scroll-box_product_visible" + ]; + [ + "/product-page_scroll-box_product_visible" + ]; + [ + "/product-page_scroll-box_product_visible" + ]; + #; + #; + #; + [ + "ru" + ]; + [ + "market.yandex.ru" + ]; 
+ "3"; + # + ]; + [ + "Http://ya.ru"; + [ + #; + [ + "ya.ru" + ]; + #; + #; + [ + "/" + ]; + #; + #; + [ + "http" + ]; + # + ]; + "ya.ru"; + "ya.ru"; + "ru"; + [ + "Http://ya.ru" + ]; + [ + "Http://ya.ru" + ]; + [ + "Http://ya.ru" + ]; + #; + "Http://ya.ru"; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "Http://ya.ru" + ]; + [ + "Http://ya.ru" + ]; + [ + "ya.ru" + ]; + [ + "/" + ]; + [ + "/" + ]; + #; + [ + "80" + ]; + #; + [ + "ru" + ]; + #; + "2"; + [ + "http://ya.ru/" + ] + ]; + [ + "ftp://someone.livejournal.com:80/blog"; + [ + #; + [ + "someone.livejournal.com" + ]; + #; + #; + [ + "/blog" + ]; + [ + "80" + ]; + #; + [ + "ftp" + ]; + # + ]; + "livejournal.com"; + "livejournal.com"; + "com"; + [ + "ftp://someone.livejournal.com:80/blog" + ]; + [ + "ftp://someone.livejournal.com:80/blog" + ]; + [ + "ftp://someone.livejournal.com:80/blog" + ]; + #; + "ftp://someone.livejournal.com:80/blog"; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com" + ]; + [ + "someone.livejournal.com:80" + ]; + [ + "ftp://someone.livejournal.com" + ]; + [ + "ftp://someone.livejournal.com:80" + ]; + [ + "someone.livejournal.com:80/blog" + ]; + [ + "/blog" + ]; + [ + "/blog" + ]; + #; + [ + "80" + ]; + #; + [ + "com" + ]; + [ + "someone.livejournal.com" + ]; + "3"; + [ + "ftp://someone.livejournal.com:80/blog" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in new file mode 100644 index 00000000000..50a35c68766 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.in @@ -0,0 +1,4 @@ +{"key"="";"subkey"="";"value"="abæcdöef"}; +{"key"="";"subkey"="";"value"="яндекс.ру"}; +{"key"="";"subkey"="";"value"="yandex.ru"}; +{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ag"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql new file mode 100644 index 00000000000..3dcee5ca489 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/BlockPunycode.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +PRAGMA UseBlocks; +SELECT + value, + Url::PunycodeToHostName(value) AS hostname_utf, + Url::HostNameToPunycode(Url::PunycodeToHostName(value)) as punycode_hostname, + Url::ForcePunycodeToHostName(value) AS hostname_utf_forced, + Url::ForceHostNameToPunycode(Url::ForcePunycodeToHostName(value)) as punycode_hostname_forced, + Url::CanBePunycodeHostName(value) as can_be_punycode +FROM Input; diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockTld.in b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.in new file mode 100644 index 00000000000..d98c9c25223 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.in @@ -0,0 +1,4 @@ +{"key"="";"subkey"="";"value"="ru"}; +{"key"="";"subkey"="";"value"="123"}; +{"key"="";"subkey"="";"value"="yandex"}; +{"key"="";"subkey"="";"value"="sdfsdfsdf"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql new file mode 100644 index 00000000000..7af5d71b1cb --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/BlockTld.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ +PRAGMA 
UseBlocks; +SELECT + Url::GetTLD(value) AS tld, + Url::IsKnownTLD(value) AS known, + Url::IsWellKnownTLD(value) AS well_known +FROM Input; diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in new file mode 100644 index 00000000000..94f998d48f6 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.in @@ -0,0 +1,18 @@ +{"key"="";"subkey"="";"value"="http://"}; +{"key"="";"subkey"="";"value"="http://lenta.ru"}; +{"key"="";"subkey"="";"value"="http://someone.livejournal.com/blog"}; +{"key"="";"subkey"="";"value"="http://bbc.co.uk/"}; +{"key"="";"subkey"="";"value"="https://www.yandex.com.tr/search"}; +{"key"="";"subkey"="";"value"="https://www2.yandex.com.tr/search"}; +{"key"="";"subkey"="";"value"="lenta.ru"}; +{"key"="";"subkey"="";"value"="bbc.co.uk/news"}; +{"key"="";"subkey"="";"value"="yandex.com.tr/maps?foo="}; +{"key"="";"subkey"="";"value"="someone.livejournal.com?foo=bar#top"}; +{"key"="";"subkey"="";"value"="a.b.c.d.e.f.g.h.i.j.k#l.m.n"}; +{"key"="";"subkey"="";"value"="foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"}; +{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ai"}; +{"key"="";"subkey"="";"value"="https://ya.ru:80/search/?text=test&lr=213#top"}; +{"key"="";"subkey"="";"value"="https://ya.ru/search/?text=%2B"}; +{"key"="";"subkey"="";"value"="goal://market.yandex.ru/product-page_scroll-box_product_visible"}; +{"key"="";"subkey"="";"value"="Http://ya.ru"}; +{"key"="";"subkey"="";"value"="ftp://someone.livejournal.com:80/blog"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql new file mode 100644 index 00000000000..9b1726c1dab --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/BlockUrl.sql @@ -0,0 +1,27 @@ +/* syntax version 1 */ +PRAGMA UseBlocks; +SELECT + value, + Url::Encode(value) AS encode, + Url::Decode(value) AS decode, + 
Url::GetCGIParam(value, "foo") AS param, + Url::CutQueryStringAndFragment(value) AS cut_qs_and_fragment, + Url::GetHost(value) as host, + Url::CutWWW(Url::GetHost(value)) AS cut_www, + Url::CutWWW2(Url::GetHost(value)) AS cut_www2, + Url::GetTLD(value) AS tld, + Url::PunycodeToHostName(value) AS punycode, + Url::CutScheme(value) AS cut_scheme, + Url::GetHostPort(value) as host_port, + Url::GetSchemeHost(value) AS scheme_host, + Url::GetSchemeHostPort(value) AS scheme_host_port, + Url::GetTail(value) AS tail, + Url::GetPath(value) AS path, + Url::GetFragment(value) AS fragment, + Url::GetPort(value) AS port, + Url::GetDomain(value, 0) as domain0, + Url::GetDomain(value, 1) as domain1, + Url::GetDomain(value, 3) as domain3, + Url::GetDomainLevel(value) as domain_level, + Url::Normalize(value) as norm +FROM Input; diff --git a/yql/essentials/udfs/common/url_base/test/cases/Punycode.in b/yql/essentials/udfs/common/url_base/test/cases/Punycode.in new file mode 100644 index 00000000000..50a35c68766 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/Punycode.in @@ -0,0 +1,4 @@ +{"key"="";"subkey"="";"value"="abæcdöef"}; +{"key"="";"subkey"="";"value"="яндекс.ру"}; +{"key"="";"subkey"="";"value"="yandex.ru"}; +{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ag"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/Punycode.sql b/yql/essentials/udfs/common/url_base/test/cases/Punycode.sql new file mode 100644 index 00000000000..e5dad2e3037 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/Punycode.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +SELECT + value, + Url::PunycodeToHostName(value) AS hostname_utf, + Url::HostNameToPunycode(Url::PunycodeToHostName(value)) as punycode_hostname, + Url::ForcePunycodeToHostName(value) AS hostname_utf_forced, + Url::ForceHostNameToPunycode(Url::ForcePunycodeToHostName(value)) as punycode_hostname_forced, + Url::CanBePunycodeHostName(value) as can_be_punycode +FROM Input; diff --git 
a/yql/essentials/udfs/common/url_base/test/cases/Tld.in b/yql/essentials/udfs/common/url_base/test/cases/Tld.in new file mode 100644 index 00000000000..d98c9c25223 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/Tld.in @@ -0,0 +1,4 @@ +{"key"="";"subkey"="";"value"="ru"}; +{"key"="";"subkey"="";"value"="123"}; +{"key"="";"subkey"="";"value"="yandex"}; +{"key"="";"subkey"="";"value"="sdfsdfsdf"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/Tld.sql b/yql/essentials/udfs/common/url_base/test/cases/Tld.sql new file mode 100644 index 00000000000..a26600cf639 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/Tld.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +SELECT + Url::GetTLD(value) AS tld, + Url::IsKnownTLD(value) AS known, + Url::IsWellKnownTLD(value) AS well_known +FROM Input; diff --git a/yql/essentials/udfs/common/url_base/test/cases/Url.in b/yql/essentials/udfs/common/url_base/test/cases/Url.in new file mode 100644 index 00000000000..94f998d48f6 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/Url.in @@ -0,0 +1,18 @@ +{"key"="";"subkey"="";"value"="http://"}; +{"key"="";"subkey"="";"value"="http://lenta.ru"}; +{"key"="";"subkey"="";"value"="http://someone.livejournal.com/blog"}; +{"key"="";"subkey"="";"value"="http://bbc.co.uk/"}; +{"key"="";"subkey"="";"value"="https://www.yandex.com.tr/search"}; +{"key"="";"subkey"="";"value"="https://www2.yandex.com.tr/search"}; +{"key"="";"subkey"="";"value"="lenta.ru"}; +{"key"="";"subkey"="";"value"="bbc.co.uk/news"}; +{"key"="";"subkey"="";"value"="yandex.com.tr/maps?foo="}; +{"key"="";"subkey"="";"value"="someone.livejournal.com?foo=bar#top"}; +{"key"="";"subkey"="";"value"="a.b.c.d.e.f.g.h.i.j.k#l.m.n"}; +{"key"="";"subkey"="";"value"="foo.tl.md/+%D1%8E%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4"}; +{"key"="";"subkey"="";"value"="xn--d1acpjx3f.xn--p1ai"}; +{"key"="";"subkey"="";"value"="https://ya.ru:80/search/?text=test&lr=213#top"}; 
+{"key"="";"subkey"="";"value"="https://ya.ru/search/?text=%2B"}; +{"key"="";"subkey"="";"value"="goal://market.yandex.ru/product-page_scroll-box_product_visible"}; +{"key"="";"subkey"="";"value"="Http://ya.ru"}; +{"key"="";"subkey"="";"value"="ftp://someone.livejournal.com:80/blog"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/Url.sql b/yql/essentials/udfs/common/url_base/test/cases/Url.sql new file mode 100644 index 00000000000..ed50d29ae26 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/Url.sql @@ -0,0 +1,29 @@ +/* syntax version 1 */ +SELECT + value, + Url::Parse(value) AS parse, + Url::GetSignificantDomain(value) AS significant_domain, + Url::GetSignificantDomain(value, AsList("tl")) AS tl_is_significant, + Url::GetTLD(value) AS tld, + Url::PunycodeToHostName(value) AS punycode, + Url::Encode(value) AS encode, + Url::Decode(value) AS decode, + Url::GetCGIParam(value, "foo") AS param, + Url::CutQueryStringAndFragment(value) AS cut_qs_and_fragment, + Url::CutWWW(Url::GetHost(value)) AS cut_www, + Url::CutWWW2(Url::GetHost(value)) AS cut_www2, + Url::GetHost(value) as host, + Url::GetHostPort(value) as host_port, + Url::GetSchemeHost(value) AS scheme_host, + Url::GetSchemeHostPort(value) AS scheme_host_port, + Url::CutScheme(value) AS cut_scheme, + Url::GetTail(value) AS tail, + Url::GetPath(value) AS path, + Url::GetFragment(value) AS fragment, + Url::GetPort(value) AS port, + Url::GetDomain(value, 0) as domain0, + Url::GetDomain(value, 1) as domain1, + Url::GetDomain(value, 3) as domain3, + Url::GetDomainLevel(value) as domain_level, + Url::Normalize(value) as norm +FROM Input; diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in new file mode 100644 index 00000000000..d9f5d84527c --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.in @@ -0,0 +1,20 @@ 
+{"key"="";"subkey"="";"value"="glfilter=7893318%3A7949252&glfilter=5099461%3A12103637&glfilter=8292645%3A1&hid=90404&onstock=0&local-offers-first=0"}; +{"key"="";"subkey"="";"value"=""}; +{"key"="";"subkey"="";"value"="&"}; +{"key"="";"subkey"="";"value"="&&"}; +{"key"="";"subkey"="";"value"="="}; +{"key"="";"subkey"="";"value"="&="}; +{"key"="";"subkey"="";"value"="=&"}; +{"key"="";"subkey"="";"value"="&=="}; +{"key"="";"subkey"="";"value"="&==&"}; +{"key"="";"subkey"="";"value"="==&"}; +{"key"="";"subkey"="";"value"="=&="}; +{"key"="";"subkey"="";"value"="=a"}; +{"key"="";"subkey"="";"value"="a"}; +{"key"="";"subkey"="";"value"="a="}; +{"key"="";"subkey"="";"value"="&a=b"}; +{"key"="";"subkey"="";"value"="a=a+b&b=b+c"}; +{"key"="";"subkey"="";"value"="a=a+b=&b=b+=c"}; +{"key"="";"subkey"="";"value"="a=1&a=2"}; +{"key"="";"subkey"="";"value"=";a=b"}; +{"key"="";"subkey"="";"value"="a=a+b;b=b+c"}; diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql new file mode 100644 index 00000000000..4179fd0a0c8 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQuery.sql @@ -0,0 +1,15 @@ +SELECT + value, + Url::QueryStringToList(value, false AS Strict) AS base_list, + Url::BuildQueryString(Url::QueryStringToList(value, false AS Strict)) AS base_list_build, + Url::QueryStringToList(value, true AS KeepBlankValues, false AS Strict) AS keep_blank_list, + Url::BuildQueryString(Url::QueryStringToList(value, true AS KeepBlankValues, false AS Strict)) AS keep_blank_list_build, + Url::QueryStringToList(value, ";" AS Separator, false AS Strict) AS sep_semicol_list, + Url::BuildQueryString(Url::QueryStringToList(value, ";" AS Separator, false AS Strict), ";" AS Separator) AS sep_semicol_list_build, + Url::QueryStringToDict(value, false AS Strict) AS base_dict, + Url::BuildQueryString(Url::QueryStringToDict(value, false AS Strict)) AS base_dict_build, + Url::QueryStringToDict(value, true 
AS KeepBlankValues, false AS Strict) AS keep_blank_dict, + Url::BuildQueryString(Url::QueryStringToDict(value, true AS KeepBlankValues, false AS Strict)) AS keep_blank_dict_build, + Url::QueryStringToDict(value, ";" AS Separator, false AS Strict) AS sep_semicol_dict, + Url::BuildQueryString(Url::QueryStringToDict(value, ";" AS Separator, false AS Strict), ";" AS Separator) AS sep_semicol_dict_build, +FROM Input; diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg new file mode 100644 index 00000000000..5dae597903c --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.cfg @@ -0,0 +1 @@ +xfail diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql new file mode 100644 index 00000000000..5cb0cbfc9dc --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryMaxFieldsErr.sql @@ -0,0 +1,2 @@ +SELECT + Url::QueryStringToList("glfilter=78318%3A79492&glfilter=561%3A121037&hid=904", 2 AS MaxFields) diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg new file mode 100644 index 00000000000..5dae597903c --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.cfg @@ -0,0 +1 @@ +xfail diff --git a/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql new file mode 100644 index 00000000000..b036ed5451d --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/cases/UrlQueryStrictErr.sql @@ -0,0 +1,2 @@ +SELECT + Url::QueryStringToList("glfilter=78318%3A79492&mistaken&glfilter=1&hid=904") diff --git a/yql/essentials/udfs/common/url_base/test/ya.make b/yql/essentials/udfs/common/url_base/test/ya.make new file 
mode 100644 index 00000000000..b6abb70add3 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/test/ya.make @@ -0,0 +1,17 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/url_base) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +DATA( + sbr://451427803 # Robots.in +) + +END() diff --git a/yql/essentials/udfs/common/url_base/url_base.cpp b/yql/essentials/udfs/common/url_base/url_base.cpp new file mode 100644 index 00000000000..628abe7a301 --- /dev/null +++ b/yql/essentials/udfs/common/url_base/url_base.cpp @@ -0,0 +1,7 @@ +#include <yql/essentials/public/udf/udf_helpers.h> + +#include "lib/url_base_udf.h" + +SIMPLE_MODULE(TUrlModule, EXPORTED_URL_BASE_UDF) +REGISTER_MODULES(TUrlModule) + diff --git a/yql/essentials/udfs/common/url_base/ya.make b/yql/essentials/udfs/common/url_base/ya.make new file mode 100644 index 00000000000..ce981f1f2ac --- /dev/null +++ b/yql/essentials/udfs/common/url_base/ya.make @@ -0,0 +1,32 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319908145 OUT_NOAUTO liburl_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(url_udf) + + YQL_ABI_VERSION( + 2 + 37 + 0 + ) + + SRCS( + url_base.cpp + ) + + PEERDIR( + yql/essentials/public/udf + yql/essentials/udfs/common/url_base/lib + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) + + diff --git a/yql/essentials/udfs/common/ya.make b/yql/essentials/udfs/common/ya.make new file mode 100644 index 00000000000..29266857edf --- /dev/null +++ b/yql/essentials/udfs/common/ya.make @@ -0,0 +1,31 @@ +RECURSE( + compress_base + datetime2 + digest + file + histogram + hyperloglog + ip_base + json + json2 + math + pire + protobuf + python + re2 + set + stat + streaming + string + top + topfreq + unicode_base + url_base + yson2 +) + +IF (ARCH_X86_64) + RECURSE( + hyperscan + ) +ENDIF() diff --git a/yql/essentials/udfs/common/yson2/test/canondata/result.json b/yql/essentials/udfs/common/yson2/test/canondata/result.json 
new file mode 100644 index 00000000000..e8db385e454 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/result.json @@ -0,0 +1,172 @@ +{ + "test.test[AccessJson]": [ + { + "uri": "file://test.test_AccessJson_/results.txt" + } + ], + "test.test[Access]": [ + { + "uri": "file://test.test_Access_/results.txt" + } + ], + "test.test[Attrs]": [ + { + "uri": "file://test.test_Attrs_/results.txt" + } + ], + "test.test[AutoConvertTo]": [ + { + "uri": "file://test.test_AutoConvertTo_/results.txt" + } + ], + "test.test[Contains]": [ + { + "uri": "file://test.test_Contains_/results.txt" + } + ], + "test.test[ConvertTo]": [ + { + "uri": "file://test.test_ConvertTo_/results.txt" + } + ], + "test.test[Dicts]": [ + { + "uri": "file://test.test_Dicts_/results.txt" + } + ], + "test.test[EmptyDicts]": [ + { + "uri": "file://test.test_EmptyDicts_/results.txt" + } + ], + "test.test[EmptyLists]": [ + { + "uri": "file://test.test_EmptyLists_/results.txt" + } + ], + "test.test[Equals]": [ + { + "uri": "file://test.test_Equals_/results.txt" + } + ], + "test.test[From]": [ + { + "uri": "file://test.test_From_/results.txt" + } + ], + "test.test[GenericConvertToEmptyStruct]": [ + { + "uri": "file://test.test_GenericConvertToEmptyStruct_/results.txt" + } + ], + "test.test[GenericConvertToWithAutoConvert]": [ + { + "uri": "file://test.test_GenericConvertToWithAutoConvert_/results.txt" + } + ], + "test.test[GenericConvertToWithNoStrict]": [ + { + "uri": "file://test.test_GenericConvertToWithNoStrict_/results.txt" + } + ], + "test.test[GenericConvertTo]": [ + { + "uri": "file://test.test_GenericConvertTo_/results.txt" + } + ], + "test.test[GenericFrom]": [ + { + "uri": "file://test.test_GenericFrom_/results.txt" + } + ], + "test.test[GetHash]": [ + { + "uri": "file://test.test_GetHash_/results.txt" + } + ], + "test.test[Get]": [ + { + "uri": "file://test.test_Get_/results.txt" + } + ], + "test.test[GoodForYsonBadForJson]": [ + { + "uri": 
"file://test.test_GoodForYsonBadForJson_/results.txt" + } + ], + "test.test[ImplicitFromRes]": [ + { + "uri": "file://test.test_ImplicitFromRes_/results.txt" + } + ], + "test.test[IsType]": [ + { + "uri": "file://test.test_IsType_/results.txt" + } + ], + "test.test[JsonSerializeSkipMapEntity]": [ + { + "uri": "file://test.test_JsonSerializeSkipMapEntity_/results.txt" + } + ], + "test.test[JsonWithUtf8]": [ + { + "uri": "file://test.test_JsonWithUtf8_/results.txt" + } + ], + "test.test[JsonWithNanAsString]": [ + { + "uri": "file://test.test_JsonWithNanAsString_/results.txt" + } + ], + "test.test[Lists]": [ + { + "uri": "file://test.test_Lists_/results.txt" + } + ], + "test.test[Lookup]": [ + { + "uri": "file://test.test_Lookup_/results.txt" + } + ], + "test.test[NegativeArrayIndex]": [ + { + "uri": "file://test.test_NegativeArrayIndex_/results.txt" + } + ], + "test.test[ParseString]": [ + { + "uri": "file://test.test_ParseString_/results.txt" + } + ], + "test.test[Scalars]": [ + { + "uri": "file://test.test_Scalars_/results.txt" + } + ], + "test.test[SerializeDouble]": [ + { + "uri": "file://test.test_SerializeDouble_/results.txt" + } + ], + "test.test[Serialize]": [ + { + "uri": "file://test.test_Serialize_/results.txt" + } + ], + "test.test[WeakYsonRest]": [ + { + "uri": "file://test.test_WeakYsonRest_/results.txt" + } + ], + "test.test[WithAttrs]": [ + { + "uri": "file://test.test_WithAttrs_/results.txt" + } + ], + "test.test[YPath]": [ + { + "uri": "file://test.test_YPath_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt new file mode 100644 index 00000000000..b3f28755183 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AccessJson_/results.txt @@ -0,0 +1,126 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + 
"DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt new file mode 100644 index 00000000000..c9b9064518f --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Access_/results.txt @@ -0,0 +1,33 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "text"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "hello world" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt new file mode 100644 index 00000000000..72c297fab48 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Attrs_/results.txt @@ -0,0 +1,568 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column12"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column13"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column14"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column15"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column16"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column17"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column18"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column19"; + [ + 
"OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column20"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column21"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column22"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column23"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column24"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column25"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column26"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column27"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column28"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column29"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column30"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column31"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column32"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column33"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column34"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column35"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ]; + [ + "column36"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column37"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column38"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ]; + [ + "column39"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column40"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ]; + [ + "column41"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column42"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] 
+ ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + [ + %true + ]; + #; + #; + #; + #; + #; + #; + #; + [ + "1" + ]; + [ + "2" + ]; + #; + #; + #; + #; + #; + [ + "1" + ]; + [ + "2" + ]; + #; + #; + #; + #; + #; + [ + "1" + ]; + [ + "2" + ]; + [ + "3" + ]; + #; + #; + #; + #; + #; + #; + #; + [ + "foo" + ]; + [ + "very loooooooooooooooooong string" + ]; + [ + [ + "1" + ]; + [ + "2" + ]; + [ + "3" + ] + ]; + [ + "b"; + "c" + ]; + [ + "a"; + "b" + ]; + [ + [ + "1" + ]; + [ + "2" + ] + ]; + [ + "a"; + "b" + ]; + [ + [ + "1" + ]; + [ + "2" + ] + ]; + [ + "a"; + "b" + ]; + [ + [ + "1" + ]; + [ + "2" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt new file mode 100644 index 00000000000..034a71cae2c --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_AutoConvertTo_/results.txt @@ -0,0 +1,556 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "zero_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bool_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "int_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "uint_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "negative_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "double_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "string_to_bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "zero_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "bool_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "int_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "uint_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "negative_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "double_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "string_to_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "zero_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "bool_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "int_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "uint_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + 
[ + "negative_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "double_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "string_to_uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "zero_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "bool_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "int_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "uint_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "negative_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "double_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "string_to_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "zero_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "bool_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "int_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "uint_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "negative_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "double_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "string_to_string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "struct_stub"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "x"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "y"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "z"; + [ + "ListType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ] + ]; + [ + "tuple_stub"; + [ + "OptionalType"; + [ + "TupleType"; + [ + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ]; + [ + "DataType"; + "Int64" + ]; + [ + "ListType"; + [ + "DataType"; + "Bool" + ] + ] + ] 
+ ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %false + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + %true + ]; + [ + "0" + ]; + [ + "1" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "-123" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "0" + ]; + [ + "1" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "18446744073709551493" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "0" + ]; + [ + "1" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "-123" + ]; + [ + "123.456" + ]; + [ + "123" + ]; + [ + "0" + ]; + [ + "true" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "-123" + ]; + [ + "123.456" + ]; + [ + "123" + ]; + [ + [ + #; + "0"; + [] + ] + ]; + [ + [ + #; + "0"; + [] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt new file mode 100644 index 00000000000..8d72c6dad2c --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Contains_/results.txt @@ -0,0 +1,122 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ]; + [ + %false + ]; + [ + %false + ]; + [ + %true + ]; + [ + %false + ]; + [ + %true + ]; + [ + %false + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt new file mode 100644 index 00000000000..4061e6a4f8c --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ConvertTo_/results.txt @@ -0,0 +1,245 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "incorrect"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "number_list"; + [ + "ListType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "string_list"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "yson_list"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ]; + [ + "incorrect_list"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "number_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "string_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "yson_dict"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "incorrect_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "123" + ]; + [ + "123" + ]; + #; + [ + "1"; + "2"; + "3" + ]; + [ + "a"; + "b"; + "c" + ]; + [ + [ + "123" + ]; + [ + "{\"a\":1,\"b\":2,\"c\":3}" + ]; + [ + "{\"a\":4,\"b\":5,\"c\":6}" + ] + ]; + []; + [ + [ + "a"; 
+ "1" + ]; + [ + "b"; + "2" + ]; + [ + "c"; + "3" + ] + ]; + [ + [ + "a"; + "aaa" + ]; + [ + "b"; + "bbb" + ]; + [ + "c"; + "ccc" + ] + ]; + [ + "[\"ccc\",\"ddd\"]" + ]; + [] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt new file mode 100644 index 00000000000..e4b9cb54357 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Dicts_/results.txt @@ -0,0 +1,178 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ]; + [ + "column4"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column5"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column9"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column10"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + %false; + "2"; + [ + "a"; + "b" + ]; + [ + [ + "1" + ]; + # + ]; + [ + [ + "a"; + [ + "1" + ] + ]; + [ + "b"; + # + ] + ]; + %true; + %false; + [ + "1" + ]; + #; + [ + "c"; + "d" + ]; + [ + [ + "3" + ]; + [ + "4" + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt new file mode 100644 index 00000000000..34e7c5f7833 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyDicts_/results.txt @@ -0,0 +1,103 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column3"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ]; + [ + "column4"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + [ + "column5"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + %false; + "0"; + []; + []; + []; + %false; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt new file mode 100644 index 00000000000..7a8b8c82e5c --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_EmptyLists_/results.txt @@ -0,0 +1,50 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + %false; + "0"; + [] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt new file mode 100644 index 00000000000..736f668ab43 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Equals_/results.txt @@ -0,0 +1,124 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "a1"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "a2"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "a3"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "b"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "c"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "d"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "e"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "f"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "g"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "h"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "i"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "attrs1"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "attrs2"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %true; + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt new file mode 100644 index 00000000000..d1e62592cc2 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_From_/results.txt @@ -0,0 +1,188 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column2"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column3"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column4"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column5"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column6"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column7"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column8"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column9"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column10"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column11"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column12"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column13"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column14"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column15"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %true; + %false; + { + "$type" = "boolean"; + "$value" = "true" + }; + #; + { + "$type" = "int64"; + "$value" = "1" + }; + #; + { + "$type" = "uint64"; + "$value" = "2" + }; + #; + { + "$type" = "double"; + "$value" = "3" + }; + #; + { + "$type" = "string"; + "$value" = "foo" + }; + { + "$type" = "string"; + "$value" = "fooooooooooooooooooooooooooooooooo" + }; + #; + [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "int64"; + "$value" = "2" + }; + { + "$type" = "int64"; + "$value" = "3" + } + ]; + { + "a" = { + "$type" = "string"; + "$value" = "x" + }; + "b" = { + "$type" = "string"; + "$value" = "y" + } + } + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt new file mode 100644 index 00000000000..1029a098e13 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToEmptyStruct_/results.txt @@ -0,0 +1,34 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "list_of_empty_structs"; + [ + "ListType"; + [ + "StructType"; + [] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + []; + [] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt new file mode 100644 index 00000000000..875f9a5aae1 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithAutoConvert_/results.txt @@ -0,0 +1,338 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "bool"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "int"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "uint"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "optional_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "empty_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "string"; + [ + "DataType"; + "String" + ] + ]; + [ + "utf8"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "int_list"; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "nested_list"; + [ + "ListType"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ]; + [ + "int_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "tuple"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Int8" + ]; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ]; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "DataType"; + "Yson" + ]; + [ + "DataType"; + "Json" + ] + ] + ] + ]; + [ + "struct"; + [ + "StructType"; + [ + [ + "a"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "b"; + [ + "DataType"; + "Int8" + ] + ]; + [ + "c"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "d"; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "e"; + [ + "TupleType"; + [ + [ + "DataType"; + "Double" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ]; + [ + "x"; + [ + "TupleType"; + [ + [ + 
"OptionalType"; + [ + "DataType"; + "Double" + ] + ]; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ]; + [ + "y"; + [ + "OptionalType"; + [ + "DataType"; + "Int16" + ] + ] + ]; + [ + "z"; + [ + "ListType"; + [ + "DataType"; + "Int8" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + "0"; + "123"; + [ + "1.23" + ]; + [ + "0" + ]; + "1.23"; + "0"; + [ + "1"; + "2"; + "3"; + "7"; + "8"; + "0" + ]; + [ + [ + [ + "1" + ]; + [ + "2" + ] + ]; + [ + [ + "3" + ]; + # + ]; + []; + [] + ]; + [ + [ + "bar"; + "2" + ]; + [ + "foo"; + "1" + ]; + [ + "xxx"; + "0" + ] + ]; + [ + %false; + "1"; + [ + "foo" + ]; + [ + "1"; + "2" + ]; + #; + "null" + ]; + [ + %false; + "1"; + [ + "foo" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "bar" + ]; + [ + #; + # + ]; + #; + [] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt new file mode 100644 index 00000000000..5b6f73d6096 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertToWithNoStrict_/results.txt @@ -0,0 +1,287 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "bool"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "int"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "uint"; + [ + "OptionalType"; + [ + "DataType"; + "Uint8" + ] + ] + ]; + [ + "optional_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "empty_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "string"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "utf8"; + [ + "OptionalType"; + [ + "DataType"; + "Utf8" + ] + ] + ]; + [ + "int_list"; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "nested_list"; + [ + "ListType"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ]; + [ + "int_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "tuple"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Int8" + ]; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ]; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ]; + [ + "struct"; + [ + "StructType"; + [ + [ + "a"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "b"; + [ + "DataType"; + "Int8" + ] + ]; + [ + "c"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "d"; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "e"; + [ + "TupleType"; + [ + [ + "DataType"; + "Double" + ]; + [ + "DataType"; + "String" + ] + ] + ] + 
]; + [ + "y"; + [ + "OptionalType"; + [ + "DataType"; + "Int16" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + #; + #; + #; + #; + #; + #; + [ + "1"; + "2"; + "3" + ]; + [ + [ + [ + "1" + ]; + [ + "2" + ] + ]; + [ + [ + "3" + ]; + # + ] + ]; + [ + [ + "foo"; + "1" + ] + ]; + [ + %false; + "1"; + #; + [ + "1"; + "2" + ] + ]; + [ + %false; + "1"; + [ + "foo" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "bar" + ]; + # + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt new file mode 100644 index 00000000000..c1e579f2581 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericConvertTo_/results.txt @@ -0,0 +1,379 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "bool"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "int"; + [ + "DataType"; + "Int64" + ] + ]; + [ + "uint"; + [ + "DataType"; + "Uint8" + ] + ]; + [ + "optional_double"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "empty_int"; + [ + "OptionalType"; + [ + "DataType"; + "Int32" + ] + ] + ]; + [ + "string"; + [ + "DataType"; + "String" + ] + ]; + [ + "utf8"; + [ + "DataType"; + "Utf8" + ] + ]; + [ + "yson"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "json"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "int_list"; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "nested_list"; + [ + "ListType"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ]; + [ + "int_dict"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "tuple"; + [ + "TupleType"; + [ + [ + "DataType"; + "Bool" + ]; + [ + "DataType"; + "Int8" + ]; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ]; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ]; + [ + "struct"; + [ + "StructType"; + [ + [ + "a"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "b"; + [ + "DataType"; + "Int8" + ] + ]; + [ + "c"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "d"; + [ + "ListType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "e"; + [ + "TupleType"; + [ + [ + "DataType"; + "Double" + ]; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ] + 
]; + [ + "resource"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "list_of_floats"; + [ + "ListType"; + [ + "DataType"; + "Float" + ] + ] + ]; + [ + "bad_member"; + [ + "OptionalType"; + [ + "StructType"; + [ + [ + "a"; + [ + "StructType"; + [ + [ + "b"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + [ + "bad_element"; + [ + "OptionalType"; + [ + "TupleType"; + [ + [ + "DataType"; + "Int64" + ]; + [ + "TupleType"; + [ + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + "123"; + "123"; + [ + "1.23" + ]; + #; + "123"; + "\xD0\xBF\xD1\x80\xD0\xB8\xD1\x91\xD0\xBC"; + [ + { + "a" = { + "$type" = "string"; + "$value" = "b" + }; + "c" = # + } + ]; + [ + "[{\"a\":1},{\"a\":2},{\"a\":3}]" + ]; + [ + "1"; + "2"; + "3" + ]; + [ + [ + [ + "1" + ]; + [ + "2" + ] + ]; + [ + [ + "3" + ]; + # + ] + ]; + [ + [ + "bar"; + "2" + ]; + [ + "foo"; + "1" + ] + ]; + [ + %false; + "1"; + [ + "foo" + ]; + [ + "1"; + "2" + ] + ]; + [ + %false; + "1"; + [ + "foo" + ]; + [ + "1"; + "2" + ]; + [ + "1"; + "bar" + ] + ]; + { + "$type" = "string"; + "$value" = "foo" + }; + [ + "-3.1416"; + "42"; + "0.003" + ]; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt new file mode 100644 index 00000000000..7281b38aef0 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GenericFrom_/results.txt @@ -0,0 +1,345 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "null"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "empty_list"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "empty_dict"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "bool"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "int"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "uint"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "optional_double"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "empty_int"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "string"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "int_list"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "nested_list"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "int_dict"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "tuple"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "struct"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "utf8"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "yson"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "json"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "resource_list"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "variants"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "double_optional"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + []; + {}; + { + "$type" = "boolean"; + "$value" = "true" + }; + { + "$type" = "int64"; + "$value" = "123" + }; + { + "$type" = "uint64"; + "$value" = "123" + }; + { + "$type" = "double"; + "$value" = "1.23" + }; + #; + { + "$type" = "string"; + "$value" = "123" + }; + [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "int64"; + "$value" = "2" + }; + { + "$type" = "int64"; + "$value" = "3" + } + ]; + [ + [ + { + 
"$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "int64"; + "$value" = "2" + } + ]; + [ + { + "$type" = "int64"; + "$value" = "3" + }; + # + ] + ]; + { + "bar" = { + "$type" = "int64"; + "$value" = "2" + }; + "foo" = { + "$type" = "int64"; + "$value" = "1" + } + }; + [ + { + "$type" = "boolean"; + "$value" = "false" + }; + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "string"; + "$value" = "foo" + }; + [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "int64"; + "$value" = "2" + } + ] + ]; + { + "a" = { + "$type" = "boolean"; + "$value" = "false" + }; + "b" = { + "$type" = "int64"; + "$value" = "1" + }; + "c" = { + "$type" = "string"; + "$value" = "foo" + }; + "d" = [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "int64"; + "$value" = "2" + } + ]; + "e" = [ + { + "$type" = "double"; + "$value" = "1" + }; + { + "$type" = "string"; + "$value" = "bar" + } + ] + }; + { + "$type" = "string"; + "$value" = "\xC3\x90\xC2\xBF\xC3\x91\xC2\x80\xC3\x90\xC2\xBE\xC3\x90\xC2\xB2\xC3\x90\xC2\xB5\xC3\x91\xC2\x80\xC3\x90\xC2\xBA\xC3\x90\xC2\xB0 \xC3\x91\xC2\x81\xC3\x90\xC2\xB2\xC3\x91\xC2\x8F\xC3\x90\xC2\xB7\xC3\x90\xC2\xB8" + }; + { + "a" = { + "$type" = "int64"; + "$value" = "1" + }; + "b" = # + }; + { + "a" = { + "$type" = "string"; + "$value" = "foo" + }; + "b" = [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "int64"; + "$value" = "2" + }; + { + "$type" = "int64"; + "$value" = "3" + }; + { + "$type" = "int64"; + "$value" = "4" + }; + { + "$type" = "int64"; + "$value" = "5" + } + ] + }; + [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "string"; + "$value" = "foo" + } + ]; + [ + { + "$type" = "int64"; + "$value" = "1" + }; + { + "$type" = "string"; + "$value" = "2" + } + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt new file mode 100644 index 00000000000..273c8cc1254 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GetHash_/results.txt @@ -0,0 +1,116 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "a1"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "a2"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "a3"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "b"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "c"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "d"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "e"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "f"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "g"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "h"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "i"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "j"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ]; + "Data" = [ + [ + "7079824331463246373"; + "7079824331463246373"; + "7079824331463246373"; + "9619972962658888907"; + "7079824331463246372"; + "16786623923823870811"; + "5024551639089484741"; + "18074785969708127853"; + "12660212615513087259"; + "6712964724129011716"; + "6712964724129011716"; + "16371845032759913096" + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt new file mode 100644 index 00000000000..45886142d03 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Get_/results.txt @@ -0,0 +1,57 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "list_length"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "dict_length"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "scalar_length"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "3" + ]; + [ + "1" + ]; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt new file mode 100644 index 00000000000..e40c1593de7 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_GoodForYsonBadForJson_/results.txt @@ -0,0 +1,82 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ]; + "Data" = [ + [ + { + "$type" = "double"; + "$value" = "inf" + }; + { + "$type" = "string"; + "$value" = "\"12345\xC3\2667\"" + } + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt new file mode 100644 index 00000000000..e3310fc1181 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ImplicitFromRes_/results.txt @@ -0,0 +1,41 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + []; + [ + [] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt new file mode 100644 index 00000000000..33e0a72bb98 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_IsType_/results.txt @@ -0,0 +1,154 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "is_string"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_int64"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_uint64"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_double"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_entity"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_bool"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_list"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "is_dict"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ]; + "Data" = [ + [ + %true; + %false; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + %false; + %true; + %false; + %false; + %false; + %false; + %false; + %false + ]; + [ + %false; + %false; + %true; + %false; + %false; + %false; + %false; + %false + ]; + [ + %false; + %false; + %false; + %true; + %false; + %false; + %false; + %false + ]; + [ + %false; + %false; + %false; + %false; + %true; + %false; + %false; + %false + ]; + [ + %false; + %false; + %false; + %false; + %false; + %true; + %false; + %false + ]; + [ + %false; + %false; + %false; + %false; + %false; + %false; + %true; + %false + ]; + [ + %false; + %false; + %false; + %false; + %false; + %false; + %false; + %true + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt new file mode 100644 index 00000000000..00bda17c89f --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonSerializeSkipMapEntity_/results.txt @@ -0,0 +1,124 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "res1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res3"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res4"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res5"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res6"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res7"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "res8"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "{\"a\":123}" + ]; + [ + "{}" + ]; + [ + "{\"a\":123}" + ]; + [ + "[123,null]" + ]; + [ + "{\"a\":1,\"c\":1}" + ]; + [ + "{\"a\":{\"$attributes\":{\"c\":1,\"e\":3},\"$value\":23},\"b\":1}" + ]; + [ + "{\"b\":1}" + ]; + [ + "{\"$attributes\":{},\"$value\":23}" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt new file mode 100644 index 00000000000..ad19dad97df --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\"nan\"" + ]; + [ + "\"inf\"" + ]; + [ + "\"-inf\"" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt new file mode 100644 index 00000000000..6b113045b03 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_JsonWithUtf8_/results.txt @@ -0,0 +1,67 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\"\xD0\xA5\xD1\x8D\xD0\xBB\xD0\xBB\xD0\xBE\xD1\x83!\"" + ]; + [ + "\"\xC3\x90\xC2\xA5\xC3\x91\xC2\x8D\xC3\x90\xC2\xBB\xC3\x90\xC2\xBB\xC3\x90\xC2\xBE\xC3\x91\xC2\x83!\"" + ]; + %true; + [ + "\"\xC3\x90\xC2\xA5\xC3\x91\xC2\x8D\xC3\x90\xC2\xBB\xC3\x90\xC2\xBB\xC3\x90\xC2\xBE\xC3\x91\xC2\x83!\"" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt new file mode 100644 index 00000000000..dde36e18651 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lists_/results.txt @@ -0,0 +1,142 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Bool" + ] + ]; + [ + "column1"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ]; + [ + "column3"; + [ + "ListType"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + %false; + "3"; + [ + [ + "1" + ]; + [ + "2" + ]; + # + ]; + [ + []; + []; + [ + [ + "3" + ]; + [ + "4" + ] + ] + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + #; + [ + "456" + ] + ]; + [ + [ + "123" + ]; + # + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt new file mode 100644 index 00000000000..2938fff72b3 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Lookup_/results.txt @@ -0,0 +1,225 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column10"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column12"; + [ + "DataType"; + "Uint64" + ] + ]; + [ + "column13"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column14"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column15"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column16"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ]; + [ + %true + ]; + [ + "1" + ]; + [ + "1" + ]; + [ + "2" + ]; + [ + "2" + ]; + [ + "3" + ]; + [ + "3" + ]; + [ + "x" + ]; + [ + "x" + ]; + "2"; + [ + "2" + ]; + "1"; + [ + "1" + ]; + 
#; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt new file mode 100644 index 00000000000..86f5896b243 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_NegativeArrayIndex_/results.txt @@ -0,0 +1,133 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "2" + ]; + [ + "7" + ]; + [ + "3" + ]; + [ + "6" + ]; + [ + "7" + ]; + [ + "1" + ]; + #; + #; + [ + "1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt new file mode 100644 index 00000000000..f599aeaeaa4 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_ParseString_/results.txt @@ -0,0 +1,128 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + { + "$type" = "uint64"; + "$value" = "0" + } + ]; + [ + { + "$type" = "uint64"; + "$value" = "1" + } + ]; + [ + { + "$type" = "int64"; + "$value" = "2" + } + ]; + [ + { + "$type" = "int64"; + "$value" = "3" + } + ]; + #; + #; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt new file mode 100644 index 00000000000..32556166cba --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Scalars_/results.txt @@ -0,0 +1,462 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column4"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column5"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column6"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column7"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column8"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "column9"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column10"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column11"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column12"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column13"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column14"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column15"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "column16"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column17"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column18"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column19"; + [ + 
"OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column20"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column21"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column22"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column23"; + [ + "OptionalType"; + [ + "DataType"; + "Uint64" + ] + ] + ]; + [ + "column24"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column25"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column26"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column27"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column28"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column29"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column30"; + [ + "OptionalType"; + [ + "DataType"; + "Double" + ] + ] + ]; + [ + "column31"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column32"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column33"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column34"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column35"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column36"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column37"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + #; + [ + %true + ]; + [ + %true + ]; + [ + %false + ]; + #; + #; + #; + #; + #; + #; + #; + [ + "1" + ]; + [ + "2" + ]; + #; + #; + #; + #; + #; + #; + [ + "1" + ]; + [ + "2" + ]; + #; + #; + #; + #; + #; + [ + "1" + ]; + [ + "2" + ]; + [ + "3" + ]; + #; + #; + #; + #; + #; + #; + #; + [ + "foo" + ]; + [ + "very loooooooooooooooooong string" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt new file mode 100644 index 00000000000..1891e2e7ab4 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_SerializeDouble_/results.txt @@ -0,0 +1,66 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Yson" + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + { + "double1" = { + "$type" = "double"; + "$value" = "1.0000000001" + }; + "double2" = { + "$type" = "double"; + "$value" = "1.000000001" + }; + "double3" = { + "$type" = "double"; + "$value" = "1000000000.5" + }; + "double4" = { + "$type" = "double"; + "$value" = "10000000005" + }; + "double5" = { + "$type" = "double"; + "$value" = "10000000000.5" + }; + "double6" = { + "$type" = "double"; + "$value" = "100000000005" + } + }; + [ + "{\"double1\":1.0000000001,\"double2\":1.000000001,\"double3\":1000000000.5,\"double4\":10000000005,\"double5\":10000000000.5,\"double6\":100000000005}" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt new file mode 100644 index 00000000000..a9f067d9a0c --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_Serialize_/results.txt @@ -0,0 +1,70 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ]; + "Data" = [ + [ + { + "$attributes" = { + "a" = { + "$type" = "int64"; + "$value" = "1" + } + }; + "$value" = [ + #; + { + "a" = { + "$type" = "int64"; + "$value" = "1" + } + }; + { + "b" = { + "$type" = "uint64"; + "$value" = "2" + }; + "c" = [] + }; + { + "$attributes" = { + "q" = { + "$type" = "string"; + "$value" = "foo" + } + }; + "$type" = "double"; + "$value" = "3" + }; + {}; + { + "$type" = "string"; + "$value" = "foo" + }; + { + "$type" = "string"; + "$value" = "very loooooooooooooooooong string" + } + ] + } + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt new file mode 100644 index 00000000000..46991df7fbe --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WeakYsonRest_/results.txt @@ -0,0 +1,53 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "animal"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "wombat" + ] + ]; + [ + [ + "dog" + ] + ]; + [ + [ + "chipmunk" + ] + ]; + [ + [ + "hamster" + ] + ]; + [ + [ + "dingo" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt new file mode 100644 index 00000000000..1149280a84b --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_WithAttrs_/results.txt @@ -0,0 +1,91 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "column3"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + { + "$attributes" = { + "a" = { + "$type" = "int64"; + "$value" = "2" + } + }; + "$type" = "int64"; + "$value" = "1" + } + ]; + [ + { + "$type" = "int64"; + "$value" = "1" + } + ]; + #; + [ + { + "$attributes" = { + "b" = { + "$type" = "int64"; + "$value" = "3" + } + }; + "$type" = "int64"; + "$value" = "1" + } + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt b/yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt new file mode 100644 index 00000000000..b1f2759eda8 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/canondata/test.test_YPath_/results.txt @@ -0,0 +1,112 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "data"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "attrs"; + [ + "DictType"; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "miss"; + [ + "OptionalType"; + [ + "DataType"; + "Yson" + ] + ] + ]; + [ + "num"; + [ + "OptionalType"; + [ + "DataType"; + "Int64" + ] + ] + ]; + [ + "str_attr"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "miss_attr"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ]; + [ + "bad_conv"; + [ + "OptionalType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "123" + ]; + [ + [ + "x"; + "y" + ] + ]; + #; + [ + "123" + ]; + [ + "y" + ]; + #; + # + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/cases/Access.sql b/yql/essentials/udfs/common/yson2/test/cases/Access.sql new file mode 100644 index 00000000000..13ae815e27b --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Access.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +$yson = cast('{"commands"=[{"command"="say";"text"="hello world"}]}' as yson); +SELECT Yson::ConvertToString($yson["command" || "s"].0["text"]) as text; + diff --git a/yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql b/yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql new file mode 100644 index 00000000000..504da2c7119 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/AccessJson.sql @@ -0,0 +1,12 @@ +PRAGMA yson.DisableStrict; +$yson = cast(@@{a="привет"}@@ as yson); +$yson_node = Yson::Parse($yson); + +select Yson::ConvertToString($yson.a); +select Yson::ConvertToString($yson_node.a); + +$json = cast(@@{"a":"привет"}@@ as json); +$json_node = Yson::ParseJson($json); + +select Yson::ConvertToString($json.a); +select Yson::ConvertToString($json_node.a); diff --git a/yql/essentials/udfs/common/yson2/test/cases/Attrs.sql b/yql/essentials/udfs/common/yson2/test/cases/Attrs.sql new file mode 100644 index 00000000000..41709ce806f --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Attrs.sql @@ -0,0 +1,56 @@ +/* syntax version 1 */ + +$no_strict = Yson::Options(false AS Strict); + +select +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>#')), $no_strict), +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>%true'))), +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>1')), $no_strict), +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>2u')), $no_strict), +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>3.0')), $no_strict), +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>foo')), $no_strict), +Yson::ConvertToBool(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict), + +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>#')), 
$no_strict), +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>%true')), $no_strict), +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>1'))), +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>2u'))), +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>3.0')), $no_strict), +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>foo')), $no_strict), +Yson::ConvertToInt64(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict), + +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>#')), $no_strict), +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>%true')), $no_strict), +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>1'))), +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>2u'))), +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>3.0')), $no_strict), +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>foo')), $no_strict), +Yson::ConvertToUint64(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict), + +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>#')), $no_strict), +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>%true')), $no_strict), +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>1'))), +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>2u'))), +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>3.0'))), +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>foo')), $no_strict), +Yson::ConvertToDouble(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"')), $no_strict), + +Yson::ConvertToString(Yson::Parse(Yson('<a=1>#')), $no_strict), +Yson::ConvertToString(Yson::Parse(Yson('<a=1>%true')), $no_strict), +Yson::ConvertToString(Yson::Parse(Yson('<a=1>1')), $no_strict), +Yson::ConvertToString(Yson::Parse(Yson('<a=1>2u')), $no_strict), +Yson::ConvertToString(Yson::Parse(Yson('<a=1>3.0')), $no_strict), +Yson::ConvertToString(Yson::Parse(Yson('<a=1>foo'))), +Yson::ConvertToString(Yson::Parse(Yson('<a=1>"very loooooooooooooooooong string"'))), + +ListMap(Yson::ConvertToList(Yson::Parse(Yson('<a=1>[1;2;3]'))), Yson::ConvertToInt64), +DictKeys(Yson::ConvertToDict(Yson::Parse(Yson('<a=1>{b=1;c=2}')))), + 
+DictKeys(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>#')))), +ListMap(DictPayloads(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>#')))), Yson::ConvertToInt64), + +DictKeys(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>[]')))), +ListMap(DictPayloads(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>[]')))), Yson::ConvertToInt64), + +DictKeys(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>{}')))), +ListMap(DictPayloads(Yson::Attributes(Yson::Parse(Yson('<a=1;b=2>{}')))), Yson::ConvertToInt64); diff --git a/yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql b/yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql new file mode 100644 index 00000000000..0655fea2eca --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/AutoConvertTo.sql @@ -0,0 +1,53 @@ +$zero = Yson::Parse("0u"); +$bool = Yson::FromBool(true); +$int = Yson::Parse("123"); +$uint = Yson::Parse("123u"); +$negative = Yson::Parse("-123"); +$double = Yson::Parse("123.456"); +$string = Yson::Parse("\"123\""); + +$options = Yson::Options(true AS AutoConvert); + +SELECT + Yson::ConvertToBool($zero, $options) AS zero_to_bool, + Yson::ConvertToBool($bool, $options) AS bool_to_bool, + Yson::ConvertToBool($int, $options) AS int_to_bool, + Yson::ConvertToBool($uint, $options) AS uint_to_bool, + Yson::ConvertToBool($negative, $options) AS negative_to_bool, + Yson::ConvertToBool($double, $options) AS double_to_bool, + Yson::ConvertToBool($string, $options) AS string_to_bool, + + Yson::ConvertToInt64($zero, $options) AS zero_to_int, + Yson::ConvertToInt64($bool, $options) AS bool_to_int, + Yson::ConvertToInt64($int, $options) AS int_to_int, + Yson::ConvertToInt64($uint, $options) AS uint_to_int, + Yson::ConvertToInt64($negative, $options) AS negative_to_int, + Yson::ConvertToInt64($double, $options) AS double_to_int, + Yson::ConvertToInt64($string, $options) AS string_to_int, + + Yson::ConvertToUint64($zero, $options) AS zero_to_uint, + Yson::ConvertToUint64($bool, $options) AS bool_to_uint, + 
Yson::ConvertToUint64($int, $options) AS int_to_uint, + Yson::ConvertToUint64($uint, $options) AS uint_to_uint, + Yson::ConvertToUint64($negative, $options) AS negative_to_uint, + Yson::ConvertToUint64($double, $options) AS double_to_uint, + Yson::ConvertToUint64($string, $options) AS string_to_uint, + + Yson::ConvertToDouble($zero, $options) AS zero_to_double, + Yson::ConvertToDouble($bool, $options) AS bool_to_double, + Yson::ConvertToDouble($int, $options) AS int_to_double, + Yson::ConvertToDouble($uint, $options) AS uint_to_double, + Yson::ConvertToDouble($negative, $options) AS negative_to_double, + Yson::ConvertToDouble($double, $options) AS double_to_double, + Yson::ConvertToDouble($string, $options) AS string_to_double, + + Yson::ConvertToString($zero, $options) AS zero_to_string, + Yson::ConvertToString($bool, $options) AS bool_to_string, + Yson::ConvertToString($int, $options) AS int_to_string, + Yson::ConvertToString($uint, $options) AS uint_to_string, + Yson::ConvertToString($negative, $options) AS negative_to_string, + Yson::ConvertToString($double, $options) AS double_to_string, + Yson::ConvertToString($string, $options) AS string_to_string, + + Yson::ConvertTo($string, Struct<x:Double?, y:Int64, z:List<Bool>>, $options) AS struct_stub, + Yson::ConvertTo($double, Tuple<Double?, Int64, List<Bool>>, $options) AS tuple_stub; diff --git a/yql/essentials/udfs/common/yson2/test/cases/Contains.sql b/yql/essentials/udfs/common/yson2/test/cases/Contains.sql new file mode 100644 index 00000000000..565a999e5f3 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Contains.sql @@ -0,0 +1,11 @@ +/* syntax version 1 */ + +select +Yson::Contains(Yson::Parse('{a=1}'),'a'), +Yson::Contains(Yson::Parse('{a=1}'),'b'), +Yson::Contains(Yson::Parse('[]'),'0'), +Yson::Contains(Yson::Parse('[1;2]'),'0'), +Yson::Contains(Yson::Parse('[1;2]'),'2'), +Yson::Contains(Yson::Parse('[1;2]'),'-2'), +Yson::Contains(Yson::Parse('[1;2]'),'-3'), 
+Yson::Contains(Yson::Parse('2'),'2', Yson::Options(false AS Strict)); diff --git a/yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql b/yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql new file mode 100644 index 00000000000..b15c5249201 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/ConvertTo.sql @@ -0,0 +1,34 @@ +/* syntax version 1 */ +$bool = Yson::Parse("true"); +$number = Yson::Parse("123"); +$string = Yson::Parse("\"123\""); +$number_list = Yson::Parse("[1;2;3]"); +$string_list = Yson::Parse("[\"a\";\"b\";\"c\"]"); +$yson_list = Yson::Parse("[123;{a=1;b=2;c=3};{a=4;b=5;c=6}]"); +$number_dict = Yson::Parse("{a=1;b=2;c=3}"); +$string_dict = Yson::Parse("{a=\"aaa\";b=\"bbb\";c=\"ccc\"}"); +$yson_dict = Yson::Parse("{a=123;b=\"bbb\";c=[\"ccc\";\"ddd\"]}"); +$options = Yson::Options(true AS Strict); +$no_strict = Yson::Options(false AS Strict); + +SELECT + Yson::ConvertToBool($bool, $options) AS `bool`, + Yson::ConvertToInt64($number, $options) AS `int`, + Yson::ConvertToUint64($number, $options) AS `uint`, + Yson::ConvertToDouble($number, $options) AS `double`, + Yson::ConvertToString($string, $options) AS `string`, + Yson::ConvertToInt64($string, $no_strict) AS incorrect, + Yson::ConvertToUint64List($number_list) AS number_list, + Yson::ConvertToStringList($string_list) AS string_list, + ListMap( + Yson::ConvertToList($yson_list), + ($item) -> { return Yson::SerializeJson($item); } + ) AS yson_list, + Yson::ConvertToStringList($number_list, $no_strict) AS incorrect_list, + Yson::ConvertToInt64Dict($number_dict) AS number_dict, + Yson::ConvertToStringDict($string_dict) AS string_dict, + Yson::SerializeJson( + Yson::ConvertToDict($yson_dict)["c"] + ) AS yson_dict, + Yson::ConvertToBoolDict($number_dict, $no_strict) AS incorrect_dict; + diff --git a/yql/essentials/udfs/common/yson2/test/cases/Dicts.sql b/yql/essentials/udfs/common/yson2/test/cases/Dicts.sql new file mode 100644 index 00000000000..a0b9ce59685 --- /dev/null +++ 
b/yql/essentials/udfs/common/yson2/test/cases/Dicts.sql @@ -0,0 +1,13 @@ +$x = Yson::Parse("{a=1;a=2;b={c=3;d=4}}"); +$no_strict = Yson::Options(false AS Strict); +select Yson::ConvertToDict($x) is null, + DictLength(Yson::ConvertToDict($x)), + DictKeys(Yson::ConvertToDict($x)), + ListMap(DictPayloads(Yson::ConvertToDict($x)), ($i)->(Yson::ConvertToInt64($i, $no_strict))), + ListMap(DictItems(Yson::ConvertToDict($x)),($p)->(($p.0,Yson::ConvertToInt64($p.1, $no_strict)))), + DictContains(Yson::ConvertToDict($x),"a"), + DictContains(Yson::ConvertToDict($x),"c"), + Yson::ConvertToInt64(DictLookup(Yson::ConvertToDict($x),"a")), + Yson::ConvertToInt64(DictLookup(Yson::ConvertToDict($x),"c")), + DictKeys(Yson::ConvertToDict(Yson::ConvertToDict($x)["b"])), + ListMap(DictPayloads(Yson::ConvertToDict(Yson::ConvertToDict($x)["b"])),($y)->(Yson::ConvertToInt64($y))) diff --git a/yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql b/yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql new file mode 100644 index 00000000000..779634413ae --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/EmptyDicts.sql @@ -0,0 +1,9 @@ +/* syntax version 1 */ +$x = Yson::Parse("{}"); +select Yson::ConvertToDict($x) is null, + DictLength(Yson::ConvertToDict($x)), + DictKeys(Yson::ConvertToDict($x)), + ListMap(DictPayloads(Yson::ConvertToDict($x)),($y)->(Yson::ConvertToInt64($y))), + ListMap(DictItems(Yson::ConvertToDict($x)),($p)->(($p.0,Yson::ConvertToInt64($p.1)))), + DictContains(Yson::ConvertToDict($x),"a"), + Yson::ConvertToInt64(DictLookup(Yson::ConvertToDict($x),"a")); diff --git a/yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql b/yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql new file mode 100644 index 00000000000..60c879a9481 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/EmptyLists.sql @@ -0,0 +1,5 @@ +/* syntax version 1 */ +$x = Yson::Parse("[]"); +select Yson::ConvertToList($x) is null, + 
ListLength(Yson::ConvertToList($x)), + ListMap(Yson::ConvertToList($x), ($y)->(Yson::ConvertToInt64($y))); diff --git a/yql/essentials/udfs/common/yson2/test/cases/Equals.sql b/yql/essentials/udfs/common/yson2/test/cases/Equals.sql new file mode 100644 index 00000000000..a64ea70b054 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Equals.sql @@ -0,0 +1,26 @@ +$a1 = Yson::Parse(Yson("{a=1;b=2}")); +$a2 = Yson::Parse(Yson("{a=1;b=2;}")); +$a3 = Yson::Parse(Yson("{b=2;a=1}")); +$b = Yson::Parse(Yson("#")); +$c = Yson::Parse(Yson("{a=1;b=3}")); +$d = Yson::Parse(Yson("{a=#}")); +$e = Yson::Parse(Yson("[a;1;b;2]")); +$f = Yson::Parse(Yson("{a=1u;b=2}")); +$g = Yson::Parse(Yson("{a=1;b=\"2\"}")); +$h = Yson::Parse(Yson("<foo=bar>{a=1;b=2}")); +$i = Yson::Parse(Yson("{a=1;b=<foo=bar>2}")); + +SELECT + Yson::Equals($a1, $a1) AS a1, + Yson::Equals($a1, $a2) AS a2, + Yson::Equals($a1, $a3) AS a3, + Yson::Equals($a1, $b) AS b, + Yson::Equals($a1, $c) AS c, + Yson::Equals($a1, $d) AS d, + Yson::Equals($a1, $e) AS e, + Yson::Equals($a1, $f) AS f, + Yson::Equals($a1, $g) AS g, + Yson::Equals($a1, $h) AS h, + Yson::Equals($a1, $i) AS i, + Yson::Equals($h, $i) AS attrs1, + Yson::Equals($i, $i) AS attrs2; diff --git a/yql/essentials/udfs/common/yson2/test/cases/From.sql b/yql/essentials/udfs/common/yson2/test/cases/From.sql new file mode 100644 index 00000000000..2a1f6ed15a3 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/From.sql @@ -0,0 +1,21 @@ +/* syntax version 1 */ +select +Yson::IsEntity(Yson::From(NULL)), +Yson::IsEntity(Yson::Parse(Yson("#"))), +Yson::IsEntity(Yson::Parse(Yson("1"))), + +Yson::SerializeText(Yson::FromBool(true)), +Yson::SerializeText(Yson::FromBool(Nothing(Bool?))), +Yson::SerializeText(Yson::FromInt64(1l)), +Yson::SerializeText(Yson::FromInt64(Nothing(Int64?))), +Yson::SerializeText(Yson::FromUint64(2ul)), +Yson::SerializeText(Yson::FromUint64(Nothing(Uint64?))), +Yson::SerializeText(Yson::FromDouble(3.)), 
+Yson::SerializeText(Yson::FromDouble(Nothing(Double?))), +Yson::SerializeText(Yson::FromString("foo")), +Yson::SerializeText(Yson::FromString("fooooooooooooooooooooooooooooooooo")), +Yson::SerializeText(Yson::FromString(Nothing(String?))), + +Yson::SerializeText(Yson::FromList(Yson::ConvertToList(Yson::Parse(Yson("[1;2;3]"))))), +Yson::SerializeText(Yson::FromDict(Yson::ConvertToDict(Yson::Parse(Yson("{a=x;b=y}"))))); + diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql new file mode 100644 index 00000000000..b23d524cff0 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertTo.sql @@ -0,0 +1,19 @@ +SELECT + Yson::ConvertTo(Yson::Parse(Yson("%true")), Bool) AS `bool`, + Yson::ConvertTo(Yson::Parse(Yson("123")), Int64) AS `int`, + Yson::ConvertTo(Yson::Parse(Yson("123u")), Uint8) AS `uint`, + Yson::ConvertTo(Yson::Parse(Yson("1.23")), Double?) AS optional_double, + Yson::ConvertTo(Yson::Parse(Yson("#")), Int32?) AS empty_int, + Yson::ConvertTo(Yson::Parse(Yson("\"123\"")), String) AS `string`, + Yson::ConvertTo(Yson::Parse(Yson("\"приём\"")), Utf8) AS `utf8`, + Yson::ConvertTo(Yson::Parse(Yson("{a=b;c=#}")), Yson?) AS `yson`, + Yson::ConvertTo(Yson::Parse(Yson("[{a=1};{a=2};{a=3}]")), Json?) 
AS `json`, + Yson::ConvertTo(Yson::Parse(Yson("[1;2;3]")), List<Int64>) AS int_list, + Yson::ConvertTo(Yson::Parse(Yson("[[1;2];[3;#]]")), List<List<Int64?>>) AS nested_list, + Yson::ConvertTo(Yson::Parse(Yson("{foo=1;bar=2}")), Dict<String,Int64>) AS int_dict, + Yson::ConvertTo(Yson::Parse(Yson("[%false;1;\"foo\";[1;2]]")), Tuple<Bool,Int8,String?,List<Int64>>) AS `tuple`, + Yson::ConvertTo(Yson::Parse(Yson("{a=%false;b=1;c=foo;d=[1;2];e=[1.0;bar]}")), Struct<a:Bool,b:Int8,c:String?,d:List<Int64>,e:Tuple<Double,String>>) AS `struct`, + Yson::Serialize(Yson::ConvertTo(Yson::Parse(Yson("foo")), Resource<'Yson2.Node'>)) AS `resource`, + Yson::ConvertTo(Yson::Parse(Yson("[-3.1416; 42.0; 0.003]")), List<Float>) AS list_of_floats, + Yson::ConvertTo(@@{a=[1]}@@y, Struct<a: Struct<b: Int64>>?, Yson::Options(false AS Strict)) as bad_member, + Yson::ConvertTo(@@[1;2]@@y, Tuple<Int64, Tuple<Int64>>?, Yson::Options(false AS Strict)) as bad_element; diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql new file mode 100644 index 00000000000..7210c1d47ae --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToEmptyStruct.sql @@ -0,0 +1,2 @@ +SELECT + Yson::ConvertTo(@@[{"year"="9999"; "a"="three"; "b"=3}; {"year"="9999"; "a"="four"; "b"=4}]@@y, List<Struct<>>) as list_of_empty_structs; diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql new file mode 100644 index 00000000000..6f1e0a9ff4e --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithAutoConvert.sql @@ -0,0 +1,15 @@ +$ac = Yson::Options(true AS AutoConvert); + +SELECT + Yson::ConvertTo(Yson::Parse(Yson("yes")), Bool, $ac) AS `bool`, + Yson::ConvertTo(Yson::Parse(Yson("no")), Int64, $ac) AS `int`, + 
Yson::ConvertTo(Yson::Parse(Yson("123.7")), Uint8, $ac) AS `uint`, + Yson::ConvertTo(Yson::Parse(Yson(@@"1.23"@@)), Double?, $ac) AS optional_double, + Yson::ConvertTo(Yson::Parse(Yson("many")), Int32?, $ac) AS empty_int, + Yson::ConvertTo(Yson::Parse(Yson("1.23")), String, $ac) AS `string`, + Yson::ConvertTo(Yson::Parse(Yson("0u")), Utf8, $ac) AS `utf8`, + Yson::ConvertTo(Yson::Parse(Yson(@@[1;2;3;7.7;"8";"9.0"]@@)), List<Int64>, $ac) AS int_list, + Yson::ConvertTo(Yson::Parse(Yson("[[1;2];[3;#];5;#]")), List<List<Int64?>>, $ac) AS nested_list, + Yson::ConvertTo(Yson::Parse(Yson("{foo=1;bar=2.0;xxx=#}")), Dict<String,Int64>, $ac) AS int_dict, + Yson::ConvertTo(Yson::Parse(Yson("[%false;1;\"foo\";[1;2]]")), Tuple<Bool,Int8,String?,List<Int64>,Yson,Json>, $ac) AS `tuple`, + Yson::ConvertTo(Yson::Parse(Yson("{a=%false;b=1;c=foo;d=[1;2];e=[1.0;bar]}")), Struct<a:Bool,b:Int8,c:String?,d:List<Int64>,e:Tuple<Double,String>,x:Tuple<Double?,String?>,y:Int16?,z:List<Int8>>, $ac) AS `struct`; diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql new file mode 100644 index 00000000000..f87b30c9973 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/GenericConvertToWithNoStrict.sql @@ -0,0 +1,15 @@ +$ns = Yson::Options(false AS Strict); + +SELECT + Yson::ConvertTo(Yson::Parse(Yson("yes")), Bool?, $ns) AS `bool`, + Yson::ConvertTo(Yson::Parse(Yson("no")), Int64?, $ns) AS `int`, + Yson::ConvertTo(Yson::Parse(Yson("123.7")), Uint8?, $ns) AS `uint`, + Yson::ConvertTo(Yson::Parse(Yson(@@"1.23"@@)), Double?, $ns) AS optional_double, + Yson::ConvertTo(Yson::Parse(Yson("many")), Int32?, $ns) AS empty_int, + Yson::ConvertTo(Yson::Parse(Yson("1.23")), String?, $ns) AS `string`, + Yson::ConvertTo(Yson::Parse(Yson("0u")), Utf8?, $ns) AS `utf8`, + Yson::ConvertTo(Yson::Parse(Yson(@@[1;2;3;7.7;"8";"9.0"]@@)), List<Int64>, $ns) AS int_list, + 
Yson::ConvertTo(Yson::Parse(Yson("[[1;2];[3;#];5;#]")), List<List<Int64?>>, $ns) AS nested_list, + Yson::ConvertTo(Yson::Parse(Yson("{foo=1;bar=2.0;xxx=#}")), Dict<String,Int64>, $ns) AS int_dict, + Yson::ConvertTo(Yson::Parse(Yson("[%false;1;42;[1;2;3.3]]")), Tuple<Bool,Int8,String?,List<Int64>>, $ns) AS `tuple`, + Yson::ConvertTo(Yson::Parse(Yson("{a=%false;b=1;c=foo;d=[1;2];e=[1.0;bar]}")), Struct<a:Bool,b:Int8,c:String?,d:List<Int64>,e:Tuple<Double,String>,y:Int16?>, $ns) AS `struct`; diff --git a/yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql b/yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql new file mode 100644 index 00000000000..2f42d8fb95d --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/GenericFrom.sql @@ -0,0 +1,21 @@ +SELECT + Yson::Serialize(Yson::From(null)) AS `null`, + Yson::Serialize(Yson::From([])) AS `empty_list`, + Yson::Serialize(Yson::From({})) AS `empty_dict`, + Yson::Serialize(Yson::From(true)) AS `bool`, + Yson::Serialize(Yson::From(123)) AS `int`, + Yson::Serialize(Yson::From(123u)) AS `uint`, + Yson::Serialize(Yson::From(Just(1.23))) AS optional_double, + Yson::Serialize(Yson::From(Nothing(Int8?))) AS empty_int, + Yson::Serialize(Yson::From("123")) AS `string`, + Yson::Serialize(Yson::From(AsList(1, 2, 3))) AS int_list, + Yson::Serialize(Yson::From(AsList(AsList(1, 2), AsList(3, 1/0)))) AS nested_list, + Yson::Serialize(Yson::From(AsDict(AsTuple("foo", 1), AsTuple("bar", 2)))) AS int_dict, + Yson::Serialize(Yson::From(AsTuple(false, 1, "foo", AsList(1,2)))) AS `tuple`, + Yson::Serialize(Yson::From(AsStruct(false AS a, 1 AS b, "foo" AS c, AsList(1,2) AS d, AsTuple(1.0, "bar") AS e))) AS `struct`, + Yson::Serialize(Yson::From(Utf8("проверка связи"))) AS `utf8`, + Yson::Serialize(Yson::From(Yson("{a=1;b=#}"))) AS `yson`, + Yson::Serialize(Yson::From(Json(@@{"a":"foo","b":[1,2,3,4,5]}@@))) AS `json`, + Yson::Serialize(Yson::From(AsList(Yson::From(1), Yson::From("foo")))) AS resource_list, + 
Yson::Serialize(Yson::From([AsVariant(1, "one"), AsVariant("2", "two")])) AS `variants`, + Yson::Serialize(Yson::From(Just(1u/0u))) AS double_optional; diff --git a/yql/essentials/udfs/common/yson2/test/cases/Get.sql b/yql/essentials/udfs/common/yson2/test/cases/Get.sql new file mode 100644 index 00000000000..4449aef41de --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Get.sql @@ -0,0 +1,8 @@ +$list = Yson::Parse("[\"abc\"; 123; #;]"); +$dict = Yson::Parse("{\"a\"=1;}"); +$scalar = Yson::Parse("123"); + +SELECT + Yson::GetLength($list) AS list_length, + Yson::GetLength($dict) AS dict_length, + Yson::GetLength($scalar, Yson::Options(false AS Strict)) AS scalar_length; diff --git a/yql/essentials/udfs/common/yson2/test/cases/GetHash.sql b/yql/essentials/udfs/common/yson2/test/cases/GetHash.sql new file mode 100644 index 00000000000..e869c45e9aa --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/GetHash.sql @@ -0,0 +1,27 @@ +$a1 = Yson::Parse(Yson("{a=1;b=2}")); +$a2 = Yson::Parse(Yson("{a=1;b=2;}")); +$a3 = Yson::Parse(Yson("{b=2;a=1}")); +$b = Yson::Parse(Yson("#")); +$c = Yson::Parse(Yson("{a=1;b=3}")); +$d = Yson::Parse(Yson("{a=#}")); +$e = Yson::Parse(Yson("[a;1;b;2]")); +$f = Yson::Parse(Yson("{a=1u;b=2}")); +$g = Yson::Parse(Yson("{a=1;b=\"2\"}")); +$h = Yson::Parse(Yson("<foo=bar>{a=1;b=2}")); +$i = Yson::Parse(Yson("{a=1;b=<foo=bar>2}")); +$j = Yson::Parse(Yson("[1;a;b;2]")); + +SELECT + Yson::GetHash($a1) AS a1, + Yson::GetHash($a2) AS a2, + Yson::GetHash($a3) AS a3, + Yson::GetHash($b) AS b, + Yson::GetHash($c) AS c, + Yson::GetHash($d) AS d, + Yson::GetHash($e) AS e, + Yson::GetHash($f) AS f, + Yson::GetHash($g) AS g, + Yson::GetHash($h) AS h, + Yson::GetHash($i) AS i, + Yson::GetHash($j) AS j, + diff --git a/yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql b/yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql new file mode 100644 index 00000000000..034b19e8905 --- /dev/null +++ 
b/yql/essentials/udfs/common/yson2/test/cases/GoodForYsonBadForJson.sql @@ -0,0 +1,7 @@ +$inf = Yson::From(1./0.); +$binary = Yson::From("\"12345\xf67\""); + +SELECT Yson::Serialize($inf), Yson::Serialize($binary); + +PRAGMA yson.DisableStrict; +SELECT Yson::SerializeJson($inf), Yson::SerializeJson($binary); diff --git a/yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql b/yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql new file mode 100644 index 00000000000..a42e370c598 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/ImplicitFromRes.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +select + Yson::Parse(Yson::Parse("[]"y)), + Yson::ParseJson(Yson::Parse("[]"y)); diff --git a/yql/essentials/udfs/common/yson2/test/cases/IsType.sql b/yql/essentials/udfs/common/yson2/test/cases/IsType.sql new file mode 100644 index 00000000000..1407ad43661 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/IsType.sql @@ -0,0 +1,12 @@ +$all = [@@"str"@@y, "-13"y, "42u"y, "3.14"y, "#"y, "%false"y, "[1;2;3;]"y, "{}"y]; + +select + Yson::IsString(y) as is_string, + Yson::IsInt64(y) as is_int64, + Yson::IsUint64(y) as is_uint64, + Yson::IsDouble(y) as is_double, + Yson::IsEntity(y) as is_entity, + Yson::IsBool(y) as is_bool, + Yson::IsList(y) as is_list, + Yson::IsDict(y) as is_dict +FROM AS_TABLE(ListMap($all, ($y)->(<|'y':$y|>))); diff --git a/yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql b/yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql new file mode 100644 index 00000000000..e2b292f495b --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/JsonSerializeSkipMapEntity.sql @@ -0,0 +1,18 @@ +$node1 = Yson::Parse(Yson(@@{a=123}@@)); +$node2 = Yson::Parse(Yson(@@{a=#}@@)); +$node3 = Yson::Parse(Yson(@@{a=123;b=#}@@)); +$node4 = Yson::Parse(Yson(@@[123;#]@@)); +$node5 = Yson::Parse(Yson(@@{a=1;b=#;c=1;d=#;e=#}@@)); +$node6 = Yson::Parse(Yson(@@{b=1;a=<c=1;d=#;e=3>23}@@)); 
+$node7 = Yson::Parse(Yson(@@{b=1;a=<c=1;d=#;e=3>#}@@)); +$node8 = Yson::Parse(Yson(@@<d=#>23@@)); + +SELECT + Yson::SerializeJson($node1, true as SkipMapEntity) AS res1, + Yson::SerializeJson($node2, true as SkipMapEntity) AS res2, + Yson::SerializeJson($node3, true as SkipMapEntity) AS res3, + Yson::SerializeJson($node4, true as SkipMapEntity) AS res4, + Yson::SerializeJson($node5, true as SkipMapEntity) AS res5, + Yson::SerializeJson($node6, true as SkipMapEntity) AS res6, + Yson::SerializeJson($node7, true as SkipMapEntity) AS res7, + Yson::SerializeJson($node8, true as SkipMapEntity) AS res8; diff --git a/yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql b/yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql new file mode 100644 index 00000000000..33002ffc034 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/JsonWithNanAsString.sql @@ -0,0 +1,8 @@ +$src = Yson::From(0./0.); -- nan +$src1 = Yson::From(1./0.); -- inf +$src2 = Yson::From(-1./0.); -- -inf + +SELECT + Yson::SerializeJson($src, true AS WriteNanAsString), + Yson::SerializeJson($src1, true AS WriteNanAsString), + Yson::SerializeJson($src2, true AS WriteNanAsString)
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql b/yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql new file mode 100644 index 00000000000..00f4c98cd98 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/JsonWithUtf8.sql @@ -0,0 +1,5 @@ +$src = Yson::From("Хэллоу!"); +SELECT Yson::SerializeJson($src, false AS EncodeUtf8), Yson::SerializeJson($src, true AS EncodeUtf8), + Yson::Equals(Yson::ParseJson(Json("\"\xD0\xA5\xD1\x8D\xD0\xBB\xD0\xBB\xD0\xBE\xD1\x83!\"")), Yson::ParseJsonDecodeUtf8(Json("\"\xC3\x90\xC2\xA5\xC3\x91\xC2\x8D\xC3\x90\xC2\xBB\xC3\x90\xC2\xBB\xC3\x90\xC2\xBE\xC3\x91\xC2\x83!\""))), + Yson::SerializeJsonEncodeUtf8($src); + diff --git a/yql/essentials/udfs/common/yson2/test/cases/Lists.sql b/yql/essentials/udfs/common/yson2/test/cases/Lists.sql new file mode 100644 index 00000000000..4d1c74e6cbc --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Lists.sql @@ -0,0 +1,10 @@ +/* syntax version 1 */ +$x = Yson::Parse("[1;2;[3;4]]"); +$no_strict = Yson::Options(false AS Strict); +select Yson::ConvertToList($x) is null, + ListLength(Yson::ConvertToList($x)), + ListMap(Yson::ConvertToList($x), ($i)->(Yson::ConvertToInt64($i,$no_strict))), + ListMap(Yson::ConvertToList($x), ($x)->(ListMap(Yson::ConvertToList($x, Yson::Options(false AS Strict)), Yson::ConvertToInt64))); + +$int_and_str = Yson(@@[123;"456"]@@); +SELECT ListMap(Yson::ConvertToList($int_and_str), Yson::ConvertToString), ListMap(Yson::ConvertToList($int_and_str), Yson::ConvertToInt64); diff --git a/yql/essentials/udfs/common/yson2/test/cases/Lookup.sql b/yql/essentials/udfs/common/yson2/test/cases/Lookup.sql new file mode 100644 index 00000000000..e24b1564177 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Lookup.sql @@ -0,0 +1,29 @@ +/* syntax version 1 */ +$no_strict = Yson::Options(false AS Strict); + +select +Yson::ConvertToBool(Yson::Lookup(Yson::Parse('{a=%true}'), 'a')), 
+Yson::LookupBool(Yson::Parse('{a=%true}'), 'a'), + +Yson::ConvertToInt64(Yson::Lookup(Yson::Parse('{a=1}'), 'a')), +Yson::LookupInt64(Yson::Parse('{a=1}'), 'a'), + +Yson::ConvertToUint64(Yson::Lookup(Yson::Parse('{a=2u}'), 'a')), +Yson::LookupUint64(Yson::Parse('{a=2u}'), 'a'), + +Yson::ConvertToDouble(Yson::Lookup(Yson::Parse('{a=3.0}'), 'a')), +Yson::LookupDouble(Yson::Parse('{a=3.0}'), 'a'), + +Yson::ConvertToString(Yson::Lookup(Yson::Parse('{a=x}'), 'a')), +Yson::LookupString(Yson::Parse('{a=x}'), 'a'), + +ListLength(Yson::ConvertToList(Yson::Lookup(Yson::Parse('{a=[1;2]}'), 'a'))), +ListLength(Yson::LookupList(Yson::Parse('{a=[1;2]}'), 'a')), + +DictLength(Yson::ConvertToDict(Yson::Lookup(Yson::Parse('{a={b=c}}'), 'a'))), +DictLength(Yson::LookupDict(Yson::Parse('{a={b=c}}'), 'a')), + +Yson::LookupString(Yson::Parse('[]'), '0'), + +Yson::LookupString(Yson::Parse('{a=12345}'), 'a', $no_strict), +Yson::LookupDouble(Yson::Parse(@@{a="12345"}@@), 'a', $no_strict); diff --git a/yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql b/yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql new file mode 100644 index 00000000000..bc1f0bc6b5a --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/NegativeArrayIndex.sql @@ -0,0 +1,13 @@ +$node = Yson::Parse(@@[1;2;3;4;5;6;7]@@); + +SELECT + Yson::YPathInt64($node, "/+1"), + Yson::YPathInt64($node, "/-1"), + Yson::YPathInt64($node, "/+2"), + Yson::YPathInt64($node, "/-2"), + Yson::YPathInt64($node, "/+6"), + Yson::YPathInt64($node, "/-7"), + Yson::YPathInt64($node, "/+7"), + Yson::YPathInt64($node, "/-8"), + Yson::YPathInt64($node, "/0"); + diff --git a/yql/essentials/udfs/common/yson2/test/cases/ParseString.sql b/yql/essentials/udfs/common/yson2/test/cases/ParseString.sql new file mode 100644 index 00000000000..6968ec07c6a --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/ParseString.sql @@ -0,0 +1,11 @@ +$options = Yson::Options(false AS Strict); + +SELECT + 
Yson::Parse("0u"), + Yson::Parse(Just("1u")), + Yson::ParseJson("2"), + Yson::ParseJson(Just("3")), + Yson::Parse("", $options), + Yson::Parse(Just(""), $options), + Yson::ParseJson("", $options), + Yson::ParseJson(Just(""), $options); diff --git a/yql/essentials/udfs/common/yson2/test/cases/Scalars.sql b/yql/essentials/udfs/common/yson2/test/cases/Scalars.sql new file mode 100644 index 00000000000..2b8d751957b --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Scalars.sql @@ -0,0 +1,46 @@ +/* syntax version 1 */ +$no_strict = Yson::Options(false AS Strict); + +select +Yson::ConvertToBool(Yson::Parse('#'), $no_strict), +Yson::ConvertToBool(Yson::Parse('%true')), +Yson::ConvertToBool(Yson::Parse('true')), +Yson::ConvertToBool(Yson::Parse('false')), +Yson::ConvertToBool(Yson::Parse('1'), $no_strict), +Yson::ConvertToBool(Yson::Parse('2u'), $no_strict), +Yson::ConvertToBool(Yson::Parse('3.0'), $no_strict), +Yson::ConvertToBool(Yson::Parse('foo'), $no_strict), +Yson::ConvertToBool(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict), + +Yson::ConvertToInt64(Yson::Parse('#'), $no_strict), +Yson::ConvertToInt64(Yson::Parse('%true'), $no_strict), +Yson::ConvertToInt64(Yson::Parse('1')), +Yson::ConvertToInt64(Yson::Parse('2u')), +Yson::ConvertToInt64(Yson::Parse('3.0'), $no_strict), +Yson::ConvertToInt64(Yson::Parse('foo'), $no_strict), +Yson::ConvertToInt64(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict), + +Yson::ConvertToUint64(Yson::Parse('#'), $no_strict), +Yson::ConvertToUint64(Yson::Parse('%true'), $no_strict), +Yson::ConvertToUint64(Yson::Parse('-1'), $no_strict), +Yson::ConvertToUint64(Yson::Parse('1')), +Yson::ConvertToUint64(Yson::Parse('2u')), +Yson::ConvertToUint64(Yson::Parse('3.0'), $no_strict), +Yson::ConvertToUint64(Yson::Parse('foo'), $no_strict), +Yson::ConvertToUint64(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict), + +Yson::ConvertToDouble(Yson::Parse('#'), $no_strict), 
+Yson::ConvertToDouble(Yson::Parse('%true'), $no_strict), +Yson::ConvertToDouble(Yson::Parse('1')), +Yson::ConvertToDouble(Yson::Parse('2u')), +Yson::ConvertToDouble(Yson::Parse('3.0')), +Yson::ConvertToDouble(Yson::Parse('foo'), $no_strict), +Yson::ConvertToDouble(Yson::Parse('"very loooooooooooooooooong string"'), $no_strict), + +Yson::ConvertToString(Yson::Parse('#'), $no_strict), +Yson::ConvertToString(Yson::Parse('%true'), $no_strict), +Yson::ConvertToString(Yson::Parse('1'), $no_strict), +Yson::ConvertToString(Yson::Parse('2u'), $no_strict), +Yson::ConvertToString(Yson::Parse('3.0'), $no_strict), +Yson::ConvertToString(Yson::Parse('foo')), +Yson::ConvertToString(Yson::Parse('"very loooooooooooooooooong string"')), diff --git a/yql/essentials/udfs/common/yson2/test/cases/Serialize.sql b/yql/essentials/udfs/common/yson2/test/cases/Serialize.sql new file mode 100644 index 00000000000..88eb075d878 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/Serialize.sql @@ -0,0 +1,3 @@ +/* syntax version 1 */ +select +Yson::SerializeText(Yson::Parse(Yson('<a=1>[#;{a=1};{b=2u;c=[]};<q=foo>3.0;{};foo;"very loooooooooooooooooong string"]'))); diff --git a/yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql b/yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql new file mode 100644 index 00000000000..5b3b1c4402f --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/SerializeDouble.sql @@ -0,0 +1,13 @@ +$s = <| + double1: 1.0000000001, + double2: 1.000000001, + double3: 1000000000.5, + double4: 10000000005.0, + double5: 10000000000.5, + double6: 100000000005.0, +|>; + +SELECT + Yson::Serialize(Yson::From($s)), + Yson::SerializeJson(Yson::From($s)) +;
\ No newline at end of file diff --git a/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in new file mode 100644 index 00000000000..18703eb2520 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in @@ -0,0 +1,5 @@ +{"key"="020";"subkey"="1";"_rest"={"animal"="wombat";"size"="small";"weightMin"=20.;"weightMax"=35.;"wild"=%true};"binZ"="\x04";"strY"="\1\x08test";"realZ"="\x03\x18-DT\xfb!\t@" ;"uiData"=1}; +{"key"="075";"subkey"="5";"_rest"={"animal"="dog";"size"="huge";"weightMin"=5.;"weightMax"=75.;"pet"=%true} ;"binZ"=%true ;"strY"="\1\nfunny" ;"realZ"="\x03iW\x14\x8b\n\xbf\x05@" ;"uiData"=1u}; +{"key"="150";"subkey"="4";"_rest"={"animal"="chipmunk";"size"="small";"weightMin"=0.05;"weightMax"=0.15;"wild"=%true} ;"binZ"="\x05";"strY"="\1\nbunny" ;"realZ"="\x03\xcd;\x7ff\x9e\xa0\xf6?" ;"uiData"=100500u}; +{"key"="500";"subkey"="2";"_rest"={"animal"="hamster";"size"="verysmall";"weightMin"=0.015;"weightMax"=0.045;"pet"=%true} ;"binZ"=%false;"strY"="33.33" ;"realZ"="\x03\x00\x00\x00\x00\x00\x00\xf0?" 
;"uiData"=10010005001000000u}; +{"key"="800";"subkey"="3";"_rest"={"animal"="dingo";"size"="huge";"weightMin"=10.;"weightMax"=20.;"wild"=%true} ;"binZ"=%false;"strY"="\1\x06zzz" ;"realZ"="\x03\x00\x00\x00\x00\x00\x00\xf0\xbf";"uiData"=33}; diff --git a/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr new file mode 100644 index 00000000000..6ce47a00153 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.in.attr @@ -0,0 +1,12 @@ +{"_yql_row_spec"={ + "Type"=["StructType";[ + ["key";["DataType";"String"]]; + ["subkey";["DataType";"String"]]; + ["_rest";["OptionalType";["DataType";"Yson"]]] + ]]; + "SortDirections"=[1;1;]; + "SortedBy"=["key";"subkey";]; + "SortedByTypes"=[["DataType";"String";];["DataType";"String";];]; + "SortMembers"=["key";"subkey";]; + "DefaultValues"={"key"="\"\"";"subkey"="\"\"";} +}} diff --git a/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql new file mode 100644 index 00000000000..2c84ad498bb --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/WeakYsonRest.sql @@ -0,0 +1,7 @@ +/* postgres can not */ +USE plato; + +--INSERT INTO Output +SELECT + WeakField(animal, "String") +FROM Input diff --git a/yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql b/yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql new file mode 100644 index 00000000000..a4a21610ad6 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/WithAttrs.sql @@ -0,0 +1,7 @@ +/* syntax version 1 */ + +select +Yson::SerializeText(Yson::WithAttributes(Yson::Parse('1'), Yson::Parse('{a=2}'))), +Yson::SerializeText(Yson::WithAttributes(Yson::Parse('1'), Yson::Parse('{}'))), +Yson::SerializeText(Yson::WithAttributes(Yson::Parse('1'), Yson::Parse('#'))), +Yson::SerializeText(Yson::WithAttributes(Yson::Parse('<c=2>1'), Yson::Parse('{b=3}'))); diff --git 
a/yql/essentials/udfs/common/yson2/test/cases/YPath.sql b/yql/essentials/udfs/common/yson2/test/cases/YPath.sql new file mode 100644 index 00000000000..30b3bc372c9 --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/cases/YPath.sql @@ -0,0 +1,13 @@ +$node = Yson::Parse(@@<x="y">{abc=123;}@@); +$data = Yson::YPath($node, "/abc"); +$attrs = Yson::YPath($node, "/@"); +$miss = Yson::YPath($node, "/def"); + +SELECT + Yson::ConvertToInt64($data) AS data, + Yson::ConvertToStringDict($attrs) AS attrs, + Yson::SerializePretty($miss) AS miss, + Yson::YPathInt64($node, "/abc") AS num, + Yson::YPathString($node, "/@/x") AS str_attr, + Yson::YPathBool($node, "/@/mis") AS miss_attr, + Yson::YPathString($node, "/abc", Yson::Options(false as Strict)) AS bad_conv; diff --git a/yql/essentials/udfs/common/yson2/test/ya.make b/yql/essentials/udfs/common/yson2/test/ya.make new file mode 100644 index 00000000000..a13d3b13b9c --- /dev/null +++ b/yql/essentials/udfs/common/yson2/test/ya.make @@ -0,0 +1,12 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/yson2) + +TIMEOUT(300) +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/yson2/ya.make b/yql/essentials/udfs/common/yson2/ya.make new file mode 100644 index 00000000000..64bb6b56ecc --- /dev/null +++ b/yql/essentials/udfs/common/yson2/ya.make @@ -0,0 +1,32 @@ +IF (YQL_PACKAGED) + PACKAGE() + FROM_SANDBOX(FILE 7319908881 OUT_NOAUTO libyson2_udf.so + ) + END() +ELSE () +YQL_UDF_CONTRIB(yson2_udf) + + YQL_ABI_VERSION( + 2 + 28 + 0 + ) + + SRCS( + yson2_udf.cpp + ) + + PEERDIR( + library/cpp/containers/stack_vector + library/cpp/yson_pull + yql/essentials/minikql/dom + ) + + END() +ENDIF () + + +RECURSE_FOR_TESTS( + test +) + diff --git a/yql/essentials/udfs/common/yson2/yson2_udf.cpp b/yql/essentials/udfs/common/yson2/yson2_udf.cpp new file mode 100644 index 00000000000..76dbe07c55a --- /dev/null +++ 
b/yql/essentials/udfs/common/yson2/yson2_udf.cpp @@ -0,0 +1,1203 @@
+#include <yql/essentials/minikql/dom/node.h>
+#include <yql/essentials/minikql/dom/json.h>
+#include <yql/essentials/minikql/dom/yson.h>
+#include <yql/essentials/minikql/dom/make.h>
+#include <yql/essentials/minikql/dom/peel.h>
+#include <yql/essentials/minikql/dom/hash.h>
+#include <yql/essentials/minikql/dom/convert.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+#include <yql/essentials/public/udf/udf_type_printer.h>
+
+#include <library/cpp/yson_pull/exceptions.h>
+
+#include <util/string/split.h>
+
+using namespace NYql::NUdf;
+using namespace NYql::NDom;
+using namespace NYsonPull;
+
+namespace {
+
+constexpr char OptionsResourceName[] = "Yson2.Options";
+
+using TOptionsResource = TResource<OptionsResourceName>;
+using TNodeResource = TResource<NodeResourceName>;
+
+using TDictType = TDict<char*, TNodeResource>;
+using TInt64DictType = TDict<char*, i64>;
+using TUint64DictType = TDict<char*, ui64>;
+using TBoolDictType = TDict<char*, bool>;
+using TDoubleDictType = TDict<char*, double>;
+using TStringDictType = TDict<char*, char*>;
+// Bit masks that TOptions::Run ORs together into a single option byte.
+enum class EOptions : ui8 {
+    Strict = 1,
+    AutoConvert = 2
+};
+// The option byte, readable either as Raw or as two one-bit flags.
+union TOpts { // NOTE(review): relies on the bit-field layout lining up with EOptions (Strict in the lowest bit); layout is implementation-defined - confirm for all target compilers
+    ui8 Raw = 0;
+    struct {
+        bool Strict: 1;
+        bool AutoConvert: 1;
+    };
+};
+
+static_assert(sizeof(TOpts) == 1U, "Wrong TOpts size.");
+// Decodes the options argument; an empty value yields TOpts with Raw == 0 (both flags cleared).
+TOpts ParseOptions(TUnboxedValuePod x) {
+    if (x) {
+        return TOpts{x.Get<ui8>()}; // initializes the Raw member from the stored byte
+    }
+    return {};
+}
+// Boxed callable behind the Options function: packs two optional bool arguments into one EOptions byte.
+class TOptions : public TBoxedValue {
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        ui8 options = 0;
+
+        if (args[0] && args[0].Get<bool>()) { // first argument present and true: set the AutoConvert bit
+            options |= ui8(EOptions::AutoConvert);
+        }
+
+        if (args[1] && args[1].Get<bool>()) { // second argument present and true: set the Strict bit
+            options |= ui8(EOptions::Strict);
+        }
+
+        return TUnboxedValuePod(options);
+    }
+public:
+    static const TStringRef& Name() { // name under which DeclareSignature recognizes this function
+        static auto name = TStringRef::Of("Options");
+        return name;
+    }
+
+    static bool DeclareSignature(
+        const TStringRef& name,
+        TType* 
userType,
+        IFunctionTypeInfoBuilder& builder,
+        bool typesOnly) {
+        Y_UNUSED(userType);
+        if (Name() == name) { // only the function named by Name() is declared here; any other name returns false
+            auto argsBuilder = builder.Args(2U);
+            argsBuilder->Add<TOptional<bool>>().Name(TStringRef::Of("AutoConvert")); // argument 0, read as args[0] in Run
+            argsBuilder->Add<TOptional<bool>>().Name(TStringRef::Of("Strict")); // argument 1, read as args[1] in Run
+            builder.Returns(builder.Resource(OptionsResourceName));
+            builder.OptionalArgs(2U);
+            if (!typesOnly) { // the implementation object is created only when typesOnly is false
+                builder.Implementation(new TOptions);
+            }
+            // NOTE(review): IsStrict() is presumably the UDF-level strictness attribute, unrelated to the Strict option flag - confirm
+            builder.IsStrict();
+            return true;
+        } else {
+            return false;
+        }
+    }
+};
+// Signature shared by the element converters passed as template arguments in this file.
+using TConverterPtr = TUnboxedValuePod (*)(TUnboxedValuePod, const IValueBuilder*, const TSourcePosition& pos);
+// Wrapper around a list/dict value that applies Converter to items as they are read through iterators or Lookup.
+template <TConverterPtr Converter>
+class TLazyConveterT : public TManagedBoxedValue {
+public:
+    TLazyConveterT(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos)
+        : Original(std::move(original)), ValueBuilder(valueBuilder), Pos_(pos)
+    {}
+private:
+    template <bool NoSwap> // NoSwap == false: Next() converts the value and NextPair() converts the key; NoSwap == true: Next() passes through and NextPair() converts the payload
+    class TIterator: public TManagedBoxedValue {
+    public:
+        TIterator(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos)
+            : Original(std::move(original)), ValueBuilder(valueBuilder), Pos_(pos)
+        {}
+
+    private:
+        bool Skip() final { // skipping never needs conversion, so it is forwarded as is
+            return Original.Skip();
+        }
+
+        bool Next(TUnboxedValue& value) final {
+            if (Original.Next(value)) {
+                if constexpr (!NoSwap) {
+                    value = Converter(value.Release(), ValueBuilder, Pos_);
+                }
+                return true;
+            }
+            return false;
+        }
+
+        bool NextPair(TUnboxedValue& key, TUnboxedValue& payload) final {
+            if (Original.NextPair(key, payload)) {
+                if constexpr (NoSwap) {
+                    payload = Converter(payload.Release(), ValueBuilder, Pos_);
+                } else {
+                    key = Converter(key.Release(), ValueBuilder, Pos_);
+                }
+                return true;
+            }
+            return false;
+        }
+
+        const TUnboxedValue Original;
+        const IValueBuilder *const ValueBuilder;
+        const TSourcePosition Pos_;
+    };
+    // The size and capability queries below are forwarded to the wrapped value unchanged.
+    ui64 GetDictLength() const final {
+        return Original.GetDictLength();
+    }
+
+    ui64 GetListLength() const final {
+        
return Original.GetListLength();
+    }
+
+    bool HasFastListLength() const final {
+        return Original.HasFastListLength();
+    }
+
+    bool HasDictItems() const final {
+        return Original.HasDictItems();
+    }
+
+    bool HasListItems() const final {
+        return Original.HasListItems();
+    }
+
+    TUnboxedValue GetListIterator() const final { // TIterator<false>: Next() converts every list item
+        return TUnboxedValuePod(new TIterator<false>(Original.GetListIterator(), ValueBuilder, Pos_));
+    }
+
+    TUnboxedValue GetDictIterator() const final { // TIterator<true>: NextPair() converts payloads and leaves keys alone
+        return TUnboxedValuePod(new TIterator<true>(Original.GetDictIterator(), ValueBuilder, Pos_));
+    }
+
+    TUnboxedValue GetKeysIterator() const final { // TIterator<true>: Next() does not call Converter, so keys pass through
+        return TUnboxedValuePod(new TIterator<true>(Original.GetKeysIterator(), ValueBuilder, Pos_));
+    }
+
+    TUnboxedValue GetPayloadsIterator() const override { // TIterator<false>: Next() converts every payload
+        return TUnboxedValuePod(new TIterator<false>(Original.GetPayloadsIterator(), ValueBuilder, Pos_));
+    }
+
+    bool Contains(const TUnboxedValuePod& key) const final {
+        return Original.Contains(key);
+    }
+
+    TUnboxedValue Lookup(const TUnboxedValuePod& key) const final {
+        if (auto lookup = Original.Lookup(key)) { // hit: unwrap the optional, convert, wrap again
+            return Converter(lookup.Release().GetOptionalValue(), ValueBuilder, Pos_).MakeOptional();
+        }
+        return {}; // miss: empty value
+    }
+
+    bool IsSortedDict() const final {
+        return Original.IsSortedDict();
+    }
+
+    const TUnboxedValue Original;
+    const IValueBuilder *const ValueBuilder;
+    const TSourcePosition Pos_;
+};
+// ConvertToListImpl: produces a list value from a DOM node; when Converter is given it is applied to the elements.
+template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr>
+TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    if (!x) { // empty input yields an empty list
+        return valueBuilder->NewEmptyList().Release();
+    }
+
+    switch (GetNodeType(x)) {
+        case ENodeType::List:
+            if (!x.IsBoxed()) // unboxed list node: leave the switch and return the empty list at the bottom
+                break;
+            if constexpr (Converter != nullptr) {
+                if constexpr (Strict || AutoConvert) { // either flag set: wrap and convert elements lazily on access
+                    return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos));
+                } else { // neither flag set: convert eagerly, keeping only elements whose conversion is non-empty
+                    TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator> values;
+                    if (const auto 
elements = x.GetElements()) { // a direct element array is available: index it
+                        const auto size = x.GetListLength();
+                        values.reserve(size);
+                        for (ui32 i = 0U; i < size; ++i) {
+                            if (auto converted = Converter(elements[i], valueBuilder, pos)) { // elements that convert to an empty value are dropped
+                                values.emplace_back(std::move(converted));
+                            }
+                        }
+                    } else { // no element array: walk the list iterator instead
+                        const auto it = x.GetListIterator();
+                        for (TUnboxedValue v; it.Next(v);) {
+                            if (auto converted = Converter(v.Release(), valueBuilder, pos)) {
+                                values.emplace_back(std::move(converted));
+                            }
+                        }
+                    }
+                    if (values.empty()) { // nothing survived conversion: fall through to the empty-list return
+                        break;
+                    }
+                    return valueBuilder->NewList(values.data(), values.size()).Release();
+                }
+            }
+            return x; // no Converter: the list node is returned as is
+        case ENodeType::Attr: // attributed node: retry on the value obtained from GetVariantItem()
+            return ConvertToListImpl<Strict, AutoConvert, Converter>(x.GetVariantItem().Release(), valueBuilder, pos);
+        default:
+            if constexpr (Strict) { // strict mode terminates on a non-list node; otherwise an empty list is returned below
+                if (!IsNodeType<ENodeType::List>(x)) {
+                    UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse list from " << TDebugPrinter(x)).c_str());
+                }
+            }
+    }
+
+    return valueBuilder->NewEmptyList().Release();
+}
+// ConvertToDictImpl: dict counterpart of ConvertToListImpl; Converter (when given) is applied to payloads.
+template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr>
+TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    if (!x) { // NOTE(review): the empty result is built with NewEmptyList(); presumably that value also serves as an empty dict - confirm
+        return valueBuilder->NewEmptyList().Release();
+    }
+
+    switch (GetNodeType(x)) {
+        case ENodeType::Dict:
+            if (!x.IsBoxed()) // unboxed dict node: leave the switch and return the empty value at the bottom
+                break;
+            if constexpr (Converter != nullptr) {
+                if constexpr (Strict || AutoConvert) { // either flag set: wrap and convert payloads lazily on access
+                    return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos));
+                } else if (const auto size = x.GetDictLength()) { // neither flag set and dict is non-empty: convert eagerly
+                    TSmallVec<TPair, TStdAllocatorForUdf<TPair>> pairs;
+                    pairs.reserve(size);
+                    const auto it = x.GetDictIterator();
+                    for (TUnboxedValue key, payload; it.NextPair(key, payload);) {
+                        if (auto converted = Converter(payload, valueBuilder, pos)) { // NOTE(review): payload is passed without Release(), unlike v.Release() in the list iterator path - confirm ownership is as intended
+                            pairs.emplace_back(std::move(key), std::move(converted));
+                        }
+                    }
+                    if (pairs.empty()) { // every payload converted to an empty value
+                        break;
+                    }
+                    return TUnboxedValuePod(IBoxedValuePtr(new TMapNode(pairs.data(), pairs.size())));
+                }
+            }
+            return x; // no Converter, or an empty dict in the eager branch: the node is returned as is
+        case ENodeType::Attr: // attributed node: retry on the value obtained from GetVariantItem()
+            return ConvertToDictImpl<Strict, AutoConvert, Converter>(x.GetVariantItem().Release(), valueBuilder, pos);
+        default:
+            if constexpr (Strict) { // strict mode terminates on a non-dict node; otherwise the empty value is returned below
+                if (!IsNodeType<ENodeType::Dict>(x)) {
+                    UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Cannot parse dict from " << TDebugPrinter(x)).c_str());
+                }
+            }
+    }
+
+    return valueBuilder->NewEmptyList().Release();
+}
+// LookupImpl: fetches one child of a dict or list node by string key; returns an empty value when nothing matches.
+template <TConverterPtr Converter = nullptr>
+TUnboxedValuePod LookupImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    switch (GetNodeType(dict)) {
+        case ENodeType::Dict:
+            if (dict.IsBoxed()) { // an unboxed dict node skips the lookup and yields the empty result
+                if (auto payload = dict.Lookup(key)) {
+                    if constexpr (Converter != nullptr) {
+                        return Converter(payload.Release().GetOptionalValue(), valueBuilder, pos);
+                    }
+                    return payload.Release();
+                }
+            }
+            return {};
+        case ENodeType::List:
+            if (dict.IsBoxed()) {
+                if (const i32 size = dict.GetListLength()) { // NOTE(review): the length is stored in an i32 here; presumably lists never exceed that range - confirm
+                    if (i32 index; TryFromString(key.AsStringRef(), index) && index < size && index >= -size) { // key must parse as an integer within [-size, size)
+                        if (index < 0) // negative index counts from the end of the list
+                            index += size;
+                        if constexpr (Converter != nullptr) {
+                            return Converter(dict.Lookup(TUnboxedValuePod(index)).Release(), valueBuilder, pos);
+                        }
+                        return dict.Lookup(TUnboxedValuePod(index)).Release();
+                    }
+                }
+            }
+            return {};
+        case ENodeType::Attr: // attributed node: look inside the value obtained from GetVariantItem()
+            return LookupImpl<Converter>(dict.GetVariantItem().Release(), key, valueBuilder, pos);
+        default:
+            return {};
+    }
+}
+// YPathImpl: walks a slash-separated path through nested nodes, then optionally converts the node it ends on.
+template <TConverterPtr Converter = nullptr>
+TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    const std::string_view path = key.AsStringRef();
+    if (path.size() < 2U || path.front() != '/' || path.back() == '/') { // path needs at least two characters, a leading slash and no trailing slash
+        UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Invalid YPath: '" << path << "'.").data());
+    }
+
+    for (const auto s : StringSplitter(path.substr(path[1U] 
== '/' ? 2U : 1U)).Split('/')) { // one or two leading slashes are skipped before the path is split into tokens
+        const bool attr = IsNodeType<ENodeType::Attr>(dict);
+        if (const std::string_view subpath = s.Token(); subpath == "@") {
+            if (attr) // NOTE(review): retagging the Attr node as Dict presumably exposes its attribute map for the next token - confirm
+                dict = SetNodeType<ENodeType::Dict>(dict);
+            else
+                return {}; // the @ token on a node that is not an Attr node yields an empty result
+        } else {
+            if (attr) { // ordinary token: step from the Attr node to the value obtained from GetVariantItem()
+                dict = dict.GetVariantItem().Release();
+            }
+
+            const auto subkey = valueBuilder->SubString(key, std::distance(path.begin(), subpath.begin()), subpath.size()); // slice the token out of the original key by offset and length
+            dict = LookupImpl<nullptr>(dict, subkey, valueBuilder, pos);
+        }
+
+        if (!dict) { // a missing step ends the walk with an empty result
+            return {};
+        }
+    }
+
+    if constexpr (Converter != nullptr) {
+        return Converter(dict, valueBuilder, pos);
+    }
+
+    return dict;
+}
+// ContainsImpl: reports whether a dict has the key, or whether the key is a valid index into a list.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    switch (GetNodeType(dict)) {
+        case ENodeType::Attr: // attributed node: check the value obtained from GetVariantItem()
+            return ContainsImpl<Strict, AutoConvert>(dict.GetVariantItem().Release(), key, valueBuilder, pos);
+        case ENodeType::Dict:
+            if (dict.IsBoxed())
+                return TUnboxedValuePod(dict.Contains(key));
+            else
+                return TUnboxedValuePod(false); // unboxed dict node contains nothing
+        case ENodeType::List:
+            if (dict.IsBoxed()) {
+                if (const i32 size = dict.GetListLength()) {
+                    if (i32 index; TryFromString(key.AsStringRef(), index)) {
+                        return TUnboxedValuePod(index < size && index >= -size); // same [-size, size) index range that LookupImpl accepts
+                    }
+                }
+            }
+            return TUnboxedValuePod(false); // unboxed or empty list, or a key that is not an integer
+        default:
+            if constexpr (Strict && !AutoConvert) // scalar node: terminate only when Strict is set and AutoConvert is not; otherwise return an empty value
+                UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't check contains on scalar " << TDebugPrinter(dict)).c_str());
+            else
+                return {};
+    }
+}
+// GetLengthImpl: returns the number of entries of a dict or list node.
+template<bool Strict, bool AutoConvert>
+TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* valueBuilder, const TSourcePosition& pos) {
+    switch (GetNodeType(dict)) {
+        case ENodeType::Attr: // attributed node: measure the value obtained from GetVariantItem()
+            return GetLengthImpl<Strict, AutoConvert>(dict.GetVariantItem().Release(), valueBuilder, pos);
+        case ENodeType::Dict:
+            return TUnboxedValuePod(dict.IsBoxed() ? 
dict.GetDictLength() : ui64(0)); // an unboxed dict node counts as length 0
+        case ENodeType::List:
+            return TUnboxedValuePod(dict.IsBoxed() ? dict.GetListLength() : ui64(0)); // an unboxed list node counts as length 0
+        default:
+            if constexpr (Strict && !AutoConvert) // scalar node: terminate only when Strict is set and AutoConvert is not; otherwise return an empty value
+                UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't get container length from scalar " << TDebugPrinter(dict)).c_str());
+            else
+                return {};
+    }
+}
+
+} // closes the anonymous namespace opened near the top of the file
+// TConvertToBool: picks the ConvertToBool instantiation matching the runtime Strict and AutoConvert flags and calls it on args[0].
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBool, TOptional<bool>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { // NOTE(review): the trailing 1 is presumably the number of optional arguments - confirm against udf_helpers.h
+    if (const auto options = ParseOptions(args[1]); options.Strict) // args[1] is the options value; ParseOptions clears both flags when it is empty
+        return (options.AutoConvert ? &ConvertToBool<true, true> : &ConvertToBool<true, false>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToBool<false, true> : &ConvertToBool<false, false>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToInt64: same flag dispatch, over ConvertToIntegral with i64.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64, TOptional<i64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? &ConvertToIntegral<true, true, i64> : &ConvertToIntegral<true, false, i64>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToIntegral<false, true, i64> : &ConvertToIntegral<false, false, i64>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToUint64: same flag dispatch, over ConvertToIntegral with ui64.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? &ConvertToIntegral<true, true, ui64> : &ConvertToIntegral<true, false, ui64>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? 
&ConvertToIntegral<false, true, ui64> : &ConvertToIntegral<false, false, ui64>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToDouble: selects the ConvertToFloat instantiation for double by the runtime Strict and AutoConvert flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDouble, TOptional<double>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict) // args[1] is the options value, args[0] the node to convert
+        return (options.AutoConvert ? &ConvertToFloat<true, true, double> : &ConvertToFloat<true, false, double>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToFloat<false, true, double> : &ConvertToFloat<false, false, double>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToString: same flag dispatch over ConvertToString; the third template argument is always false here.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToString, TOptional<char*>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? &ConvertToString<true, true, false> : &ConvertToString<true, false, false>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToString<false, true, false> : &ConvertToString<false, false, false>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToList: list of nodes with no element converter; the node argument is TOptional and an empty one becomes an empty list in ConvertToListImpl.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToList, TListType<TNodeResource>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? &ConvertToListImpl<true, true> : &ConvertToListImpl<true, false>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToListImpl<false, true> : &ConvertToListImpl<false, false>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToInt64List: list conversion with ConvertToIntegral for i64 as the element converter, instantiated with the same flags as the list itself.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64List, TListType<i64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? 
&ConvertToListImpl<true, true, &ConvertToIntegral<true, true, i64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, i64>>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, i64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, i64>>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToUint64List: list conversion with ConvertToIntegral for ui64 as the element converter; list and element converter share the same Strict and AutoConvert flags.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64List, TListType<ui64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict) // args[1] is the options value, args[0] the node to convert
+        return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToBoolList: list conversion with ConvertToBool as the element converter.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolList, TListType<bool>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToBool<true, true>> : &ConvertToListImpl<true, false, &ConvertToBool<true, false>>)(args[0], valueBuilder, GetPos());
+    else
+        return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToBool<false, true>> : &ConvertToListImpl<false, false, &ConvertToBool<false, false>>)(args[0], valueBuilder, GetPos());
+}
+// TConvertToDoubleList: list conversion with ConvertToFloat for double as the element converter.
+SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleList, TListType<double>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) {
+    if (const auto options = ParseOptions(args[1]); options.Strict)
+        return (options.AutoConvert ? 
&ConvertToListImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToListImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToListImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringList, TListType<char*>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToString<true, true, false>> : &ConvertToListImpl<true, false, &ConvertToString<true, false, false>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToString<false, true, false>> : &ConvertToListImpl<false, false, &ConvertToString<false, false, false>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDict, TDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToDictImpl<true, true> : &ConvertToDictImpl<true, false>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ConvertToDictImpl<false, true> : &ConvertToDictImpl<false, false>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64Dict, TInt64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, i64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, i64>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, i64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, i64>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64Dict, TUint64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolDict, TBoolDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToBool<true, true>> : &ConvertToDictImpl<true, false, &ConvertToBool<true, false>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToBool<false, true>> : &ConvertToDictImpl<false, false, &ConvertToBool<false, false>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleDict, TDoubleDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToDictImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&ConvertToDictImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToDictImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringDict, TStringDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToString<true, true, false>> : &ConvertToDictImpl<true, false, &ConvertToString<true, false, false>>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToString<false, true, false>> : &ConvertToDictImpl<false, false, &ConvertToString<false, false, false>>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap<TNodeResource>)) { + const auto x = args[0]; + if (IsNodeType<ENodeType::Attr>(x)) { + return x; + } + + return valueBuilder->NewEmptyList(); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TContains, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &ContainsImpl<true, true> : &ContainsImpl<true, false>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &ContainsImpl<false, true> : &ContainsImpl<false, false>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetLength, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &GetLengthImpl<true, true> : &GetLengthImpl<true, false>)(args[0], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&GetLengthImpl<false, true> : &GetLengthImpl<false, false>)(args[0], valueBuilder, GetPos()); +} + +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + return LookupImpl(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToBool<true, true>> : &LookupImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &LookupImpl<&ConvertToBool<false, true>> : &LookupImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<true, true, i64>> : &LookupImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<false, true, i64>> : &LookupImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<true, true, ui64>> : &LookupImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&LookupImpl<&ConvertToIntegral<false, true, ui64>> : &LookupImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToFloat<true, true, double>> : &LookupImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &LookupImpl<&ConvertToFloat<false, true, double>> : &LookupImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToString<true, true, false>> : &LookupImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &LookupImpl<&ConvertToString<false, true, false>> : &LookupImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToListImpl<true, true>> : &LookupImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&LookupImpl<&ConvertToListImpl<false, true>> : &LookupImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl<true, true>> : &LookupImpl<&ConvertToDictImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl<false, true>> : &LookupImpl<&ConvertToDictImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + return YPathImpl(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToBool<true, true>> : &YPathImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &YPathImpl<&ConvertToBool<false, true>> : &YPathImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, i64>> : &YPathImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&YPathImpl<&ConvertToIntegral<false, true, i64>> : &YPathImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, ui64>> : &YPathImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<false, true, ui64>> : &YPathImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToFloat<true, true, double>> : &YPathImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &YPathImpl<&ConvertToFloat<false, true, double>> : &YPathImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToString<true, true, false>> : &YPathImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&YPathImpl<&ConvertToString<false, true, false>> : &YPathImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl<true, true>> : &YPathImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl<false, true>> : &YPathImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { + if (const auto options = ParseOptions(args[2]); options.Strict) + return (options.AutoConvert ? &YPathImpl<&ConvertToDictImpl<true, true>> : &YPathImpl<&ConvertToDictImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); + else + return (options.AutoConvert ? 
&YPathImpl<&ConvertToDictImpl<false, true>> : &YPathImpl<&ConvertToDictImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); +} + +SIMPLE_STRICT_UDF(TSerialize, TYson(TAutoMap<TNodeResource>)) { + return valueBuilder->NewString(SerializeYsonDomToBinary(args[0])); +} + +SIMPLE_STRICT_UDF(TSerializeText, TYson(TAutoMap<TNodeResource>)) { + return valueBuilder->NewString(SerializeYsonDomToText(args[0])); +} + +SIMPLE_STRICT_UDF(TSerializePretty, TYson(TAutoMap<TNodeResource>)) { + return valueBuilder->NewString(SerializeYsonDomToPrettyText(args[0])); +} + +constexpr char SkipMapEntity[] = "SkipMapEntity"; +constexpr char EncodeUtf8[] = "EncodeUtf8"; +constexpr char WriteNanAsString[] = "WriteNanAsString"; + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional<TJson>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>, TNamedArg<bool, SkipMapEntity>, TNamedArg<bool, EncodeUtf8>, TNamedArg<bool, WriteNanAsString>), 4) try { + return valueBuilder->NewString(SerializeJsonDom(args[0], args[2].GetOrDefault(false), args[3].GetOrDefault(false), args[4].GetOrDefault(false))); +} catch (const std::exception& e) { + if (ParseOptions(args[1]).Strict) { + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(GetPos()) << " " << e.what()).data()); + } + return {}; +} + +SIMPLE_STRICT_UDF(TWithAttributes, TOptional<TNodeResource>(TAutoMap<TNodeResource>, TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + TUnboxedValue x = args[0]; + auto y = args[1]; + + if (!IsNodeType<ENodeType::Dict>(y)) { + return {}; + } + + if (IsNodeType<ENodeType::Attr>(x)) { + x = x.GetVariantItem(); + } + + if (y.IsEmbedded()) { + return x; + } + + if (!y.IsBoxed()) { + return {}; + } + + // clone dict as attrnode + if (const auto resource = y.GetResource()) { + return SetNodeType<ENodeType::Attr>(TUnboxedValuePod(new TAttrNode(std::move(x), static_cast<const TPair*>(resource), y.GetDictLength()))); + } else { + TSmallVec<TPair, TStdAllocatorForUdf<TPair>> items; + 
items.reserve(y.GetDictLength()); + const auto it = y.GetDictIterator(); + for (TUnboxedValue x, y; it.NextPair(x, y);) { + items.emplace_back(std::move(x), std::move(y)); + } + + if (items.empty()) { + return x; + } + + return SetNodeType<ENodeType::Attr>(TUnboxedValuePod(new TAttrNode(std::move(x), items.data(), items.size()))); + } +} + +template<ENodeType Type> +TUnboxedValuePod IsTypeImpl(TUnboxedValuePod y) { + if (IsNodeType<ENodeType::Attr>(y)) { + y = y.GetVariantItem().Release(); + } + + return TUnboxedValuePod(IsNodeType<Type>(y)); +} + +SIMPLE_STRICT_UDF(TIsString, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::String>(*args); +} + +SIMPLE_STRICT_UDF(TIsInt64, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::Int64>(*args); +} + +SIMPLE_STRICT_UDF(TIsUint64, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::Uint64>(*args); +} + +SIMPLE_STRICT_UDF(TIsBool, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::Bool>(*args); +} + +SIMPLE_STRICT_UDF(TIsDouble, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::Double>(*args); +} + +SIMPLE_STRICT_UDF(TIsList, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::List>(*args); +} + +SIMPLE_STRICT_UDF(TIsDict, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::Dict>(*args); +} + +SIMPLE_STRICT_UDF(TIsEntity, bool(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return IsTypeImpl<ENodeType::Entity>(*args); +} + +SIMPLE_STRICT_UDF(TEquals, bool(TAutoMap<TNodeResource>, TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(EquateDoms(args[0], args[1])); +} + +SIMPLE_STRICT_UDF(TGetHash, ui64(TAutoMap<TNodeResource>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(HashDom(args[0])); +} + +namespace { + 
+class TBase: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; + + TBase(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) + : Pos_(pos), TypeHelper_(typeHelper), Shape_(shape) + {} + +protected: + template<bool MoreTypesAllowed> + static const TType* CheckType(const ITypeInfoHelper::TPtr typeHelper, const TType* shape) { + switch (const auto kind = typeHelper->GetTypeKind(shape)) { + case ETypeKind::Null: + case ETypeKind::EmptyList: + case ETypeKind::EmptyDict: + return MoreTypesAllowed ? nullptr : shape; + case ETypeKind::Data: + switch (TDataTypeInspector(*typeHelper, shape).GetTypeId()) { + case TDataType<char*>::Id: + case TDataType<TUtf8>::Id: + case TDataType<bool>::Id: + case TDataType<i8>::Id: + case TDataType<i16>::Id: + case TDataType<i32>::Id: + case TDataType<i64>::Id: + case TDataType<ui8>::Id: + case TDataType<ui16>::Id: + case TDataType<ui32>::Id: + case TDataType<ui64>::Id: + case TDataType<float>::Id: + case TDataType<double>::Id: + case TDataType<TYson>::Id: + case TDataType<TJson>::Id: + return nullptr; + default: + return shape; + } + case ETypeKind::Optional: + return CheckType<MoreTypesAllowed>(typeHelper, TOptionalTypeInspector(*typeHelper, shape).GetItemType()); + case ETypeKind::List: + return CheckType<MoreTypesAllowed>(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType()); + case ETypeKind::Dict: { + const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape); + if (const auto keyType = dictTypeInspector.GetKeyType(); ETypeKind::Data == typeHelper->GetTypeKind(keyType)) + if (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id) + return CheckType<MoreTypesAllowed>(typeHelper, dictTypeInspector.GetValueType()); + return shape; + } + case ETypeKind::Tuple: + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) do + 
if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, tupleTypeInspector.GetElementType(--count))) + return bad; + while (count); + return nullptr; + case ETypeKind::Struct: + if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) do + if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, structTypeInspector.GetMemberType(--count))) + return bad; + while (count); + return nullptr; + case ETypeKind::Variant: + if constexpr (MoreTypesAllowed) + return CheckType<MoreTypesAllowed>(typeHelper, TVariantTypeInspector(*typeHelper, shape).GetUnderlyingType()); + else + return shape; + case ETypeKind::Resource: + if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) + return nullptr; + [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME + default: + return shape; + } + } + + const TSourcePosition Pos_; + const ITypeInfoHelper::TPtr TypeHelper_; + const TType *const Shape_; +}; + +class TFrom: public TBase { + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + return MakeDom(TypeHelper_.Get(), Shape_, *args, valueBuilder); + } +public: + static const TStringRef& Name() { + static auto name = TStringRef::Of("From"); + return name; + } + + TFrom(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) + : TBase(pos, typeHelper, shape) + {} + + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + if (!userType) { + builder.SetError("Missing user type."); + return true; + } + + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Invalid user type."); + return true; 
+ } + + const auto argsTypeTuple = userTypeInspector.GetElementType(0); + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); + return true; + } + + if (argsTypeInspector.GetElementsCount() != 1) { + builder.SetError("Expected single argument."); + return true; + } + + const auto inputType = argsTypeInspector.GetElementType(0); + if (const auto badType = CheckType<true>(typeHelper, inputType)) { + ::TStringBuilder sb; + sb << "Impossible to create DOM from incompatible with Yson type: "; + TTypePrinter(*typeHelper, inputType).Out(sb.Out); + if (badType != inputType) { + sb << " Incompatible type: "; + TTypePrinter(*typeHelper, badType).Out(sb.Out); + } + builder.SetError(sb); + return true; + } + + builder.Args()->Add(inputType).Done().Returns(builder.Resource(NodeResourceName)); + + if (!typesOnly) { + builder.Implementation(new TFrom(builder.GetSourcePosition(), typeHelper, inputType)); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } +}; + +class TConvert: public TBase { + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + if (const auto options = ParseOptions(args[1]); options.Strict) + return (options.AutoConvert ? &PeelDom<true, true> : &PeelDom<true, false>)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); + else + return (options.AutoConvert ? 
&PeelDom<false, true> : &PeelDom<false, false>)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); + } + +public: + TConvert(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) + : TBase(pos, typeHelper, shape) + {} + + static const TStringRef& Name() { + static auto name = TStringRef::Of("ConvertTo"); + return name; + } + + + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build(); + builder.OptionalArgs(1); + + if (!userType) { + builder.SetError("Missing user type."); + return true; + } + + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 3) { + builder.SetError("Invalid user type."); + return true; + } + + const auto argsTypeTuple = userTypeInspector.GetElementType(0); + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); + return true; + } + + if (const auto argsCount = argsTypeInspector.GetElementsCount(); argsCount < 1 || argsCount > 2) { + ::TStringBuilder sb; + sb << "Invalid user type - expected one or two arguments, got: " << argsCount; + builder.SetError(sb); + return true; + } + + const auto resultType = userTypeInspector.GetElementType(2); + if (const auto badType = CheckType<false>(typeHelper, resultType)) { + ::TStringBuilder sb; + sb << "Impossible to convert DOM to incompatible with Yson type: "; + TTypePrinter(*typeHelper, resultType).Out(sb.Out); + if (badType != resultType) { + sb << " Incompatible type: "; + TTypePrinter(*typeHelper, badType).Out(sb.Out); + } + builder.SetError(sb); + return true; + } + + 
builder.Args()->Add(builder.Resource(NodeResourceName)).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionsType); + builder.Returns(resultType); + + if (!typesOnly) { + builder.Implementation(new TConvert(builder.GetSourcePosition(), typeHelper, resultType)); + } + return true; + } else { + return false; + } + } +}; + +template<typename TYJson, bool DecodeUtf8 = false> +class TParse: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; +private: + const TSourcePosition Pos_; + const bool StrictType_; + + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final; +public: + TParse(TSourcePosition pos, bool strictType) + : Pos_(pos), StrictType_(strictType) + {} + + static const TStringRef& Name(); + + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + auto typeId = TDataType<TYJson>::Id; + if (userType) { + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Missing or invalid user type."); + return true; + } + + const auto argsTypeTuple = userTypeInspector.GetElementType(0); + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); + return true; + } + + const auto argsCount = argsTypeInspector.GetElementsCount(); + if (argsCount < 1 || argsCount > 2) { + ::TStringBuilder sb; + sb << "Invalid user type - expected one or two arguments, got: " << argsCount; + builder.SetError(sb); + return true; + } + + const auto inputType = argsTypeInspector.GetElementType(0); + auto dataType = inputType; + if (const auto optInspector = TOptionalTypeInspector(*typeHelper, inputType)) { + dataType = optInspector.GetItemType(); + } + + if (const auto 
resInspector = TResourceTypeInspector(*typeHelper, dataType)) { + typeId = TDataType<TYJson>::Id; + } else { + const auto dataInspector = TDataTypeInspector(*typeHelper, dataType); + typeId = dataInspector.GetTypeId(); + } + + builder.UserType(userType); + } + + const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build(); + builder.OptionalArgs(1); + + switch (typeId) { + case TDataType<TYJson>::Id: + builder.Args()->Add<TAutoMap<TYJson>>().Add(optionsType).Done().Returns(builder.Resource(NodeResourceName)); + builder.IsStrict(); + break; + case TDataType<TUtf8>::Id: + builder.Args()->Add<TAutoMap<TUtf8>>().Add(optionsType).Done().Returns(builder.Optional()->Item(builder.Resource(NodeResourceName)).Build()); + break; + default: + builder.Args()->Add<TAutoMap<char*>>().Add(optionsType).Done().Returns(builder.Optional()->Item(builder.Resource(NodeResourceName)).Build()); + break; + } + + if (!typesOnly) { + builder.Implementation(new TParse(builder.GetSourcePosition(), TDataType<TYJson>::Id == typeId)); + } + return true; + } else { + return false; + } + } +}; + +template<> +TUnboxedValue TParse<TYson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { + return TryParseYsonDom(args[0].AsStringRef(), valueBuilder); +} catch (const std::exception& e) { + if (StrictType_ || ParseOptions(args[1]).Strict) { + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(Pos_) << " " << e.what()).data()); + } + return TUnboxedValuePod(); +} + +template<> +TUnboxedValue TParse<TJson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { + return TryParseJsonDom(args[0].AsStringRef(), valueBuilder); +} catch (const std::exception& e) { + if (StrictType_ || ParseOptions(args[1]).Strict) { + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(Pos_) << " " << e.what()).data()); + } + return TUnboxedValuePod(); +} + +template<> +TUnboxedValue 
TParse<TJson, true>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { + return TryParseJsonDom(args[0].AsStringRef(), valueBuilder, true); +} catch (const std::exception& e) { + if (StrictType_ || ParseOptions(args[1]).Strict) { + UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(Pos_) << " " << e.what()).data()); + } + return TUnboxedValuePod(); +} + +template<> +const TStringRef& TParse<TYson, false>::Name() { + static auto yson = TStringRef::Of("Parse"); + return yson; +} + +template<> +const TStringRef& TParse<TJson, false>::Name() { + static auto yson = TStringRef::Of("ParseJson"); + return yson; +} + +template<> +const TStringRef& TParse<TJson, true>::Name() { + static auto yson = TStringRef::Of("ParseJsonDecodeUtf8"); + return yson; +} + +} + +// TODO: optimizer that marks UDFs as strict if Yson::Options(false as Strict) is given +SIMPLE_MODULE(TYson2Module, + TOptions, + TParse<TYson>, + TParse<TJson>, + TParse<TJson, true>, + TConvert, + TConvertToBool, + TConvertToInt64, + TConvertToUint64, + TConvertToDouble, + TConvertToString, + TConvertToList, + TConvertToBoolList, + TConvertToInt64List, + TConvertToUint64List, + TConvertToDoubleList, + TConvertToStringList, + TConvertToDict, + TConvertToBoolDict, + TConvertToInt64Dict, + TConvertToUint64Dict, + TConvertToDoubleDict, + TConvertToStringDict, + TAttributes, + TContains, + TLookup, + TLookupBool, + TLookupInt64, + TLookupUint64, + TLookupDouble, + TLookupString, + TLookupList, + TLookupDict, + TYPath, + TYPathBool, + TYPathInt64, + TYPathUint64, + TYPathDouble, + TYPathString, + TYPathList, + TYPathDict, + TSerialize, + TSerializeText, + TSerializePretty, + TSerializeJson, + TWithAttributes, + TIsString, + TIsInt64, + TIsUint64, + TIsBool, + TIsDouble, + TIsList, + TIsDict, + TIsEntity, + TFrom, + TGetLength, + TEquals, + TGetHash +); + +REGISTER_MODULES(TYson2Module); diff --git a/yql/essentials/udfs/ya.make b/yql/essentials/udfs/ya.make index 
b8d9b2d2132..c452d78775f 100644 --- a/yql/essentials/udfs/ya.make +++ b/yql/essentials/udfs/ya.make @@ -1,4 +1,5 @@ RECURSE( + common examples logs test |
